Skip to content

Commit

Permalink
Make style
Browse files Browse the repository at this point in the history
  • Loading branch information
erogol committed Jul 31, 2023
1 parent 129aff9 commit f4c893d
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 42 deletions.
1 change: 1 addition & 0 deletions TTS/tts/datasets/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@ def kss(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
return items


def bel_tts_formatter(root_path, meta_file, **kwargs): # pylint: disable=unused-argument
txt_file = os.path.join(root_path, meta_file)
items = []
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/models/vits.py
Original file line number Diff line number Diff line change
Expand Up @@ -1814,7 +1814,7 @@ def export_onnx(self, output_path: str = "coqui_vits.onnx", verbose: bool = True
# rollback values
_forward = self.forward
disc = None
if hasattr(self, 'disc'):
if hasattr(self, "disc"):
disc = self.disc
training = self.training

Expand Down Expand Up @@ -1908,15 +1908,15 @@ def inference_onnx(self, x, x_lengths=None, speaker_id=None, language_id=None):
[self.inference_noise_scale, self.length_scale, self.inference_noise_scale_dp],
dtype=np.float32,
)

audio = self.onnx_sess.run(
["output"],
{
"input": x,
"input_lengths": x_lengths,
"scales": scales,
"sid": None if speaker_id is None else torch.tensor([speaker_id]).cpu().numpy(),
"langid": None if language_id is None else torch.tensor([language_id]).cpu().numpy()
"langid": None if language_id is None else torch.tensor([language_id]).cpu().numpy(),
},
)
return audio[0][0]
Expand Down
3 changes: 2 additions & 1 deletion TTS/tts/utils/text/cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def multilingual_cleaners(text):
text = collapse_whitespace(text)
return text


def no_cleaners(text):
# remove newline characters
text = text.replace("\n", "")
return text
return text
5 changes: 3 additions & 2 deletions recipes/bel-alex73/dump_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from train_glowtts import config
import json
import re

from train_glowtts import config

s = json.dumps(config, default=vars, indent=2)
s = re.sub(r'"test_sentences":\s*\[\],', '', s)
s = re.sub(r'"test_sentences":\s*\[\],', "", s)
print(s)
50 changes: 25 additions & 25 deletions recipes/bel-alex73/train_glowtts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,30 +3,31 @@
# Trainer: Where the ✨️ happens.
# TrainerArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs
from TTS.tts.configs.shared_configs import BaseAudioConfig

# GlowTTSConfig: all model related values for training, validating and testing.
from TTS.tts.configs.glow_tts_config import GlowTTSConfig

# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig, CharactersConfig
from TTS.tts.configs.shared_configs import BaseAudioConfig, BaseDatasetConfig, CharactersConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

# Training outputs are written to this fixed directory (not the script's own folder).
output_path = '/storage/output-glowtts/'
output_path = "/storage/output-glowtts/"


# DEFINE DATASET CONFIG
# Point the config at the dataset parsed by `bel_tts_formatter` and define its metadata file and path.
# You can also use a simple Dict to define the dataset and pass it to your custom formatter.
dataset_config = BaseDatasetConfig(
formatter="bel_tts_formatter", meta_file_train="ipa_final_dataset.csv", path=os.path.join(output_path, "/storage/filtered_dataset/")
formatter="bel_tts_formatter",
meta_file_train="ipa_final_dataset.csv",
path=os.path.join(output_path, "/storage/filtered_dataset/"),
)

characters=CharactersConfig(
characters = CharactersConfig(
characters_class="TTS.tts.utils.text.characters.Graphemes",
pad="_",
eos="~",
Expand Down Expand Up @@ -71,41 +72,40 @@
)

if __name__ == "__main__":

# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves to the dataloader and the training loggers.
# INITIALIZE THE AUDIO PROCESSOR
# Audio processor is used for feature extraction and audio I/O.
# It mainly serves to the dataloader and the training loggers.
ap = AudioProcessor.init_from_config(config)

# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config
# INITIALIZE THE TOKENIZER
# Tokenizer is used to convert text to sequences of token IDs.
# If characters are not defined in the config, default characters are passed to the config
tokenizer, config = TTSTokenizer.init_from_config(config)

# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.
# LOAD DATA SAMPLES
# Each sample is a list of ```[text, audio_file_path, speaker_name]```
# You can define your custom sample loader returning the list of samples.
# Or define your custom formatter and pass it to the `load_tts_samples`.
# Check `TTS.tts.datasets.load_tts_samples` for more details.
train_samples, eval_samples = load_tts_samples(
dataset_config,
eval_split=True,
eval_split_max_size=config.eval_split_max_size,
eval_split_size=config.eval_split_size,
)

# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
# INITIALIZE THE MODEL
# Models take a config object and a speaker manager as input
# Config defines the details of the model like the number of layers, the size of the embedding, etc.
# Speaker manager is used by multi-speaker models.
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)

# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the 🐸TTS models with all its perks like mixed-precision training,
# distributed training, etc.
# INITIALIZE THE TRAINER
# Trainer provides a generic API to train all the 🐸TTS models with all its perks like mixed-precision training,
# distributed training, etc.
trainer = Trainer(
TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)

# AND... 3,2,1... 🚀
# AND... 3,2,1... 🚀
trainer.fit()
10 changes: 5 additions & 5 deletions recipes/bel-alex73/train_hifigan.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import os

from trainer import Trainer, TrainerArgs
from TTS.tts.configs.shared_configs import BaseAudioConfig
from coqpit import Coqpit
from trainer import Trainer, TrainerArgs

from TTS.tts.configs.shared_configs import BaseAudioConfig
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.configs.hifigan_config import *;
from TTS.vocoder.configs.hifigan_config import *
from TTS.vocoder.datasets.preprocess import load_wav_data
from TTS.vocoder.models.gan import GAN

output_path = '/storage/output-hifigan/'
output_path = "/storage/output-hifigan/"

audio_config = BaseAudioConfig(
mel_fmin=50,
Expand Down Expand Up @@ -57,4 +57,4 @@
trainer = Trainer(
TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
)
trainer.fit()
trainer.fit()
10 changes: 5 additions & 5 deletions recipes/vctk/delightful_tts/train_delightful_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.delightful_tts_config import DelightfulTtsAudioConfig, DelightfulTTSConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.delightful_tts import DelightfulTtsArgs, DelightfulTTS, VocoderConfig
from TTS.tts.models.delightful_tts import DelightfulTTS, DelightfulTtsArgs, VocoderConfig
from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio.processor import AudioProcessor
Expand All @@ -14,7 +14,9 @@
output_path = os.path.dirname(os.path.abspath(__file__))


dataset_config = BaseDatasetConfig(dataset_name="vctk", formatter="vctk", meta_file_train="", path=data_path, language="en-us")
dataset_config = BaseDatasetConfig(
dataset_name="vctk", formatter="vctk", meta_file_train="", path=data_path, language="en-us"
)

audio_config = DelightfulTtsAudioConfig()

Expand Down Expand Up @@ -73,9 +75,7 @@
config.model_args.num_speakers = speaker_manager.num_speakers


model = DelightfulTTS(
ap=ap, config=config, tokenizer=tokenizer, speaker_manager=speaker_manager, emotion_manager=None
)
model = DelightfulTTS(ap=ap, config=config, tokenizer=tokenizer, speaker_manager=speaker_manager, emotion_manager=None)

trainer = Trainer(
TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
Expand Down
2 changes: 1 addition & 1 deletion tests/tts_tests2/test_delightful_tts_d-vectors_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
binary_align_loss_alpha=0.0,
use_attn_priors=False,
test_sentences=[
["Be a voice, not an echo.", "ljspeech-0"],
["Be a voice, not an echo.", "ljspeech-0"],
],
output_path=output_path,
use_speaker_embedding=False,
Expand Down

0 comments on commit f4c893d

Please sign in to comment.