From 4b8fde68d06ee5d013c5958e86d077031c98de6e Mon Sep 17 00:00:00 2001
From: qwq
Date: Mon, 15 Apr 2024 16:27:26 +0800
Subject: [PATCH] The tts and tts_to_file methods in TTS/api.py accept a speed
 parameter, but tts_to_file does not forward it to the internal tts call and
 tts does not forward it to synthesizer.tts, so the parameter has no effect.
 Pass speed through both calls so that the speaking speed can actually be
 changed.

---
 TTS/api.py                  |  2 ++
 tests/tts_tests/test_tts.py | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)
 create mode 100644 tests/tts_tests/test_tts.py

diff --git a/TTS/api.py b/TTS/api.py
index 7abc188e74..962081b6be 100644
--- a/TTS/api.py
+++ b/TTS/api.py
@@ -283,6 +283,7 @@ def tts(
             style_text=None,
             reference_speaker_name=None,
             split_sentences=split_sentences,
+            speed=speed,
             **kwargs,
         )
         return wav
@@ -337,6 +338,7 @@ def tts_to_file(
             language=language,
             speaker_wav=speaker_wav,
             split_sentences=split_sentences,
+            speed=speed,
             **kwargs,
         )
         self.synthesizer.save_wav(wav=wav, path=file_path, pipe_out=pipe_out)
diff --git a/tests/tts_tests/test_tts.py b/tests/tts_tests/test_tts.py
new file mode 100644
index 0000000000..a36a7d0b44
--- /dev/null
+++ b/tests/tts_tests/test_tts.py
@@ -0,0 +1,19 @@
+import torch
+from TTS.api import TTS
+
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# List available 🐸TTS models
+print(TTS().list_models())
+
+# Init TTS
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+
+# Run TTS
+# ❗ Since this model is multi-lingual voice cloning model, we must set the target speaker_wav and language
+# Text to speech list of amplitude values as output
+# wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en")
+# Text to speech to a file
+# The tts_to_file and tts methods of TTS/api.py themselves support the speed parameter, but the internal tts method and synthesizer.tts method do not pass the speed parameter, resulting in the speed parameter being meaningless. After adding it, the speaking speed change function can be used normally.
+tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav", speed=0.5)
\ No newline at end of file