Update synthesizer.py

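Make verbose output configurable: add a `verbose` parameter (default `True`) to `tts()` in `TTS/utils/synthesizer.py` that gates the sentence-splitting, interpolation, and processing-time/real-time-factor prints.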
coqui-ai · Mar 5, 2024 · 275229a
1 parent 64fdd0e
commit 275229a
Showing 1 changed file with 16 additions and 9 deletions.
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
@@ -264,6 +264,7 @@ def tts(
         style_text=None,
         reference_wav=None,
         reference_speaker_name=None,
+        verbose: bool = True,
         split_sentences: bool = True,
         **kwargs,
     ) -> List[int]:
@@ -278,6 +279,7 @@ def tts(
             style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None.
             reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None.
             reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None.
+            verbose (bool, optional): print verbose output. Defaults to True.
             split_sentences (bool, optional): split the input text into sentences. Defaults to True.
             **kwargs: additional arguments to pass to the TTS model.
         Returns:
@@ -294,9 +296,11 @@ def tts(
         if text:
             sens = [text]
             if split_sentences:
-                print(" > Text splitted to sentences.")
+                if verbose:
+                    print(" > Text splitted to sentences.")
                 sens = self.split_into_sentences(text)
-            print(sens)
+            if verbose:
+                print(sens)
 
         # handle multi-speaker
         if "voice_dir" in kwargs:
@@ -420,7 +424,8 @@ def tts(
                         self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
                     ]
                     if scale_factor[1] != 1:
-                        print(" > interpolating tts model output.")
+                        if verbose:
+                            print(" > interpolating tts model output.")
                         vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
                     else:
                         vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)  # pylint: disable=not-callable
@@ -484,7 +489,8 @@ def tts(
                     self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
                 ]
                 if scale_factor[1] != 1:
-                    print(" > interpolating tts model output.")
+                    if verbose:
+                        print(" > interpolating tts model output.")
                     vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
                 else:
                     vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)  # pylint: disable=not-callable
@@ -497,9 +503,10 @@ def tts(
                 waveform = waveform.numpy()
             wavs = waveform.squeeze()
 
-        # compute stats
-        process_time = time.time() - start_time
-        audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
-        print(f" > Processing time: {process_time}")
-        print(f" > Real-time factor: {process_time / audio_time}")
+        if verbose:
+            # compute stats
+            process_time = time.time() - start_time
+            audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
+            print(f" > Processing time: {process_time}")
+            print(f" > Real-time factor: {process_time / audio_time}")
         return wavs