Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove duplicate AudioProcessor code, fix ExtractTTSpectrogram.ipynb #3230

Merged
merged 19 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
8fa4de1
chore: remove unused argument
eginhard Nov 13, 2023
d758798
refactor(audio.processor): remove duplicate stft+griffin_lim
eginhard Nov 13, 2023
5a5da76
chore(audio.processor): remove unused compute_stft_paddings
eginhard Nov 13, 2023
794f41c
refactor(audio.processor): remove duplicate db_to_amp
eginhard Nov 13, 2023
4fd5c46
refactor(audio.processor): remove duplicate amp_to_db
eginhard Nov 13, 2023
fd9d6d4
refactor(audio.processor): remove duplicate linear_to_mel
eginhard Nov 14, 2023
7548777
refactor(audio.processor): remove duplicate mel_to_linear
eginhard Nov 14, 2023
da229f3
refactor(audio.processor): remove duplicate build_mel_basis
eginhard Nov 14, 2023
f37cc4c
refactor(audio.processor): remove duplicate stft_parameters
eginhard Nov 14, 2023
11e98d3
refactor(audio.processor): use pre-/deemphasis from numpy_transforms
eginhard Nov 14, 2023
b620092
refactor(audio.processor): use rms_volume_norm from numpy_transforms
eginhard Nov 14, 2023
5232bf9
chore(audio.processor): remove duplicate assert
eginhard Nov 14, 2023
842a632
refactor(audio.processor): use find_endpoint from numpy_transforms
eginhard Nov 14, 2023
0a0e7a3
refactor(audio.processor): use trim_silence from numpy_transforms
eginhard Nov 14, 2023
9a43eaf
refactor(audio.processor): use volume_norm from numpy_transforms
eginhard Nov 14, 2023
13e640f
refactor(audio.processor): use load_wav from numpy_transforms
eginhard Nov 14, 2023
aa0fbdf
fix(bin.extract_tts_spectrograms): set quantization bits
eginhard Nov 14, 2023
ddbaecd
fix(ExtractTTSpectrogram.ipynb): adapt to current TTS code
eginhard Nov 15, 2023
8f1db75
refactor(audio.processor): remove duplicate quantization methods
eginhard Nov 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions TTS/bin/extract_tts_spectrograms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor
from TTS.utils.audio.numpy_transforms import quantize
from TTS.utils.generic_utils import count_parameters

use_cuda = torch.cuda.is_available()
Expand Down Expand Up @@ -159,7 +160,7 @@ def inference(


def extract_spectrograms(
data_loader, model, ap, output_path, quantized_wav=False, save_audio=False, debug=False, metada_name="metada.txt"
data_loader, model, ap, output_path, quantize_bits=0, save_audio=False, debug=False, metada_name="metada.txt"
):
model.eval()
export_metadata = []
Expand Down Expand Up @@ -196,8 +197,8 @@ def extract_spectrograms(
_, wavq_path, mel_path, wav_gl_path, wav_path = set_filename(wav_file_path, output_path)

# quantize and save wav
if quantized_wav:
wavq = ap.quantize(wav)
if quantize_bits > 0:
wavq = quantize(wav, quantize_bits)
np.save(wavq_path, wavq)

# save TTS mel
Expand Down Expand Up @@ -263,7 +264,7 @@ def main(args): # pylint: disable=redefined-outer-name
model,
ap,
args.output_path,
quantized_wav=args.quantized,
quantize_bits=args.quantize_bits,
save_audio=args.save_audio,
debug=args.debug,
metada_name="metada.txt",
Expand All @@ -277,7 +278,7 @@ def main(args): # pylint: disable=redefined-outer-name
parser.add_argument("--output_path", type=str, help="Path to save mel specs", required=True)
parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug")
parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files")
parser.add_argument("--quantized", action="store_true", help="Save quantized audio files")
parser.add_argument("--quantize_bits", type=int, default=0, help="Save quantized audio files if non-zero")
parser.add_argument("--eval", type=bool, help="compute eval.", default=True)
args = parser.parse_args()

Expand Down
1 change: 0 additions & 1 deletion TTS/utils/audio/numpy_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ def stft(
def istft(
*,
y: np.ndarray = None,
fft_size: int = None,
hop_length: int = None,
win_length: int = None,
window: str = "hann",
Expand Down
Loading
Loading