From 8083da21acbf877f2800ff51781fb4ee5eee75d4 Mon Sep 17 00:00:00 2001 From: liuyibing01 Date: Sat, 7 Mar 2020 14:21:35 +0000 Subject: [PATCH] Fix sample file name --- examples/waveflow/README.md | 4 ++-- parakeet/models/waveflow/waveflow.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/examples/waveflow/README.md b/examples/waveflow/README.md index e21039a..d36f0f3 100644 --- a/examples/waveflow/README.md +++ b/examples/waveflow/README.md @@ -4,7 +4,7 @@ PaddlePaddle dynamic graph implementation of [WaveFlow: A Compact Flow-based Mod - WaveFlow can synthesize 22.05 kHz high-fidelity speech around 40x faster than real-time on a Nvidia V100 GPU without engineered inference kernels, which is faster than [WaveGlow] (https://github.com/NVIDIA/waveglow) and serveral orders of magnitude faster than WaveNet. - WaveFlow is a small-footprint flow-based model for raw audio. It has only 5.9M parameters, which is 15x smalller than WaveGlow (87.9M) and comparable to WaveNet (4.6M). -- WaveFlow is directly trained with maximum likelihood without probability density distillation and auxiliary losses as used in Parallel WaveNet and ClariNet, which simplifies the training pipeline and reduces the cost of development. +- WaveFlow is directly trained with maximum likelihood without probability density distillation and auxiliary losses as used in Parallel WaveNet and ClariNet, which simplifies the training pipeline and reduces the cost of development. ## Project Structure ```text @@ -99,7 +99,7 @@ python -u synthesis.py \ --sigma=1.0 ``` -In this example, `--output` specifies where to save the synthesized audios and `--sample` specifies which sample in the valid dataset (a split from the whole LJSpeech dataset, by default contains the first 16 audio samples) to synthesize based on the mel-spectrograms computed from the ground truth sample audio, e.g., `--sample=0` means to synthesize the first audio in the valid dataset. 
+In this example, `--output` specifies where to save the synthesized audio files, and `--sample` specifies the index of the sample in the valid dataset (a split of the whole LJSpeech dataset that, by default, contains the first 16 audio samples, so the index must be less than 16) to synthesize from the mel-spectrogram computed from the ground-truth sample audio; e.g., `--sample=0` synthesizes the first audio in the valid dataset. ### Benchmarking diff --git a/parakeet/models/waveflow/waveflow.py b/parakeet/models/waveflow/waveflow.py index a8bd8af..4ef1411 100644 --- a/parakeet/models/waveflow/waveflow.py +++ b/parakeet/models/waveflow/waveflow.py @@ -179,10 +179,13 @@ class WaveFlow(): mels_list = [mels for _, mels in self.validloader()] if sample is not None: mels_list = [mels_list[sample]] + else: + sample = 0 - for sample, mel in enumerate(mels_list): - filename = "{}/valid_{}.wav".format(output, sample) - print("Synthesize sample {}, save as {}".format(sample, filename)) + for idx, mel in enumerate(mels_list): + abs_idx = sample + idx + filename = "{}/valid_{}.wav".format(output, abs_idx) + print("Synthesize sample {}, save as {}".format(abs_idx, filename)) start_time = time.time() audio = self.waveflow.synthesize(mel, sigma=self.config.sigma) -- GitLab