提交 5b5a1ea2，作者：wuzewu

Fix docs format issue.

上级 7a28aaad
...@@ -194,17 +194,14 @@ class SpeakerVerificationPreprocessor(object): ...@@ -194,17 +194,14 @@ class SpeakerVerificationPreprocessor(object):
return wav return wav
def melspectrogram(self, wav): def melspectrogram(self, wav):
mel = librosa.feature.melspectrogram(wav, mel = librosa.feature.melspectrogram(
sr=self.sampling_rate, wav, sr=self.sampling_rate, n_fft=self.n_fft, hop_length=self.hop_length, n_mels=self.n_mels)
n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels)
mel = mel.astype(np.float32).T mel = mel.astype(np.float32).T
return mel return mel
def extract_mel_partials(self, wav): def extract_mel_partials(self, wav):
wav_slices, mel_slices = compute_partial_slices(len(wav), self.partial_n_frames, self.hop_length, wav_slices, mel_slices = compute_partial_slices(
self.min_pad_coverage, self.partial_overlap_ratio) len(wav), self.partial_n_frames, self.hop_length, self.min_pad_coverage, self.partial_overlap_ratio)
# pad audio if needed # pad audio if needed
max_wave_length = wav_slices[-1].stop max_wave_length = wav_slices[-1].stop
......
...@@ -58,7 +58,8 @@ class VoiceCloner(nn.Layer): ...@@ -58,7 +58,8 @@ class VoiceCloner(nn.Layer):
'waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams') 'waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams')
# Speaker encoder # Speaker encoder
self.speaker_processor = SpeakerVerificationPreprocessor(sampling_rate=16000, self.speaker_processor = SpeakerVerificationPreprocessor(
sampling_rate=16000,
audio_norm_target_dBFS=-30, audio_norm_target_dBFS=-30,
vad_window_length=30, vad_window_length=30,
vad_moving_average_width=8, vad_moving_average_width=8,
...@@ -74,7 +75,8 @@ class VoiceCloner(nn.Layer): ...@@ -74,7 +75,8 @@ class VoiceCloner(nn.Layer):
self.speaker_encoder.eval() self.speaker_encoder.eval()
# Voice synthesizer # Voice synthesizer
self.synthesizer = Tacotron2(vocab_size=68, self.synthesizer = Tacotron2(
vocab_size=68,
n_tones=10, n_tones=10,
d_mels=80, d_mels=80,
d_encoder=512, d_encoder=512,
...@@ -101,13 +103,8 @@ class VoiceCloner(nn.Layer): ...@@ -101,13 +103,8 @@ class VoiceCloner(nn.Layer):
self.synthesizer.eval() self.synthesizer.eval()
# Vocoder # Vocoder
self.vocoder = ConditionalWaveFlow(upsample_factors=[16, 16], self.vocoder = ConditionalWaveFlow(
n_flows=8, upsample_factors=[16, 16], n_flows=8, n_layers=8, n_group=16, channels=128, n_mels=80, kernel_size=[3, 3])
n_layers=8,
n_group=16,
channels=128,
n_mels=80,
kernel_size=[3, 3])
self.vocoder.set_state_dict(paddle.load(vocoder_ckpt)) self.vocoder.set_state_dict(paddle.load(vocoder_ckpt))
self.vocoder.eval() self.vocoder.eval()
......
...@@ -237,11 +237,13 @@ def process_aishell3(dataset_root, output_dir): ...@@ -237,11 +237,13 @@ def process_aishell3(dataset_root, output_dir):
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Preprocess transcription of AiShell3 and save them in a compact file(yaml and pickle).") description="Preprocess transcription of AiShell3 and save them in a compact file(yaml and pickle).")
parser.add_argument("--input", parser.add_argument(
"--input",
type=str, type=str,
default="~/datasets/aishell3/train", default="~/datasets/aishell3/train",
help="path of the training dataset,(contains a label_train-set.txt).") help="path of the training dataset,(contains a label_train-set.txt).")
parser.add_argument("--output", parser.add_argument(
"--output",
type=str, type=str,
help="the directory to save the processed transcription." help="the directory to save the processed transcription."
"If not provided, it would be the same as the input.") "If not provided, it would be the same as the input.")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册