提交 5b5a1ea2 作者: wuzewu

Fix docs format issue.

上级 7a28aaad
......@@ -194,17 +194,14 @@ class SpeakerVerificationPreprocessor(object):
return wav
def melspectrogram(self, wav):
mel = librosa.feature.melspectrogram(wav,
sr=self.sampling_rate,
n_fft=self.n_fft,
hop_length=self.hop_length,
n_mels=self.n_mels)
mel = librosa.feature.melspectrogram(
wav, sr=self.sampling_rate, n_fft=self.n_fft, hop_length=self.hop_length, n_mels=self.n_mels)
mel = mel.astype(np.float32).T
return mel
def extract_mel_partials(self, wav):
wav_slices, mel_slices = compute_partial_slices(len(wav), self.partial_n_frames, self.hop_length,
self.min_pad_coverage, self.partial_overlap_ratio)
wav_slices, mel_slices = compute_partial_slices(
len(wav), self.partial_n_frames, self.hop_length, self.min_pad_coverage, self.partial_overlap_ratio)
# pad audio if needed
max_wave_length = wav_slices[-1].stop
......
......@@ -58,7 +58,8 @@ class VoiceCloner(nn.Layer):
'waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams')
# Speaker encoder
self.speaker_processor = SpeakerVerificationPreprocessor(sampling_rate=16000,
self.speaker_processor = SpeakerVerificationPreprocessor(
sampling_rate=16000,
audio_norm_target_dBFS=-30,
vad_window_length=30,
vad_moving_average_width=8,
......@@ -74,7 +75,8 @@ class VoiceCloner(nn.Layer):
self.speaker_encoder.eval()
# Voice synthesizer
self.synthesizer = Tacotron2(vocab_size=68,
self.synthesizer = Tacotron2(
vocab_size=68,
n_tones=10,
d_mels=80,
d_encoder=512,
......@@ -101,13 +103,8 @@ class VoiceCloner(nn.Layer):
self.synthesizer.eval()
# Vocoder
self.vocoder = ConditionalWaveFlow(upsample_factors=[16, 16],
n_flows=8,
n_layers=8,
n_group=16,
channels=128,
n_mels=80,
kernel_size=[3, 3])
self.vocoder = ConditionalWaveFlow(
upsample_factors=[16, 16], n_flows=8, n_layers=8, n_group=16, channels=128, n_mels=80, kernel_size=[3, 3])
self.vocoder.set_state_dict(paddle.load(vocoder_ckpt))
self.vocoder.eval()
......
......@@ -237,11 +237,13 @@ def process_aishell3(dataset_root, output_dir):
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Preprocess transcription of AiShell3 and save them in a compact file(yaml and pickle).")
parser.add_argument("--input",
parser.add_argument(
"--input",
type=str,
default="~/datasets/aishell3/train",
help="path of the training dataset,(contains a label_train-set.txt).")
parser.add_argument("--output",
parser.add_argument(
"--output",
type=str,
help="the directory to save the processed transcription."
"If not provided, it would be the same as the input.")
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册