Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
5b5a1ea2
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 1 年 前同步成功
通知
282
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5b5a1ea2
编写于
6月 09, 2021
作者:
W
wuzewu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix docs format issue.
上级
7a28aaad
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
53 addition
and
57 deletion
+53
-57
demo/README.md
demo/README.md
+1
-1
modules/audio/voice_cloning/lstm_tacotron2/audio_processor.py
...les/audio/voice_cloning/lstm_tacotron2/audio_processor.py
+4
-7
modules/audio/voice_cloning/lstm_tacotron2/module.py
modules/audio/voice_cloning/lstm_tacotron2/module.py
+38
-41
modules/audio/voice_cloning/lstm_tacotron2/preprocess_transcription.py
.../voice_cloning/lstm_tacotron2/preprocess_transcription.py
+10
-8
未找到文件。
demo/README.md
浏览文件 @
5b5a1ea2
### PaddleHub Official Website:https://www.paddlepaddle.org.cn/hub
### PaddleHub Module Searching:https://www.paddlepaddle.org.cn/hublist
### PaddleHub Module Searching:https://www.paddlepaddle.org.cn/hublist
modules/audio/voice_cloning/lstm_tacotron2/audio_processor.py
浏览文件 @
5b5a1ea2
...
...
@@ -194,17 +194,14 @@ class SpeakerVerificationPreprocessor(object):
return
wav
def
melspectrogram
(
self
,
wav
):
mel
=
librosa
.
feature
.
melspectrogram
(
wav
,
sr
=
self
.
sampling_rate
,
n_fft
=
self
.
n_fft
,
hop_length
=
self
.
hop_length
,
n_mels
=
self
.
n_mels
)
mel
=
librosa
.
feature
.
melspectrogram
(
wav
,
sr
=
self
.
sampling_rate
,
n_fft
=
self
.
n_fft
,
hop_length
=
self
.
hop_length
,
n_mels
=
self
.
n_mels
)
mel
=
mel
.
astype
(
np
.
float32
).
T
return
mel
def
extract_mel_partials
(
self
,
wav
):
wav_slices
,
mel_slices
=
compute_partial_slices
(
len
(
wav
),
self
.
partial_n_frames
,
self
.
hop_length
,
self
.
min_pad_coverage
,
self
.
partial_overlap_ratio
)
wav_slices
,
mel_slices
=
compute_partial_slices
(
len
(
wav
),
self
.
partial_n_frames
,
self
.
hop_length
,
self
.
min_pad_coverage
,
self
.
partial_overlap_ratio
)
# pad audio if needed
max_wave_length
=
wav_slices
[
-
1
].
stop
...
...
modules/audio/voice_cloning/lstm_tacotron2/module.py
浏览文件 @
5b5a1ea2
...
...
@@ -58,56 +58,53 @@ class VoiceCloner(nn.Layer):
'waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams'
)
# Speaker encoder
self
.
speaker_processor
=
SpeakerVerificationPreprocessor
(
sampling_rate
=
16000
,
audio_norm_target_dBFS
=-
30
,
vad_window_length
=
30
,
vad_moving_average_width
=
8
,
vad_max_silence_length
=
6
,
mel_window_length
=
25
,
mel_window_step
=
10
,
n_mels
=
40
,
partial_n_frames
=
160
,
min_pad_coverage
=
0.75
,
partial_overlap_ratio
=
0.5
)
self
.
speaker_processor
=
SpeakerVerificationPreprocessor
(
sampling_rate
=
16000
,
audio_norm_target_dBFS
=-
30
,
vad_window_length
=
30
,
vad_moving_average_width
=
8
,
vad_max_silence_length
=
6
,
mel_window_length
=
25
,
mel_window_step
=
10
,
n_mels
=
40
,
partial_n_frames
=
160
,
min_pad_coverage
=
0.75
,
partial_overlap_ratio
=
0.5
)
self
.
speaker_encoder
=
LSTMSpeakerEncoder
(
n_mels
=
40
,
num_layers
=
3
,
hidden_size
=
256
,
output_size
=
256
)
self
.
speaker_encoder
.
set_state_dict
(
paddle
.
load
(
speaker_encoder_ckpt
))
self
.
speaker_encoder
.
eval
()
# Voice synthesizer
self
.
synthesizer
=
Tacotron2
(
vocab_size
=
68
,
n_tones
=
10
,
d_mels
=
80
,
d_encoder
=
512
,
encoder_conv_layers
=
3
,
encoder_kernel_size
=
5
,
d_prenet
=
256
,
d_attention_rnn
=
1024
,
d_decoder_rnn
=
1024
,
attention_filters
=
32
,
attention_kernel_size
=
31
,
d_attention
=
128
,
d_postnet
=
512
,
postnet_kernel_size
=
5
,
postnet_conv_layers
=
5
,
reduction_factor
=
1
,
p_encoder_dropout
=
0.5
,
p_prenet_dropout
=
0.5
,
p_attention_dropout
=
0.1
,
p_decoder_dropout
=
0.1
,
p_postnet_dropout
=
0.5
,
d_global_condition
=
256
,
use_stop_token
=
False
)
self
.
synthesizer
=
Tacotron2
(
vocab_size
=
68
,
n_tones
=
10
,
d_mels
=
80
,
d_encoder
=
512
,
encoder_conv_layers
=
3
,
encoder_kernel_size
=
5
,
d_prenet
=
256
,
d_attention_rnn
=
1024
,
d_decoder_rnn
=
1024
,
attention_filters
=
32
,
attention_kernel_size
=
31
,
d_attention
=
128
,
d_postnet
=
512
,
postnet_kernel_size
=
5
,
postnet_conv_layers
=
5
,
reduction_factor
=
1
,
p_encoder_dropout
=
0.5
,
p_prenet_dropout
=
0.5
,
p_attention_dropout
=
0.1
,
p_decoder_dropout
=
0.1
,
p_postnet_dropout
=
0.5
,
d_global_condition
=
256
,
use_stop_token
=
False
)
self
.
synthesizer
.
set_state_dict
(
paddle
.
load
(
synthesizer_ckpt
))
self
.
synthesizer
.
eval
()
# Vocoder
self
.
vocoder
=
ConditionalWaveFlow
(
upsample_factors
=
[
16
,
16
],
n_flows
=
8
,
n_layers
=
8
,
n_group
=
16
,
channels
=
128
,
n_mels
=
80
,
kernel_size
=
[
3
,
3
])
self
.
vocoder
=
ConditionalWaveFlow
(
upsample_factors
=
[
16
,
16
],
n_flows
=
8
,
n_layers
=
8
,
n_group
=
16
,
channels
=
128
,
n_mels
=
80
,
kernel_size
=
[
3
,
3
])
self
.
vocoder
.
set_state_dict
(
paddle
.
load
(
vocoder_ckpt
))
self
.
vocoder
.
eval
()
...
...
modules/audio/voice_cloning/lstm_tacotron2/preprocess_transcription.py
浏览文件 @
5b5a1ea2
...
...
@@ -237,14 +237,16 @@ def process_aishell3(dataset_root, output_dir):
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
"Preprocess transcription of AiShell3 and save them in a compact file(yaml and pickle)."
)
parser
.
add_argument
(
"--input"
,
type
=
str
,
default
=
"~/datasets/aishell3/train"
,
help
=
"path of the training dataset,(contains a label_train-set.txt)."
)
parser
.
add_argument
(
"--output"
,
type
=
str
,
help
=
"the directory to save the processed transcription."
"If not provided, it would be the same as the input."
)
parser
.
add_argument
(
"--input"
,
type
=
str
,
default
=
"~/datasets/aishell3/train"
,
help
=
"path of the training dataset,(contains a label_train-set.txt)."
)
parser
.
add_argument
(
"--output"
,
type
=
str
,
help
=
"the directory to save the processed transcription."
"If not provided, it would be the same as the input."
)
args
=
parser
.
parse_args
()
if
args
.
output
is
None
:
args
.
output
=
args
.
input
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录