Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
133ee7db
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
133ee7db
编写于
11月 19, 2021
作者:
小湉湉
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename num_speakers
上级
a97c7b52
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
18 addition
and
21 deletion
+18
-21
examples/csmsc/voc1/run.sh
examples/csmsc/voc1/run.sh
+1
-1
paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
...lespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
+3
-3
paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e_en.py
...peech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e_en.py
+3
-3
paddlespeech/t2s/exps/fastspeech2/synthesize.py
paddlespeech/t2s/exps/fastspeech2/synthesize.py
+4
-4
paddlespeech/t2s/exps/fastspeech2/train.py
paddlespeech/t2s/exps/fastspeech2/train.py
+4
-7
paddlespeech/t2s/models/fastspeech2/fastspeech2.py
paddlespeech/t2s/models/fastspeech2/fastspeech2.py
+3
-3
未找到文件。
examples/csmsc/voc1/run.sh
浏览文件 @
133ee7db
...
...
@@ -3,7 +3,7 @@
set
-e
source
path.sh
gpus
=
4,5
gpus
=
0,1
stage
=
0
stop_stage
=
100
...
...
paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e.py
浏览文件 @
133ee7db
...
...
@@ -46,14 +46,14 @@ def evaluate(args, fastspeech2_config, pwg_config):
print
(
"vocab_size:"
,
vocab_size
)
with
open
(
args
.
speaker_dict
,
'rt'
)
as
f
:
spk_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
num_speakers
=
len
(
spk_id
)
print
(
"
num_speakers:"
,
num_speakers
)
spk_num
=
len
(
spk_id
)
print
(
"
spk_num:"
,
spk_num
)
odim
=
fastspeech2_config
.
n_mels
model
=
FastSpeech2
(
idim
=
vocab_size
,
odim
=
odim
,
num_speakers
=
num_speakers
,
spk_num
=
spk_num
,
**
fastspeech2_config
[
"model"
])
model
.
set_state_dict
(
...
...
paddlespeech/t2s/exps/fastspeech2/multi_spk_synthesize_e2e_en.py
浏览文件 @
133ee7db
...
...
@@ -51,14 +51,14 @@ def evaluate(args, fastspeech2_config, pwg_config):
print
(
"vocab_size:"
,
vocab_size
)
with
open
(
args
.
speaker_dict
,
'rt'
)
as
f
:
spk_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
num_speakers
=
len
(
spk_id
)
print
(
"
num_speakers:"
,
num_speakers
)
spk_num
=
len
(
spk_id
)
print
(
"
spk_num:"
,
spk_num
)
odim
=
fastspeech2_config
.
n_mels
model
=
FastSpeech2
(
idim
=
vocab_size
,
odim
=
odim
,
num_speakers
=
num_speakers
,
spk_num
=
spk_num
,
**
fastspeech2_config
[
"model"
])
model
.
set_state_dict
(
...
...
paddlespeech/t2s/exps/fastspeech2/synthesize.py
浏览文件 @
133ee7db
...
...
@@ -40,19 +40,19 @@ def evaluate(args, fastspeech2_config, pwg_config):
fields
=
[
"utt_id"
,
"text"
]
num_speakers
=
None
spk_num
=
None
if
args
.
speaker_dict
is
not
None
:
print
(
"multiple speaker fastspeech2!"
)
with
open
(
args
.
speaker_dict
,
'rt'
)
as
f
:
spk_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
num_speakers
=
len
(
spk_id
)
spk_num
=
len
(
spk_id
)
fields
+=
[
"spk_id"
]
elif
args
.
voice_cloning
:
print
(
"voice cloning!"
)
fields
+=
[
"spk_emb"
]
else
:
print
(
"single speaker fastspeech2!"
)
print
(
"
num_speakers:"
,
num_speakers
)
print
(
"
spk_num:"
,
spk_num
)
test_dataset
=
DataTable
(
data
=
test_metadata
,
fields
=
fields
)
...
...
@@ -65,7 +65,7 @@ def evaluate(args, fastspeech2_config, pwg_config):
model
=
FastSpeech2
(
idim
=
vocab_size
,
odim
=
odim
,
num_speakers
=
num_speakers
,
spk_num
=
spk_num
,
**
fastspeech2_config
[
"model"
])
model
.
set_state_dict
(
...
...
paddlespeech/t2s/exps/fastspeech2/train.py
浏览文件 @
133ee7db
...
...
@@ -62,13 +62,13 @@ def train_sp(args, config):
"pitch"
,
"energy"
]
converters
=
{
"speech"
:
np
.
load
,
"pitch"
:
np
.
load
,
"energy"
:
np
.
load
}
num_speakers
=
None
spk_num
=
None
if
args
.
speaker_dict
is
not
None
:
print
(
"multiple speaker fastspeech2!"
)
collate_fn
=
fastspeech2_multi_spk_batch_fn
with
open
(
args
.
speaker_dict
,
'rt'
)
as
f
:
spk_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
num_speakers
=
len
(
spk_id
)
spk_num
=
len
(
spk_id
)
fields
+=
[
"spk_id"
]
elif
args
.
voice_cloning
:
print
(
"Training voice cloning!"
)
...
...
@@ -78,7 +78,7 @@ def train_sp(args, config):
else
:
print
(
"single speaker fastspeech2!"
)
collate_fn
=
fastspeech2_single_spk_batch_fn
print
(
"
num_speakers:"
,
num_speakers
)
print
(
"
spk_num:"
,
spk_num
)
# dataloader has been too verbose
logging
.
getLogger
(
"DataLoader"
).
disabled
=
True
...
...
@@ -129,10 +129,7 @@ def train_sp(args, config):
odim
=
config
.
n_mels
model
=
FastSpeech2
(
idim
=
vocab_size
,
odim
=
odim
,
num_speakers
=
num_speakers
,
**
config
[
"model"
])
idim
=
vocab_size
,
odim
=
odim
,
spk_num
=
spk_num
,
**
config
[
"model"
])
if
world_size
>
1
:
model
=
DataParallel
(
model
)
print
(
"model done!"
)
...
...
paddlespeech/t2s/models/fastspeech2/fastspeech2.py
浏览文件 @
133ee7db
...
...
@@ -96,7 +96,7 @@ class FastSpeech2(nn.Layer):
pitch_embed_dropout
:
float
=
0.5
,
stop_gradient_from_pitch_predictor
:
bool
=
False
,
# spk emb
num_speakers
:
int
=
None
,
spk_num
:
int
=
None
,
spk_embed_dim
:
int
=
None
,
spk_embed_integration_type
:
str
=
"add"
,
# tone emb
...
...
@@ -146,9 +146,9 @@ class FastSpeech2(nn.Layer):
# initialize parameters
initialize
(
self
,
init_type
)
if
s
elf
.
spk_embed_dim
and
num_speakers
:
if
s
pk_num
and
self
.
spk_embed_dim
:
self
.
spk_embedding_table
=
nn
.
Embedding
(
num_embeddings
=
num_speakers
,
num_embeddings
=
spk_num
,
embedding_dim
=
self
.
spk_embed_dim
,
padding_idx
=
self
.
padding_idx
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录