Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
fc670339
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fc670339
编写于
4月 24, 2023
作者:
小湉湉
提交者:
GitHub
4月 24, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[TTS]Fix losses of StarGAN v2 VC (#3184)
上级
84cc5fc9
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
47 additions
and
27 deletions
+47
-27
examples/vctk/vc3/local/train.sh
examples/vctk/vc3/local/train.sh
+2
-1
paddlespeech/t2s/datasets/am_batch_fn.py
paddlespeech/t2s/datasets/am_batch_fn.py
+4
-4
paddlespeech/t2s/exps/starganv2_vc/train.py
paddlespeech/t2s/exps/starganv2_vc/train.py
+16
-1
paddlespeech/t2s/models/starganv2_vc/losses.py
paddlespeech/t2s/models/starganv2_vc/losses.py
+23
-19
paddlespeech/t2s/models/starganv2_vc/starganv2_vc_updater.py
paddlespeech/t2s/models/starganv2_vc/starganv2_vc_updater.py
+2
-2
未找到文件。
examples/vctk/vc3/local/train.sh
浏览文件 @
fc670339
...
...
@@ -8,4 +8,5 @@ python3 ${BIN_DIR}/train.py \
--dev-metadata
=
dump/dev/norm/metadata.jsonl
\
--config
=
${
config_path
}
\
--output-dir
=
${
train_output_path
}
\
--ngpu
=
1
--ngpu
=
1
\
--speaker-dict
=
dump/speaker_id_map.txt
paddlespeech/t2s/datasets/am_batch_fn.py
浏览文件 @
fc670339
...
...
@@ -820,12 +820,13 @@ class StarGANv2VCCollateFn:
self
.
max_mel_length
=
max_mel_length
def
random_clip
(
self
,
mel
:
np
.
array
):
# [
80, T
]
mel_length
=
mel
.
shape
[
1
]
# [
T, 80
]
mel_length
=
mel
.
shape
[
0
]
if
mel_length
>
self
.
max_mel_length
:
random_start
=
np
.
random
.
randint
(
0
,
mel_length
-
self
.
max_mel_length
)
mel
=
mel
[:,
random_start
:
random_start
+
self
.
max_mel_length
]
mel
=
mel
[
random_start
:
random_start
+
self
.
max_mel_length
,
:]
return
mel
def
__call__
(
self
,
exmaples
):
...
...
@@ -843,7 +844,6 @@ class StarGANv2VCCollateFn:
mel
=
[
self
.
random_clip
(
item
[
"mel"
])
for
item
in
examples
]
ref_mel
=
[
self
.
random_clip
(
item
[
"ref_mel"
])
for
item
in
examples
]
ref_mel_2
=
[
self
.
random_clip
(
item
[
"ref_mel_2"
])
for
item
in
examples
]
mel
=
batch_sequences
(
mel
)
ref_mel
=
batch_sequences
(
ref_mel
)
ref_mel_2
=
batch_sequences
(
ref_mel_2
)
...
...
paddlespeech/t2s/exps/starganv2_vc/train.py
浏览文件 @
fc670339
...
...
@@ -113,6 +113,16 @@ def train_sp(args, config):
model_version
=
'1.0'
uncompress_path
=
download_and_decompress
(
StarGANv2VC_source
[
model_version
],
MODEL_HOME
)
# 根据 speaker 的个数修改 num_domains
# 源码的预训练模型和 default.yaml 里面默认是 20
if
args
.
speaker_dict
is
not
None
:
with
open
(
args
.
speaker_dict
,
'rt'
,
encoding
=
'utf-8'
)
as
f
:
spk_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
spk_num
=
len
(
spk_id
)
print
(
"spk_num:"
,
spk_num
)
config
[
'mapping_network_params'
][
'num_domains'
]
=
spk_num
config
[
'style_encoder_params'
][
'num_domains'
]
=
spk_num
config
[
'discriminator_params'
][
'num_domains'
]
=
spk_num
generator
=
Generator
(
**
config
[
'generator_params'
])
mapping_network
=
MappingNetwork
(
**
config
[
'mapping_network_params'
])
...
...
@@ -123,7 +133,7 @@ def train_sp(args, config):
jdc_model_dir
=
os
.
path
.
join
(
uncompress_path
,
'jdcnet.pdz'
)
asr_model_dir
=
os
.
path
.
join
(
uncompress_path
,
'asr.pdz'
)
F0_model
=
JDCNet
(
num_class
=
1
,
seq_len
=
192
)
F0_model
=
JDCNet
(
num_class
=
1
,
seq_len
=
config
[
'max_mel_length'
]
)
F0_model
.
set_state_dict
(
paddle
.
load
(
jdc_model_dir
)[
'main_params'
])
F0_model
.
eval
()
...
...
@@ -234,6 +244,11 @@ def main():
parser
.
add_argument
(
"--output-dir"
,
type
=
str
,
help
=
"output dir."
)
parser
.
add_argument
(
"--ngpu"
,
type
=
int
,
default
=
1
,
help
=
"if ngpu == 0, use cpu."
)
parser
.
add_argument
(
"--speaker-dict"
,
type
=
str
,
default
=
None
,
help
=
"speaker id map file for multiple speaker model."
)
args
=
parser
.
parse_args
()
...
...
paddlespeech/t2s/models/starganv2_vc/losses.py
浏览文件 @
fc670339
...
...
@@ -19,35 +19,38 @@ import paddle.nn.functional as F
from
.transforms
import
build_transforms
# 这些都写到 updater 里
def
compute_d_loss
(
nets
:
Dict
[
str
,
Any
],
x_real
:
paddle
.
Tensor
,
y_org
:
paddle
.
Tensor
,
y_trg
:
paddle
.
Tensor
,
z_trg
:
paddle
.
Tensor
=
None
,
x_ref
:
paddle
.
Tensor
=
None
,
use_r1_reg
:
bool
=
True
,
use_adv_cls
:
bool
=
False
,
use_con_reg
:
bool
=
False
,
lambda_reg
:
float
=
1.
,
lambda_adv_cls
:
float
=
0.1
,
lambda_con_reg
:
float
=
10.
):
def
compute_d_loss
(
nets
:
Dict
[
str
,
Any
],
x_real
:
paddle
.
Tensor
,
y_org
:
paddle
.
Tensor
,
y_trg
:
paddle
.
Tensor
,
z_trg
:
paddle
.
Tensor
=
None
,
x_ref
:
paddle
.
Tensor
=
None
,
# TODO: should be True here, but r1_reg has some bug now
use_r1_reg
:
bool
=
False
,
use_adv_cls
:
bool
=
False
,
use_con_reg
:
bool
=
False
,
lambda_reg
:
float
=
1.
,
lambda_adv_cls
:
float
=
0.1
,
lambda_con_reg
:
float
=
10.
):
assert
(
z_trg
is
None
)
!=
(
x_ref
is
None
)
# with real audios
x_real
.
stop_gradient
=
False
out
=
nets
[
'discriminator'
](
x_real
,
y_org
)
loss_real
=
adv_loss
(
out
,
1
)
# R1 regularization (https://arxiv.org/abs/1801.04406v4)
if
use_r1_reg
:
loss_reg
=
r1_reg
(
out
,
x_real
)
else
:
loss_reg
=
paddle
.
to_tensor
([
0.
],
dtype
=
paddle
.
float32
)
# loss_reg = paddle.to_tensor([0.], dtype=paddle.float32)
loss_reg
=
paddle
.
zeros
([
1
])
# consistency regularization (bCR-GAN: https://arxiv.org/abs/2002.04724)
loss_con_reg
=
paddle
.
to_tensor
([
0.
],
dtype
=
paddle
.
float32
)
loss_con_reg
=
paddle
.
zeros
([
1
]
)
if
use_con_reg
:
t
=
build_transforms
()
out_aug
=
nets
[
'discriminator'
](
t
(
x_real
).
detach
(),
y_org
)
...
...
@@ -118,9 +121,10 @@ def compute_g_loss(nets: Dict[str, Any],
s_trg
=
nets
[
'style_encoder'
](
x_ref
,
y_trg
)
# compute ASR/F0 features (real)
with
paddle
.
no_grad
():
F0_real
,
GAN_F0_real
,
cyc_F0_real
=
nets
[
'F0_model'
](
x_real
)
ASR_real
=
nets
[
'asr_model'
].
get_feature
(
x_real
)
# 源码没有用 .eval(), 使用了 no_grad()
# 我们使用了 .eval(), 开启 with paddle.no_grad() 会报错
F0_real
,
GAN_F0_real
,
cyc_F0_real
=
nets
[
'F0_model'
](
x_real
)
ASR_real
=
nets
[
'asr_model'
].
get_feature
(
x_real
)
# adversarial loss
x_fake
=
nets
[
'generator'
](
x_real
,
s_trg
,
masks
=
None
,
F0
=
GAN_F0_real
)
...
...
paddlespeech/t2s/models/starganv2_vc/starganv2_vc_updater.py
浏览文件 @
fc670339
...
...
@@ -259,7 +259,7 @@ class StarGANv2VCEvaluator(StandardEvaluator):
y_org
=
y_org
,
y_trg
=
y_trg
,
z_trg
=
z_trg
,
use_r1_reg
=
False
,
use_r1_reg
=
self
.
use_r1_reg
,
use_adv_cls
=
use_adv_cls
,
**
self
.
d_loss_params
)
...
...
@@ -269,7 +269,7 @@ class StarGANv2VCEvaluator(StandardEvaluator):
y_org
=
y_org
,
y_trg
=
y_trg
,
x_ref
=
x_ref
,
use_r1_reg
=
False
,
use_r1_reg
=
self
.
use_r1_reg
,
use_adv_cls
=
use_adv_cls
,
**
self
.
d_loss_params
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录