Repository: PaddlePaddle/DeepSpeech
Commit:     57c4f4a6
Author:     xiongxinlei
Date:       Mar 03, 2022
Parent:     dc28ebe4

    add sid learning rate and training model
Showing 3 changed files with 138 additions and 1 deletion (+138 -1):

    examples/voxceleb/sv0/local/train.py        +33  -1
    paddlespeech/vector/layers/lr.py            +45  -0
    paddlespeech/vector/training/sid_model.py   +60  -0
examples/voxceleb/sv0/local/train.py
@@ -15,10 +15,14 @@ import argparse
 import paddle
-from dataset.voxceleb.voxceleb1 import VoxCeleb1
+from paddleaudio.datasets.voxceleb import VoxCeleb1
+from paddlespeech.vector.layers.lr import CyclicLRScheduler
 from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
+from paddlespeech.vector.training.sid_model import SpeakerIdetification


 def main(args):
     # stage0: set the training device, cpu or gpu
     paddle.set_device(args.device)

     # stage1: we must call the paddle.distributed.init_parallel_env() api at the beginning
@@ -27,8 +31,32 @@ def main(args):
     local_rank = paddle.distributed.get_rank()

     # stage2: data prepare
     # note: some cmd must do in rank==0
     train_ds = VoxCeleb1('train', target_dir=args.data_dir)

+    # stage3: build the dnn backbone model network
+    model_conf = {
+        "input_size": 80,
+        "channels": [1024, 1024, 1024, 1024, 3072],
+        "kernel_sizes": [5, 3, 3, 3, 1],
+        "dilations": [1, 2, 3, 4, 1],
+        "attention_channels": 128,
+        "lin_neurons": 192,
+    }
+    ecapa_tdnn = EcapaTdnn(**model_conf)
+
+    # stage4: build the speaker verification train instance with backbone model
+    model = SpeakerIdetification(
+        backbone=ecapa_tdnn, num_class=VoxCeleb1.num_speakers)
+
+    # stage5: build the optimizer, we now only construct the AdamW optimizer
+    lr_schedule = CyclicLRScheduler(
+        base_lr=args.learning_rate, max_lr=1e-3, step_size=140000 // nranks)
+    optimizer = paddle.optimizer.AdamW(
+        learning_rate=lr_schedule, parameters=model.parameters())
+
+    # stage6: build the loss function, we now only support LogSoftmaxWrapper


 if __name__ == "__main__":
     # yapf: disable
@@ -41,6 +69,10 @@ if __name__ == "__main__":
                         default="./data/",
                         type=str,
                         help="data directory")
+    parser.add_argument("--learning_rate",
+                        type=float,
+                        default=1e-8,
+                        help="Learning rate used to train with warmup.")
     args = parser.parse_args()
     # yapf: enable
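main() now stops at the stage6 comment; the loss object and the training loop itself are left for a later change. As a hedged sketch of how the objects built above would typically be driven in Paddle dygraph: `criterion`, `feats`, and `labels` below are placeholders (the stage6 comment names LogSoftmaxWrapper as the intended loss, which this commit does not yet implement), and `train_one_batch` is a hypothetical helper, not code from this diff.

def train_one_batch(model, optimizer, lr_schedule, criterion, feats, labels):
    """One hypothetical iteration using the objects constructed in main()."""
    logits = model(feats)             # (N, num_class) cosine-similarity logits
    loss = criterion(logits, labels)  # placeholder; stage6 is not implemented here
    loss.backward()                   # accumulate gradients
    optimizer.step()                  # AdamW update at the current cyclic lr
    optimizer.clear_grad()            # reset gradients for the next batch
    lr_schedule.step()                # advance CyclicLRScheduler by one iteration
    return float(loss)

Note that the scheduler is advanced once per batch rather than per epoch, which is consistent with step_size=140000 // nranks in stage5 (nranks itself comes from context elided in this hunk).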
paddlespeech/vector/layers/lr.py (new file, mode 100644)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.optimizer.lr import LRScheduler


class CyclicLRScheduler(LRScheduler):
    def __init__(self,
                 base_lr: float=1e-8,
                 max_lr: float=1e-3,
                 step_size: int=10000):
        super(CyclicLRScheduler, self).__init__()

        self.current_step = -1
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size

    def step(self):
        if not hasattr(self, 'current_step'):
            return

        self.current_step += 1
        if self.current_step >= 2 * self.step_size:
            self.current_step %= 2 * self.step_size

        self.last_lr = self.get_lr()

    def get_lr(self):
        p = self.current_step / (2 * self.step_size)  # Proportion in one cycle.
        if p < 0.5:  # Increase
            return self.base_lr + p / 0.5 * (self.max_lr - self.base_lr)
        else:  # Decrease
            return self.max_lr - (p / 0.5 - 1) * (self.max_lr - self.base_lr)
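get_lr() implements a triangular cyclic policy: over one cycle of 2 * step_size steps the rate climbs linearly from base_lr to max_lr, then descends back, and current_step wraps so the cycle repeats. The hasattr guard in step() appears to exist because Paddle's LRScheduler base constructor invokes step() once before the subclass has assigned current_step. A small illustrative check of the schedule's shape (not part of the commit), using the default arguments:

sched = CyclicLRScheduler(base_lr=1e-8, max_lr=1e-3, step_size=10000)

for _ in range(10000):    # first half-cycle: linear warmup
    sched.step()
print(sched.last_lr)      # ~1e-3: the peak is reached after step_size steps

for _ in range(10000):    # second half-cycle: linear decay
    sched.step()
print(sched.last_lr)      # ~1e-8: back near base_lr just before the cycle wraps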
paddlespeech/vector/training/sid_model.py (new file, mode 100644)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class SpeakerIdetification(nn.Layer):
    def __init__(self,
                 backbone,
                 num_class,
                 lin_blocks=0,
                 lin_neurons=192,
                 dropout=0.1, ):
        super(SpeakerIdetification, self).__init__()
        self.backbone = backbone
        if dropout > 0:
            self.dropout = nn.Dropout(dropout)
        else:
            self.dropout = None
        input_size = self.backbone.emb_size
        self.blocks = nn.LayerList()
        for i in range(lin_blocks):
            self.blocks.extend([
                nn.BatchNorm1D(input_size),
                nn.Linear(in_features=input_size, out_features=lin_neurons),
            ])
            input_size = lin_neurons

        self.weight = paddle.create_parameter(
            shape=(input_size, num_class),
            dtype='float32',
            attr=paddle.ParamAttr(initializer=nn.initializer.XavierUniform()), )

    def forward(self, x, lengths=None):
        # x.shape: (N, C, L)
        x = self.backbone(x, lengths).squeeze(
            -1)  # (N, emb_size, 1) -> (N, emb_size)
        if self.dropout is not None:
            x = self.dropout(x)

        for fc in self.blocks:
            x = fc(x)

        logits = F.linear(F.normalize(x), F.normalize(self.weight, axis=0))

        return logits
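Because both the pooled embedding and the weight columns are L2-normalized, the returned logits are cosine similarities in [-1, 1], the form expected by margin-based softmax losses. Below is a minimal usage sketch, not from the commit: DummyBackbone is a hypothetical stand-in for EcapaTdnn that honors only the contract SpeakerIdetification relies on (an `emb_size` attribute and an (N, C, L) -> (N, emb_size, 1) output), and num_class=1211 is just an illustrative VoxCeleb1-sized value.

import paddle
import paddle.nn as nn


class DummyBackbone(nn.Layer):
    """Stand-in for EcapaTdnn: projects 80-dim features, mean-pools over time."""

    def __init__(self, emb_size=192):
        super().__init__()
        self.emb_size = emb_size                # read by SpeakerIdetification
        self.proj = nn.Conv1D(80, emb_size, 1)  # (N, 80, L) -> (N, emb_size, L)
        self.pool = nn.AdaptiveAvgPool1D(1)     # (N, emb_size, L) -> (N, emb_size, 1)

    def forward(self, x, lengths=None):
        return self.pool(self.proj(x))


model = SpeakerIdetification(backbone=DummyBackbone(), num_class=1211)
feats = paddle.randn([4, 80, 100])  # batch of 4 utterances, 80 bands, 100 frames
logits = model(feats)
print(logits.shape)                 # [4, 1211]: one cosine score per speaker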