PaddlePaddle / DeepSpeech
Commit 8873ebe3
Authored Sep 10, 2021 by Hui Zhang

add timer for u2; refactor grad norm type

Parent: 890a28f9
Showing 2 changed files with 35 additions and 32 deletions (+35 -32):

deepspeech/exps/u2/model.py   +32 -29
deepspeech/modules/loss.py     +3  -3
deepspeech/exps/u2/model.py @ 8873ebe3
```diff
@@ -34,6 +34,7 @@ from deepspeech.io.sampler import SortagradDistributedBatchSampler
 from deepspeech.models.u2 import U2Model
 from deepspeech.training.optimizer import OptimizerFactory
 from deepspeech.training.scheduler import LRSchedulerFactory
+from deepspeech.training.timer import Timer
 from deepspeech.training.trainer import Trainer
 from deepspeech.utils import ctc_utils
 from deepspeech.utils import error_rate
@@ -184,40 +185,42 @@ class U2Trainer(Trainer):
             self.save(tag='init')
             self.lr_scheduler.step(self.iteration)
-            if self.parallel:
+            if self.parallel and hasattr(self.train_loader, 'batch_sampler'):
                 self.train_loader.batch_sampler.set_epoch(self.epoch)

         logger.info(f"Train Total Examples: {len(self.train_loader.dataset)}")
         while self.epoch < self.config.training.n_epoch:
-            self.model.train()
-            try:
-                data_start_time = time.time()
-                for batch_index, batch in enumerate(self.train_loader):
-                    dataload_time = time.time() - data_start_time
-                    msg = "Train: Rank: {}, ".format(dist.get_rank())
-                    msg += "epoch: {}, ".format(self.epoch)
-                    msg += "step: {}, ".format(self.iteration)
-                    msg += "batch : {}/{}, ".format(batch_index + 1,
-                                                    len(self.train_loader))
-                    msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
-                    msg += "data time: {:>.3f}s, ".format(dataload_time)
-                    self.train_batch(batch_index, batch, msg)
-                    data_start_time = time.time()
-            except Exception as e:
-                logger.error(e)
-                raise e
-
-            total_loss, num_seen_utts = self.valid()
-            if dist.get_world_size() > 1:
-                num_seen_utts = paddle.to_tensor(num_seen_utts)
-                # the default operator in all_reduce function is sum.
-                dist.all_reduce(num_seen_utts)
-                total_loss = paddle.to_tensor(total_loss)
-                dist.all_reduce(total_loss)
-                cv_loss = total_loss / num_seen_utts
-                cv_loss = float(cv_loss)
-            else:
-                cv_loss = total_loss / num_seen_utts
+            with Timer("Epoch-Train Time Cost: {}"):
+                self.model.train()
+                try:
+                    data_start_time = time.time()
+                    for batch_index, batch in enumerate(self.train_loader):
+                        dataload_time = time.time() - data_start_time
+                        msg = "Train: Rank: {}, ".format(dist.get_rank())
+                        msg += "epoch: {}, ".format(self.epoch)
+                        msg += "step: {}, ".format(self.iteration)
+                        msg += "batch : {}/{}, ".format(batch_index + 1,
+                                                        len(self.train_loader))
+                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
+                        msg += "data time: {:>.3f}s, ".format(dataload_time)
+                        self.train_batch(batch_index, batch, msg)
+                        data_start_time = time.time()
+                except Exception as e:
+                    logger.error(e)
+                    raise e
+
+            with Timer("Eval Time Cost: {}"):
+                total_loss, num_seen_utts = self.valid()
+                if dist.get_world_size() > 1:
+                    num_seen_utts = paddle.to_tensor(num_seen_utts)
+                    # the default operator in all_reduce function is sum.
+                    dist.all_reduce(num_seen_utts)
+                    total_loss = paddle.to_tensor(total_loss)
+                    dist.all_reduce(total_loss)
+                    cv_loss = total_loss / num_seen_utts
+                    cv_loss = float(cv_loss)
+                else:
+                    cv_loss = total_loss / num_seen_utts

             logger.info('Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
```
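Two things are happening in this hunk. First, the sampler-epoch update is hardened: `set_epoch` is now called only when the loader actually exposes a `batch_sampler` attribute, so loaders without one no longer raise `AttributeError`. Second, the epoch's training loop and the validation pass are each wrapped in a `Timer` context manager. The call sites suggest `Timer` measures wall-clock time for the wrapped block and fills the elapsed time into the `{}` placeholder of the message. Below is a minimal sketch of such a context manager, inferred from the call sites in the diff; the actual `deepspeech/training/timer.py` may differ in formatting and logging details.

```python
import time


class Timer:
    """Log wall-clock time spent inside a `with` block (illustrative sketch)."""

    def __init__(self, message="Time cost: {}"):
        self.message = message  # template with a single '{}' placeholder

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        elapsed = time.time() - self.start
        # The real helper presumably routes this through the project
        # logger; print keeps the sketch self-contained.
        print(self.message.format(f"{elapsed:.3f}s"))
        return False  # never swallow exceptions raised in the block


# Usage mirroring the diff:
with Timer("Epoch-Train Time Cost: {}"):
    pass  # one epoch of training would run here
```

Because `__exit__` returns `False`, the existing `except Exception` handling inside the timed block keeps working unchanged: exceptions are logged and re-raised, and the timer still reports the time spent before the failure.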
deepspeech/modules/loss.py @ 8873ebe3
```diff
@@ -36,16 +36,16 @@ class CTCLoss(nn.Layer):
             f"CTCLoss Loss reduction: {reduction}, div-bs: {batch_average}")

         # instance for norm_by_times
-        # batchsize for norm_by_batchsize
+        # batch for norm_by_batchsize
         # frame for norm_by_total_logits_len
-        assert grad_norm_type in ('instance', 'batchsize', 'frame', None)
+        assert grad_norm_type in ('instance', 'batch', 'frame', None)
         self.norm_by_times = False
         self.norm_by_batchsize = False
         self.norm_by_total_logits_len = False
         logger.info(f"CTCLoss Grad Norm Type: {grad_norm_type}")
         if grad_norm_type == 'instance':
             self.norm_by_times = True
-        if grad_norm_type == 'batchsize':
+        if grad_norm_type == 'batch':
             self.norm_by_times = True
         if grad_norm_type == 'frame':
             self.norm_by_total_logits_len = True
```
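The loss.py hunk is the "refactor grad norm type" half of the commit: the accepted `grad_norm_type` value `'batchsize'` is renamed to `'batch'`, with the comment block and the assert updated to match. A hedged usage sketch follows; the `CTCLoss` import path and class name come straight from the diff header, but treating `reduction`, `batch_average`, and `grad_norm_type` as constructor keyword arguments is an assumption based on the f-strings logged in `__init__`.

```python
from deepspeech.modules.loss import CTCLoss

# Mapping implemented in the hunk above:
#   'instance' -> norm_by_times = True
#   'batch'    -> norm_by_times = True   (as this hunk is written)
#   'frame'    -> norm_by_total_logits_len = True
#   None       -> no gradient normalization
ctc_loss = CTCLoss(
    reduction='sum',
    batch_average=True,
    grad_norm_type='batch',  # formerly 'batchsize'; renamed by this commit
)
```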