PaddlePaddle / DeepSpeech · commit 5fdda953
Authored on Sep 15, 2021 by Hui Zhang
add op profiling
Parent: 2dadfba1
Showing 10 changed files with 174 additions and 29 deletions (+174, -29)
deepspeech/exps/u2/bin/train.py (+1, -0)
deepspeech/exps/u2/model.py (+1, -0)
deepspeech/exps/u2_kaldi/model.py (+1, -0)
deepspeech/exps/u2_st/model.py (+1, -0)
deepspeech/training/cli.py (+5, -0)
deepspeech/training/trainer.py (+5, -0)
deepspeech/utils/profiler.py (+116, -0)
examples/aishell/s1/local/train.sh (+22, -15)
examples/librispeech/s0/conf/deepspeech2.yaml (+2, -2)
examples/tiny/s0/local/train.sh (+20, -12)
deepspeech/exps/u2/bin/train.py (+1, -0)

@@ -21,6 +21,7 @@ from deepspeech.exps.u2.config import get_cfg_defaults
from deepspeech.exps.u2.model import U2Trainer as Trainer
from deepspeech.training.cli import default_argument_parser
from deepspeech.utils.utility import print_arguments

# from deepspeech.exps.u2.trainer import U2Trainer as Trainer
deepspeech/exps/u2/model.py (+1, -0)

@@ -204,6 +204,7 @@ class U2Trainer(Trainer):
                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                        msg += "data time: {:>.3f}s, ".format(dataload_time)
                        self.train_batch(batch_index, batch, msg)
+                       self.after_train_batch()
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
deepspeech/exps/u2_kaldi/model.py (+1, -0)

@@ -205,6 +205,7 @@ class U2Trainer(Trainer):
                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                        msg += "data time: {:>.3f}s, ".format(dataload_time)
                        self.train_batch(batch_index, batch, msg)
+                       self.after_train_batch()
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
deepspeech/exps/u2_st/model.py (+1, -0)

@@ -222,6 +222,7 @@ class U2STTrainer(Trainer):
                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                        msg += "data time: {:>.3f}s, ".format(dataload_time)
                        self.train_batch(batch_index, batch, msg)
+                       self.after_train_batch()
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
deepspeech/training/cli.py (+5, -0)

@@ -63,8 +63,13 @@ def default_argument_parser():
    parser.add_argument("--opts",
                        type=str,
                        default=[],
                        nargs='+',
                        help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
    # random seed
    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="seed to use for paddle, np and random. None or 0 for random, else set seed.")
+   # profiler
+   parser.add_argument('--profiler_options',
+                       type=str,
+                       default=None,
+                       help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".')
    # yapd: enable
    return parser
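
For orientation, a minimal sketch (not part of the commit) of how the new flag is consumed, assuming default_argument_parser() as shown above and that no other argument is mandatory for parsing:

# Minimal sketch; the option string follows the documented
# "key1=value1;key2=value2;key3=value3" format from the help text.
from deepspeech.training.cli import default_argument_parser

parser = default_argument_parser()
args = parser.parse_args(
    ["--profiler_options", "batch_range=[50, 60];profile_path=model.profile"])
print(args.profiler_options)  # passed verbatim to profiler.add_profiler_step()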
deepspeech/training/trainer.py (+5, -0)

@@ -20,6 +20,7 @@ from tensorboardX import SummaryWriter
from deepspeech.training.timer import Timer
from deepspeech.utils import mp_tools
+from deepspeech.utils import profiler
from deepspeech.utils.checkpoint import Checkpoint
from deepspeech.utils.log import Log
from deepspeech.utils.utility import seed_all

@@ -183,6 +184,9 @@ class Trainer():
        if isinstance(batch_sampler, paddle.io.DistributedBatchSampler):
            batch_sampler.set_epoch(self.epoch)

+   def after_train_batch(self):
+       profiler.add_profiler_step(self.args.profiler_options)
+
    def train(self):
        """The training process control by epoch."""
        from_scratch = self.resume_or_scratch()

@@ -209,6 +213,7 @@ class Trainer():
                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                        msg += "data time: {:>.3f}s, ".format(dataload_time)
                        self.train_batch(batch_index, batch, msg)
+                       self.after_train_batch()
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
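
The hook itself is a one-liner; the sketch below (a simplified stand-in, not the project's Trainer) shows where it sits relative to train_batch in the per-batch loop:

# Simplified stand-in for the Trainer loop above; train_batch and the loader
# are placeholders, only the hook placement mirrors the diff.
from deepspeech.utils import profiler

class MiniTrainer:
    def __init__(self, args):
        self.args = args  # expects a `profiler_options` attribute (str or None)

    def train_batch(self, batch_index, batch, msg):
        pass  # forward/backward/optimizer step elided

    def after_train_batch(self):
        # No-op when --profiler_options was not given; otherwise each call
        # advances the step counter in deepspeech/utils/profiler.py.
        profiler.add_profiler_step(self.args.profiler_options)

    def train_epoch(self, loader):
        for batch_index, batch in enumerate(loader):
            self.train_batch(batch_index, batch, msg="")
            self.after_train_batch()  # the call added by this commit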
deepspeech/utils/profiler.py (new file, +116)

# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import paddle

from deepspeech.utils.log import Log

logger = Log(__name__).getlog()

# A global variable to record the number of calling times for profiler
# functions. It is used to specify the tracing range of training steps.
_profiler_step_id = 0

# A global variable to avoid parsing from string every time.
_profiler_options = None


class ProfilerOptions(object):
    '''
    Use a string to initialize a ProfilerOptions.
    The string should be in the format: "key1=value1;key2=value;key3=value3".
    For example:
      "profile_path=model.profile"
      "batch_range=[50, 60]; profile_path=model.profile"
      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"

    ProfilerOptions supports following key-value pair:
      batch_range      - a integer list, e.g. [100, 110].
      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
      sorted_key       - a string, the optional values are 'calls', 'total',
                         'max', 'min' or 'ave.
      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
                         'AllOpDetail'.
      profile_path     - a string, the path to save the serialized profile data,
                         which can be used to generate a timeline.
      exit_on_finished - a boolean.
    '''

    def __init__(self, options_str):
        assert isinstance(options_str, str)

        self._options = {
            'batch_range': [10, 20],
            'state': 'All',
            'sorted_key': 'total',
            'tracer_option': 'Default',
            'profile_path': '/tmp/profile',
            'exit_on_finished': True
        }
        self._parse_from_string(options_str)

    def _parse_from_string(self, options_str):
        for kv in options_str.replace(' ', '').split(';'):
            key, value = kv.split('=')
            if key == 'batch_range':
                value_list = value.replace('[', '').replace(']', '').split(',')
                value_list = list(map(int, value_list))
                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
                        1] > value_list[0]:
                    self._options[key] = value_list
            elif key == 'exit_on_finished':
                self._options[key] = value.lower() in ("yes", "true", "t", "1")
            elif key in [
                    'state', 'sorted_key', 'tracer_option', 'profile_path'
            ]:
                self._options[key] = value

    def __getitem__(self, name):
        if self._options.get(name, None) is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name)
        return self._options[name]


def add_profiler_step(options_str=None):
    '''
    Enable the operator-level timing using PaddlePaddle's profiler.
    The profiler uses a independent variable to count the profiler steps.
    One call of this function is treated as a profiler step.
    Args:
      profiler_options - a string to initialize the ProfilerOptions.
                         Default is None, and the profiler is disabled.
    '''
    if options_str is None:
        return

    global _profiler_step_id
    global _profiler_options

    if _profiler_options is None:
        _profiler_options = ProfilerOptions(options_str)
        logger.info(f"{options_str}")
        logger.info(f"{_profiler_options._options}")

    if _profiler_step_id == _profiler_options['batch_range'][0]:
        paddle.utils.profiler.start_profiler(_profiler_options['state'],
                                             _profiler_options['tracer_option'])
    elif _profiler_step_id == _profiler_options['batch_range'][1]:
        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
                                            _profiler_options['profile_path'])
        if _profiler_options['exit_on_finished']:
            sys.exit(0)

    _profiler_step_id += 1
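
A usage sketch for the new module (values are illustrative, not taken from the commit): the option string is parsed once into ProfilerOptions, and the module-level step counter decides when to start and stop paddle.utils.profiler.

# Illustrative values only: profile steps 5-8 on GPU without exiting afterwards.
from deepspeech.utils import profiler

opts = profiler.ProfilerOptions(
    "batch_range=[5, 8]; state=GPU; profile_path=/tmp/u2.profile; "
    "exit_on_finished=false")
print(opts['batch_range'])       # [5, 8]
print(opts['exit_on_finished'])  # False

# In training, add_profiler_step() runs once per batch: profiling starts when
# the internal step counter reaches batch_range[0] and stops (writing
# profile_path, sorted by sorted_key) when it reaches batch_range[1].
options_str = ("batch_range=[5, 8]; state=GPU; profile_path=/tmp/u2.profile; "
               "exit_on_finished=false")
for _ in range(10):
    profiler.add_profiler_step(options_str)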
examples/aishell/s1/local/train.sh (+22, -15; add/remove markers did not survive extraction, lines are shown in diff order)

#!/bin/bash

if [ $# != 2 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
    exit -1
fi

profiler_options=
# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2

device=gpu
if [ ${ngpu} == 0 ];then
    device=cpu
fi
echo "using ${device}..."

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

if [ $# != 2 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
    exit -1
fi

config_path=$1
ckpt_name=$2

mkdir -p exp

python3 -u ${BIN_DIR}/train.py \
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler_options ${profiler_options} \
--seed ${seed}

if [ ${seed} != 0 ]; then
if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

@@ -41,4 +48,4 @@ if [ $? -ne 0 ]; then
    exit 1
fi

exit 0
exit 0
\ No newline at end of file
examples/librispeech/s0/conf/deepspeech2.yaml (+2, -2)

@@ -11,7 +11,7 @@ data:
    max_output_input_ratio: .inf

collator:
-   batch_size: 15
+   batch_size: 20
    mean_std_filepath: data/mean_std.json
    unit_type: char
    vocab_filepath: data/vocab.txt

@@ -45,7 +45,7 @@ model:
training:
    n_epoch: 50
-   accum_grad: 4
+   accum_grad: 1
    lr: 1e-3
    lr_decay: 0.83
    weight_decay: 1e-06
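
Reading the inline diff as old-then-new, the batch settings trade gradient accumulation for a larger per-step batch; a back-of-the-envelope comparison, assuming accum_grad is the number of batches accumulated before one optimizer update:

# Assumption: accum_grad = batches accumulated per optimizer update.
old_per_update = 15 * 4  # batch_size 15, accum_grad 4 -> 60 utterances/update
new_per_update = 20 * 1  # batch_size 20, accum_grad 1 -> 20 utterances/update
print(old_per_update, new_per_update)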
examples/tiny/s0/local/train.sh (+20, -12; add/remove markers did not survive extraction, lines are shown in diff order)

#!/bin/bash

if [ $# != 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
    exit -1
fi

profiler_options=
# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
model_type=$3

device=gpu
if [ ${ngpu} == 0 ];then
    device=cpu
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

if [ $# != 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
    exit -1
fi

config_path=$1
ckpt_name=$2
model_type=$3

mkdir -p exp

python3 -u ${BIN_DIR}/train.py \
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--model_type ${model_type} \
--profiler_options ${profiler_options} \
--seed ${seed}

if [ ${seed} != 0 ]; then
...