Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
8b45c3e6
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8b45c3e6
编写于
10月 09, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor trainer.py and rm ueseless dir setup code
上级
f5ec6e34
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
110 addition
and
248 deletion
+110
-248
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+0
-73
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+0
-48
deepspeech/exps/u2_kaldi/model.py
deepspeech/exps/u2_kaldi/model.py
+1
-47
deepspeech/exps/u2_st/model.py
deepspeech/exps/u2_st/model.py
+0
-48
deepspeech/modules/loss.py
deepspeech/modules/loss.py
+0
-1
deepspeech/training/trainer.py
deepspeech/training/trainer.py
+109
-31
未找到文件。
deepspeech/exps/deepspeech2/model.py
浏览文件 @
8b45c3e6
...
...
@@ -386,13 +386,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
logger
.
info
(
msg
)
self
.
autolog
.
report
()
def
run_test
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
test
()
except
KeyboardInterrupt
:
exit
(
-
1
)
def
export
(
self
):
if
self
.
args
.
model_type
==
'offline'
:
infer_model
=
DeepSpeech2InferModel
.
from_pretrained
(
...
...
@@ -409,40 +402,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
logger
.
info
(
f
"Export code:
{
static_model
.
forward
.
code
}
"
)
paddle
.
jit
.
save
(
static_model
,
self
.
args
.
export_path
)
def
run_export
(
self
):
try
:
self
.
export
()
except
KeyboardInterrupt
:
exit
(
-
1
)
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
self
.
setup_output_dir
()
self
.
setup_checkpointer
()
self
.
setup_dataloader
()
self
.
setup_model
()
self
.
iteration
=
0
self
.
epoch
=
0
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
else
:
output_dir
=
Path
(
self
.
args
.
checkpoint_path
).
expanduser
().
parent
.
parent
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
output_dir
=
output_dir
class
DeepSpeech2ExportTester
(
DeepSpeech2Tester
):
def
__init__
(
self
,
config
,
args
):
...
...
@@ -646,38 +605,6 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
output_lens
=
output_lens_handle
.
copy_to_cpu
()
return
output_probs
,
output_lens
def
run_test
(
self
):
try
:
self
.
test
()
except
KeyboardInterrupt
:
exit
(
-
1
)
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
self
.
setup_output_dir
()
self
.
setup_dataloader
()
self
.
setup_model
()
self
.
iteration
=
0
self
.
epoch
=
0
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
else
:
output_dir
=
Path
(
self
.
args
.
export_path
).
expanduser
().
parent
.
parent
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
output_dir
=
output_dir
def
setup_model
(
self
):
super
().
setup_model
()
speedyspeech_config
=
inference
.
Config
(
...
...
deepspeech/exps/u2/model.py
浏览文件 @
8b45c3e6
...
...
@@ -551,13 +551,6 @@ class U2Tester(U2Trainer):
})
f
.
write
(
data
+
'
\n
'
)
def
run_test
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
test
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
@
paddle
.
no_grad
()
def
align
(
self
):
if
self
.
config
.
decoding
.
batch_size
>
1
:
...
...
@@ -617,13 +610,6 @@ class U2Tester(U2Trainer):
intervals
=
tierformat
,
output
=
str
(
textgrid_path
))
def
run_align
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
align
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
@@ -651,37 +637,3 @@ class U2Tester(U2Trainer):
static_model
=
paddle
.
jit
.
to_static
(
infer_model
,
input_spec
=
input_spec
)
logger
.
info
(
f
"Export code:
{
static_model
.
forward
.
code
}
"
)
paddle
.
jit
.
save
(
static_model
,
self
.
args
.
export_path
)
def
run_export
(
self
):
try
:
self
.
export
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
self
.
setup_output_dir
()
self
.
setup_checkpointer
()
self
.
setup_dataloader
()
self
.
setup_model
()
self
.
iteration
=
0
self
.
epoch
=
0
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
else
:
output_dir
=
Path
(
self
.
args
.
checkpoint_path
).
expanduser
().
parent
.
parent
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
output_dir
=
output_dir
deepspeech/exps/u2_kaldi/model.py
浏览文件 @
8b45c3e6
...
...
@@ -525,13 +525,6 @@ class U2Tester(U2Trainer):
})
f
.
write
(
data
+
'
\n
'
)
def
run_test
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
test
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
@
paddle
.
no_grad
()
def
align
(
self
):
if
self
.
config
.
decoding
.
batch_size
>
1
:
...
...
@@ -591,13 +584,6 @@ class U2Tester(U2Trainer):
intervals
=
tierformat
,
output
=
str
(
textgrid_path
))
def
run_align
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
align
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
@@ -626,43 +612,11 @@ class U2Tester(U2Trainer):
logger
.
info
(
f
"Export code:
{
static_model
.
forward
.
code
}
"
)
paddle
.
jit
.
save
(
static_model
,
self
.
args
.
export_path
)
def
run_export
(
self
):
try
:
self
.
export
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
setup_dict
(
self
):
# load dictionary for debug log
self
.
args
.
char_list
=
load_dict
(
self
.
args
.
dict_path
,
"maskctc"
in
self
.
args
.
model_name
)
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
self
.
setup_output_dir
()
self
.
setup_checkpointer
()
self
.
setup_dataloader
()
self
.
setup_model
()
super
().
setup
()
self
.
setup_dict
()
self
.
iteration
=
0
self
.
epoch
=
0
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
else
:
output_dir
=
Path
(
self
.
args
.
checkpoint_path
).
expanduser
().
parent
.
parent
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
output_dir
=
output_dir
deepspeech/exps/u2_st/model.py
浏览文件 @
8b45c3e6
...
...
@@ -545,13 +545,6 @@ class U2STTester(U2STTrainer):
})
f
.
write
(
data
+
'
\n
'
)
def
run_test
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
test
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
@
paddle
.
no_grad
()
def
align
(
self
):
if
self
.
config
.
decoding
.
batch_size
>
1
:
...
...
@@ -611,13 +604,6 @@ class U2STTester(U2STTrainer):
intervals
=
tierformat
,
output
=
str
(
textgrid_path
))
def
run_align
(
self
):
self
.
resume_or_scratch
()
try
:
self
.
align
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
@@ -645,37 +631,3 @@ class U2STTester(U2STTrainer):
static_model
=
paddle
.
jit
.
to_static
(
infer_model
,
input_spec
=
input_spec
)
logger
.
info
(
f
"Export code:
{
static_model
.
forward
.
code
}
"
)
paddle
.
jit
.
save
(
static_model
,
self
.
args
.
export_path
)
def
run_export
(
self
):
try
:
self
.
export
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
self
.
setup_output_dir
()
self
.
setup_checkpointer
()
self
.
setup_dataloader
()
self
.
setup_model
()
self
.
iteration
=
0
self
.
epoch
=
0
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
else
:
output_dir
=
Path
(
self
.
args
.
checkpoint_path
).
expanduser
().
parent
.
parent
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
output_dir
=
output_dir
deepspeech/modules/loss.py
浏览文件 @
8b45c3e6
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
inspect
from
functools
import
partial
import
paddle
from
paddle
import
nn
...
...
deepspeech/training/trainer.py
浏览文件 @
8b45c3e6
...
...
@@ -14,6 +14,7 @@
import
sys
import
time
from
collections
import
OrderedDict
from
contextlib
import
contextmanager
from
pathlib
import
Path
import
paddle
...
...
@@ -103,14 +104,28 @@ class Trainer():
self
.
iteration
=
0
self
.
epoch
=
0
self
.
rank
=
dist
.
get_rank
()
self
.
world_size
=
dist
.
get_world_size
()
self
.
_train
=
True
# print deps version
all_version
()
logger
.
info
(
f
"Rank:
{
self
.
rank
}
/
{
dist
.
get_world_size
()
}
"
)
logger
.
info
(
f
"Rank:
{
self
.
rank
}
/
{
self
.
world_size
}
"
)
# set device
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
if
self
.
parallel
:
self
.
init_parallel
()
self
.
checkpoint
=
Checkpoint
(
kbest_n
=
self
.
config
.
training
.
checkpoint
.
kbest_n
,
latest_n
=
self
.
config
.
training
.
checkpoint
.
latest_n
)
# set random seed if needed
if
args
.
seed
:
seed_all
(
args
.
seed
)
logger
.
info
(
f
"Set seed
{
args
.
seed
}
"
)
# profiler and benchmark options
if
self
.
args
.
benchmark_batch_size
:
with
UpdateConfig
(
self
.
config
):
self
.
config
.
collator
.
batch_size
=
self
.
args
.
benchmark_batch_size
...
...
@@ -118,17 +133,18 @@ class Trainer():
logger
.
info
(
f
"Benchmark reset batch-size:
{
self
.
args
.
benchmark_batch_size
}
"
)
@
contextmanager
def
eval
(
self
):
self
.
_train
=
False
yield
self
.
_train
=
True
def
setup
(
self
):
"""Setup the experiment.
"""
paddle
.
set_device
(
'gpu'
if
self
.
args
.
nprocs
>
0
else
'cpu'
)
if
self
.
parallel
:
self
.
init_parallel
()
self
.
setup_output_dir
()
self
.
dump_config
()
self
.
setup_visualizer
()
self
.
setup_checkpointer
()
self
.
setup_dataloader
()
self
.
setup_model
()
...
...
@@ -183,8 +199,8 @@ class Trainer():
if
infos
:
# just restore ckpt
# lr will resotre from optimizer ckpt
self
.
iteration
=
infos
[
"step"
]
self
.
epoch
=
infos
[
"epoch"
]
self
.
iteration
=
infos
[
"step"
]
+
1
self
.
epoch
=
infos
[
"epoch"
]
+
1
scratch
=
False
logger
.
info
(
f
"Restore ckpt: epoch
{
self
.
epoch
}
, step
{
self
.
iteration
}
!"
)
...
...
@@ -302,37 +318,74 @@ class Trainer():
"""The routine of the experiment after setup. This method is intended
to be used by the user.
"""
with
Timer
(
"Training Done: {}"
)
:
try
:
try
:
with
Timer
(
"Training Done: {}"
)
:
self
.
train
()
except
KeyboardInterrupt
:
exit
(
-
1
)
finally
:
self
.
destory
()
except
KeyboardInterrupt
:
exit
(
-
1
)
finally
:
self
.
destory
()
def
run_test
(
self
):
"""Do Test/Decode"""
try
:
with
Timer
(
"Test/Decode Done: {}"
):
with
self
.
eval
():
self
.
resume_or_scratch
()
self
.
test
()
except
KeyboardInterrupt
:
exit
(
-
1
)
def
run_export
(
self
):
"""Do Model Export"""
try
:
with
Timer
(
"Export Done: {}"
):
with
self
.
eval
():
self
.
export
()
except
KeyboardInterrupt
:
exit
(
-
1
)
def
run_align
(
self
):
"""Do CTC alignment"""
try
:
with
Timer
(
"Align Done: {}"
):
with
self
.
eval
():
self
.
resume_or_scratch
()
self
.
align
()
except
KeyboardInterrupt
:
sys
.
exit
(
-
1
)
def
setup_output_dir
(
self
):
"""Create a directory used for output.
"""
# output dir
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
if
self
.
args
.
output
:
output_dir
=
Path
(
self
.
args
.
output
).
expanduser
()
elif
self
.
args
.
checkpoint_path
:
output_dir
=
Path
(
self
.
args
.
checkpoint_path
).
expanduser
().
parent
.
parent
self
.
output_dir
=
output_dir
self
.
output_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
def
setup_checkpointer
(
self
):
"""Create a directory used to save checkpoints into.
self
.
checkpoint_dir
=
self
.
output_dir
/
"checkpoints"
self
.
checkpoint_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
It is "checkpoints" inside the output directory.
"""
# checkpoint dir
checkpoint_dir
=
self
.
output_dir
/
"checkpoints"
checkpoint_dir
.
mkdir
(
exist_ok
=
True
)
self
.
log_dir
=
output_dir
/
"log"
self
.
log_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
checkpoint_dir
=
checkpoint_dir
self
.
test_dir
=
output_dir
/
"test"
self
.
test_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
checkpoint
=
Checkpoint
(
kbest_n
=
self
.
config
.
training
.
checkpoint
.
kbest_n
,
latest_n
=
self
.
config
.
training
.
checkpoint
.
latest_n
)
self
.
decode_dir
=
output_dir
/
"decode"
self
.
decode_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
export_dir
=
output_dir
/
"export"
self
.
export_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
visual_dir
=
output_dir
/
"visual"
self
.
visual_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
self
.
config_dir
=
output_dir
/
"conf"
self
.
config_dir
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
@
mp_tools
.
rank_zero_only
def
destory
(
self
):
...
...
@@ -354,7 +407,7 @@ class Trainer():
unexpected behaviors.
"""
# visualizer
visualizer
=
SummaryWriter
(
logdir
=
str
(
self
.
output
_dir
))
visualizer
=
SummaryWriter
(
logdir
=
str
(
self
.
visual
_dir
))
self
.
visualizer
=
visualizer
@
mp_tools
.
rank_zero_only
...
...
@@ -364,7 +417,14 @@ class Trainer():
It is saved in to ``config.yaml`` in the output directory at the
beginning of the experiment.
"""
with
open
(
self
.
output_dir
/
"config.yaml"
,
'wt'
)
as
f
:
config_file
=
self
.
config_dir
/
"config.yaml"
if
self
.
_train
and
config_file
.
exists
():
time_stamp
=
time
.
strftime
(
"%Y_%m_%d_%H_%M_%s"
,
time
.
gmtime
())
target_path
=
self
.
config_dir
/
"."
.
join
(
[
time_stamp
,
"config.yaml"
])
config_file
.
rename
(
target_path
)
with
open
(
config_file
,
'wt'
)
as
f
:
print
(
self
.
config
,
file
=
f
)
def
train_batch
(
self
):
...
...
@@ -378,6 +438,24 @@ class Trainer():
"""
raise
NotImplementedError
(
"valid should be implemented."
)
@
paddle
.
no_grad
()
def
test
(
self
):
"""The test. A subclass should implement this method in Tester.
"""
raise
NotImplementedError
(
"test should be implemented."
)
@
paddle
.
no_grad
()
def
export
(
self
):
"""The test. A subclass should implement this method in Tester.
"""
raise
NotImplementedError
(
"export should be implemented."
)
@
paddle
.
no_grad
()
def
align
(
self
):
"""The align. A subclass should implement this method in Tester.
"""
raise
NotImplementedError
(
"align should be implemented."
)
def
setup_model
(
self
):
"""Setup model, criterion and optimizer, etc. A subclass should
implement this method.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录