Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
d7a33b9d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
d7a33b9d
编写于
9月 16, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update config with contextlib
上级
cda6ca83
变更
8
隐藏空白更改
内联
并排
Showing 8 changed files with 35 additions and 219 deletions
+35
-219
deepspeech/exps/deepspeech2/bin/tune.py
deepspeech/exps/deepspeech2/bin/tune.py
+0
-191
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+4
-4
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+6
-4
deepspeech/exps/u2/trainer.py
deepspeech/exps/u2/trainer.py
+5
-4
deepspeech/exps/u2_kaldi/model.py
deepspeech/exps/u2_kaldi/model.py
+5
-4
deepspeech/exps/u2_st/model.py
deepspeech/exps/u2_st/model.py
+5
-4
deepspeech/models/u2/u2.py
deepspeech/models/u2/u2.py
+5
-4
deepspeech/models/u2_st.py
deepspeech/models/u2_st.py
+5
-4
未找到文件。
deepspeech/exps/deepspeech2/bin/tune.py
已删除
100644 → 0
浏览文件 @
cda6ca83
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Beam search parameters tuning for DeepSpeech2 model."""
import
functools
import
sys
import
numpy
as
np
from
paddle.io
import
DataLoader
from
deepspeech.exps.deepspeech2.config
import
get_cfg_defaults
from
deepspeech.io.collator
import
SpeechCollator
from
deepspeech.io.dataset
import
ManifestDataset
from
deepspeech.models.ds2
import
DeepSpeech2Model
from
deepspeech.training.cli
import
default_argument_parser
from
deepspeech.utils
import
error_rate
from
deepspeech.utils.utility
import
add_arguments
from
deepspeech.utils.utility
import
print_arguments
def tune(config, args):
    """Tune decoder parameters alpha and beta by incremental grid search.

    Builds an (alpha, beta) grid from the CLI ranges, runs the DeepSpeech2
    encoder once per dev batch, then beam-search decodes that batch at every
    grid point, accumulating the error rate (CER or WER, chosen by
    ``config.decoding.error_rate_type``).  The running optimum is printed
    after each batch and the final optimum at the end.

    Args:
        config: yacs config; the ``data`` section is modified in place to
            point at the dev manifest, and ``decoding`` supplies the beam
            search settings.
        args: parsed CLI namespace with the tuning ranges/counts and the
            model checkpoint path.

    Raises:
        ValueError: if ``args.num_alphas`` or ``args.num_betas`` is negative.
    """
    if not args.num_alphas >= 0:
        raise ValueError("num_alphas must be non-negative!")
    if not args.num_betas >= 0:
        raise ValueError("num_betas must be non-negative!")

    # Point the loader at the dev set, keeping raw transcription text so
    # error rates can be computed against plain strings.
    config.defrost()
    # BUG FIX: the original assigned to `config.data.manfiest` (typo), so the
    # dev manifest was never actually installed as the active manifest.
    config.data.manifest = config.data.dev_manifest
    config.data.augmentation_config = ""
    config.data.keep_transcription_text = True
    dev_dataset = ManifestDataset.from_config(config)

    valid_loader = DataLoader(
        dev_dataset,
        batch_size=config.data.batch_size,
        shuffle=False,
        drop_last=False,
        collate_fn=SpeechCollator(keep_transcription_text=True))

    model = DeepSpeech2Model.from_pretrained(valid_loader, config,
                                             args.checkpoint_path)
    model.eval()

    # decoders only accept string encoded in utf-8
    vocab_list = valid_loader.dataset.vocab_list
    errors_func = error_rate.char_errors if config.decoding.error_rate_type == 'cer' else error_rate.word_errors

    # Create the (alpha, beta) grid for the search.
    cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas)
    cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas)
    params_grid = [(alpha, beta) for alpha in cand_alphas
                   for beta in cand_betas]

    # Per-grid-point accumulated errors and running averages.
    err_sum = [0.0 for i in range(len(params_grid))]
    err_ave = [0.0 for i in range(len(params_grid))]

    num_ins, len_refs, cur_batch = 0, 0, 0
    # initialize external scorer
    model.decoder.init_decode(args.alpha_from, args.beta_from,
                              config.decoding.lang_model_path, vocab_list,
                              config.decoding.decoding_method)
    ## incremental tuning parameters over multiple batches
    print("start tuning ...")
    # NOTE(review): the loader is *called* here rather than iterated directly;
    # presumably this loader type is callable — confirm against paddle.io.
    for infer_data in valid_loader():
        if (args.num_batches >= 0) and (cur_batch >= args.num_batches):
            break

        def ordid2token(texts, texts_len):
            """ ord() id to chr() chr """
            trans = []
            for text, n in zip(texts, texts_len):
                n = n.numpy().item()
                ids = text[:n]
                trans.append(''.join([chr(i) for i in ids]))
            return trans

        audio, audio_len, text, text_len = infer_data
        target_transcripts = ordid2token(text, text_len)
        num_ins += audio.shape[0]

        # Run the encoder once per batch; only the cheap beam-search decode
        # is repeated per grid point below.
        eouts, eouts_len = model.encoder(audio, audio_len)
        probs = model.decoder.softmax(eouts)

        # grid search
        for index, (alpha, beta) in enumerate(params_grid):
            print(f"tuneing: alpha={alpha} beta={beta}")
            result_transcripts = model.decoder.decode_probs(
                probs.numpy(), eouts_len, vocab_list,
                config.decoding.decoding_method,
                config.decoding.lang_model_path, alpha, beta,
                config.decoding.beam_size, config.decoding.cutoff_prob,
                config.decoding.cutoff_top_n,
                config.decoding.num_proc_bsearch)

            for target, result in zip(target_transcripts, result_transcripts):
                errors, len_ref = errors_func(target, result)
                err_sum[index] += errors
                # accumulate the length of references of every batch
                # in the first iteration (reference length is the same for
                # every grid point, so count it only once)
                if args.alpha_from == alpha and args.beta_from == beta:
                    len_refs += len_ref

            err_ave[index] = err_sum[index] / len_refs
            if index % 2 == 0:
                sys.stdout.write('.')
                sys.stdout.flush()
            print("tuneing: one grid done!")

        # output on-line tuning result at the end of current batch
        err_ave_min = min(err_ave)
        min_index = err_ave.index(err_ave_min)
        print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), "
              " min [%s] = %f" %
              (cur_batch, num_ins, "%.3f" % params_grid[min_index][0],
               "%.3f" % params_grid[min_index][1],
               config.decoding.error_rate_type, err_ave_min))
        cur_batch += 1

    # output WER/CER at every (alpha, beta)
    print("\nFinal %s:\n" % config.decoding.error_rate_type)
    for index in range(len(params_grid)):
        print("(alpha, beta) = (%s, %s), [%s] = %f" %
              ("%.3f" % params_grid[index][0],
               "%.3f" % params_grid[index][1],
               config.decoding.error_rate_type, err_ave[index]))

    err_ave_min = min(err_ave)
    min_index = err_ave.index(err_ave_min)
    print("\nFinish tuning on %d batches, final opt (alpha, beta) = (%s, %s)"
          % (cur_batch, "%.3f" % params_grid[min_index][0],
             "%.3f" % params_grid[min_index][1]))

    print("finish tuning")
def main(config, args):
    """Script entry point: run beam-search parameter tuning with the parsed
    config and CLI arguments."""
    tune(config, args)
if __name__ == "__main__":
    # Build the CLI: project-wide defaults plus tuning-specific arguments.
    parser = default_argument_parser()
    add_arg = functools.partial(add_arguments, argparser=parser)
    add_arg('num_batches', int, -1, "# of batches tuning on. "
            "Default -1, on whole dev set.")
    add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.")
    add_arg('num_betas', int, 8, "# of beta candidates for tuning.")
    add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.")
    add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.")
    add_arg('beta_from', float, 0.1, "Where beta starts tuning from.")
    add_arg('beta_to', float, 0.45, "Where beta ends tuning with.")
    add_arg('batch_size', int, 256, "# of samples per batch.")
    add_arg('beam_size', int, 500, "Beam search width.")
    add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.")
    add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.")
    add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.")
    args = parser.parse_args()
    print_arguments(args, globals())

    # Load defaults, then layer on the config file and any `--opts` overrides.
    # https://yaml.org/type/float.html
    config = get_cfg_defaults()
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    # CLI decoding arguments take precedence over the config file.
    config.data.batch_size = args.batch_size
    config.decoding.beam_size = args.beam_size
    config.decoding.num_proc_bsearch = args.num_proc_bsearch
    config.decoding.cutoff_prob = args.cutoff_prob
    config.decoding.cutoff_top_n = args.cutoff_top_n
    config.freeze()
    print(config)
    # Optionally dump the fully-resolved config for reproducibility.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    main(config, args)
deepspeech/exps/deepspeech2/model.py
浏览文件 @
d7a33b9d
...
...
@@ -41,6 +41,7 @@ from deepspeech.utils import layer_tools
from
deepspeech.utils
import
mp_tools
from
deepspeech.utils.log
import
Autolog
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
...
...
@@ -147,10 +148,9 @@ class DeepSpeech2Trainer(Trainer):
def
setup_model
(
self
):
config
=
self
.
config
.
clone
()
config
.
defrost
()
config
.
model
.
feat_size
=
self
.
train_loader
.
collate_fn
.
feature_size
config
.
model
.
dict_size
=
self
.
train_loader
.
collate_fn
.
vocab_size
config
.
freeze
()
with
UpdateConfig
(
config
):
config
.
model
.
feat_size
=
self
.
train_loader
.
collate_fn
.
feature_size
config
.
model
.
dict_size
=
self
.
train_loader
.
collate_fn
.
vocab_size
if
self
.
args
.
model_type
==
'offline'
:
model
=
DeepSpeech2Model
.
from_config
(
config
.
model
)
...
...
deepspeech/exps/u2/model.py
浏览文件 @
d7a33b9d
...
...
@@ -43,6 +43,7 @@ from deepspeech.utils import mp_tools
from
deepspeech.utils
import
text_grid
from
deepspeech.utils
import
utility
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
...
...
@@ -315,10 +316,11 @@ class U2Trainer(Trainer):
def
setup_model
(
self
):
config
=
self
.
config
model_conf
=
config
.
model
model_conf
.
defrost
()
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model_conf
.
freeze
()
with
UpdateConfig
(
model_conf
):
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model
=
U2Model
.
from_config
(
model_conf
)
if
self
.
parallel
:
...
...
deepspeech/exps/u2/trainer.py
浏览文件 @
d7a33b9d
...
...
@@ -32,6 +32,7 @@ from deepspeech.training.trainer import Trainer
from
deepspeech.training.updaters.trainer
import
Trainer
as
NewTrainer
from
deepspeech.utils
import
layer_tools
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
...
...
@@ -121,10 +122,10 @@ class U2Trainer(Trainer):
def
setup_model
(
self
):
config
=
self
.
config
model_conf
=
config
.
model
model_conf
.
defrost
()
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model_conf
.
freeze
()
with
UpdateConfig
(
model_conf
):
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model
=
U2Model
.
from_config
(
model_conf
)
if
self
.
parallel
:
...
...
deepspeech/exps/u2_kaldi/model.py
浏览文件 @
d7a33b9d
...
...
@@ -41,6 +41,7 @@ from deepspeech.utils import mp_tools
from
deepspeech.utils
import
text_grid
from
deepspeech.utils
import
utility
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
...
...
@@ -319,10 +320,10 @@ class U2Trainer(Trainer):
# model
model_conf
=
config
.
model
model_conf
.
defrost
()
model_conf
.
input_dim
=
self
.
train_loader
.
feat_dim
model_conf
.
output_dim
=
self
.
train_loader
.
vocab_size
model_conf
.
freeze
()
with
UpdateConfig
(
model_conf
):
model_conf
.
input_dim
=
self
.
train_loader
.
feat_dim
model_conf
.
output_dim
=
self
.
train_loader
.
vocab_size
model
=
U2Model
.
from_config
(
model_conf
)
if
self
.
parallel
:
model
=
paddle
.
DataParallel
(
model
)
...
...
deepspeech/exps/u2_st/model.py
浏览文件 @
d7a33b9d
...
...
@@ -47,6 +47,7 @@ from deepspeech.utils import mp_tools
from
deepspeech.utils
import
text_grid
from
deepspeech.utils
import
utility
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
...
...
@@ -345,10 +346,10 @@ class U2STTrainer(Trainer):
def
setup_model
(
self
):
config
=
self
.
config
model_conf
=
config
.
model
model_conf
.
defrost
()
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model_conf
.
freeze
()
with
UpdateConfig
(
model_conf
):
model_conf
.
input_dim
=
self
.
train_loader
.
collate_fn
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
collate_fn
.
vocab_size
model
=
U2STModel
.
from_config
(
model_conf
)
if
self
.
parallel
:
...
...
deepspeech/models/u2/u2.py
浏览文件 @
d7a33b9d
...
...
@@ -48,6 +48,7 @@ from deepspeech.utils.tensor_utils import add_sos_eos
from
deepspeech.utils.tensor_utils
import
pad_sequence
from
deepspeech.utils.tensor_utils
import
th_accuracy
from
deepspeech.utils.utility
import
log_add
from
deepspeech.utils.utility
import
UpdateConfig
__all__
=
[
"U2Model"
,
"U2InferModel"
]
...
...
@@ -903,10 +904,10 @@ class U2Model(U2BaseModel):
Returns:
DeepSpeech2Model: The model built from pretrained result.
"""
config
.
defrost
()
config
.
input_dim
=
dataloader
.
collate_fn
.
feature_size
config
.
output_dim
=
dataloader
.
collate_fn
.
vocab_size
config
.
freeze
()
with
UpdateConfig
(
config
):
config
.
input_dim
=
dataloader
.
collate_fn
.
feature_size
config
.
output_dim
=
dataloader
.
collate_fn
.
vocab_size
model
=
cls
.
from_config
(
config
)
if
checkpoint_path
:
...
...
deepspeech/models/u2_st.py
浏览文件 @
d7a33b9d
...
...
@@ -42,6 +42,7 @@ from deepspeech.utils import layer_tools
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.tensor_utils
import
add_sos_eos
from
deepspeech.utils.tensor_utils
import
th_accuracy
from
deepspeech.utils.utility
import
UpdateConfig
__all__
=
[
"U2STModel"
,
"U2STInferModel"
]
...
...
@@ -686,10 +687,10 @@ class U2STModel(U2STBaseModel):
Returns:
DeepSpeech2Model: The model built from pretrained result.
"""
config
.
defrost
()
config
.
input_dim
=
dataloader
.
collate_fn
.
feature_size
config
.
output_dim
=
dataloader
.
collate_fn
.
vocab_size
config
.
freeze
()
with
UpdateConfig
(
config
):
config
.
input_dim
=
dataloader
.
collate_fn
.
feature_size
config
.
output_dim
=
dataloader
.
collate_fn
.
vocab_size
model
=
cls
.
from_config
(
config
)
if
checkpoint_path
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录