PaddlePaddle / DeepSpeech, commit 1f050a4d

make the code simple

Authored August 25, 2021 by huangyuxin
Parent: 7ab022e1
Showing 1 changed file with 34 additions and 62 deletions (+34, -62):

deepspeech/exps/deepspeech2/model.py

@@ -270,24 +270,9 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         vocab_list = self.test_loader.collate_fn.vocab_list
         target_transcripts = self.ordid2token(texts, texts_len)
-        self.autolog.times.start()
-        self.autolog.times.stamp()
-        result_transcripts = self.model.decode(
-            audio,
-            audio_len,
-            vocab_list,
-            decoding_method=cfg.decoding_method,
-            lang_model_path=cfg.lang_model_path,
-            beam_alpha=cfg.alpha,
-            beam_beta=cfg.beta,
-            beam_size=cfg.beam_size,
-            cutoff_prob=cfg.cutoff_prob,
-            cutoff_top_n=cfg.cutoff_top_n,
-            num_processes=cfg.num_proc_bsearch)
-        self.autolog.times.stamp()
-        self.autolog.times.stamp()
-        self.autolog.times.end()
+        result_transcripts = self.compute_result_transcripts(audio, audio_len,
+                                                              vocab_list, cfg)

         for utt, target, result in zip(utts, target_transcripts,
                                        result_transcripts):
             errors, len_ref = errors_func(target, result)
@@ -308,6 +293,26 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             error_rate=errors_sum / len_refs,
             error_rate_type=cfg.error_rate_type)

+    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
+        self.autolog.times.start()
+        self.autolog.times.stamp()
+        result_transcripts = self.model.decode(
+            audio,
+            audio_len,
+            vocab_list,
+            decoding_method=cfg.decoding_method,
+            lang_model_path=cfg.lang_model_path,
+            beam_alpha=cfg.alpha,
+            beam_beta=cfg.beta,
+            beam_size=cfg.beam_size,
+            cutoff_prob=cfg.cutoff_prob,
+            cutoff_top_n=cfg.cutoff_top_n,
+            num_processes=cfg.num_proc_bsearch)
+        self.autolog.times.stamp()
+        self.autolog.times.stamp()
+        self.autolog.times.end()
+        return result_transcripts
+
     @mp_tools.rank_zero_only
     @paddle.no_grad()
     def test(self):
@@ -403,21 +408,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
     def __init__(self, config, args):
         super().__init__(config, args)

-    def compute_metrics(self, utts, audio, audio_len, texts, texts_len,
-                        fout=None):
-        cfg = self.config.decoding
-        errors_sum, len_refs, num_ins = 0.0, 0, 0
-        errors_func = error_rate.char_errors if cfg.error_rate_type == 'cer' else error_rate.word_errors
-        error_rate_func = error_rate.cer if cfg.error_rate_type == 'cer' else error_rate.wer
-        vocab_list = self.test_loader.collate_fn.vocab_list
+    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
         if self.args.model_type == "online":
             output_probs_branch, output_lens_branch = self.static_forward_online(
                 audio, audio_len)
@@ -437,31 +428,12 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
                 cfg.beam_size,
                 cfg.cutoff_prob,
                 cfg.cutoff_top_n,
                 cfg.num_proc_bsearch)
-        target_transcripts = self.ordid2token(texts, texts_len)
-
-        for utt, target, result in zip(utts, target_transcripts,
-                                       result_transcripts):
-            errors, len_ref = errors_func(target, result)
-            errors_sum += errors
-            len_refs += len_ref
-            num_ins += 1
-            if fout:
-                fout.write(utt + " " + result + "\n")
-            logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %
-                        (target, result))
-            logger.info("Current error rate [%s] = %f" %
-                        (cfg.error_rate_type, error_rate_func(target, result)))
-
-        return dict(
-            errors_sum=errors_sum,
-            len_refs=len_refs,
-            num_ins=num_ins,
-            error_rate=errors_sum / len_refs,
-            error_rate_type=cfg.error_rate_type)
+        return result_transcripts

     def static_forward_online(self, audio, audio_len):
         output_probs_list = []
         output_lens_list = []
-        decoder_chunk_size = 8
+        decoder_chunk_size = 1
         subsampling_rate = self.model.encoder.conv.subsampling_rate
         receptive_field_length = self.model.encoder.conv.receptive_field_length
         chunk_stride = subsampling_rate * decoder_chunk_size
@@ -553,27 +525,27 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
                 output_chunk_lens = output_lens_handle.copy_to_cpu()
                 chunk_state_h_box = output_state_h_handle.copy_to_cpu()
                 chunk_state_c_box = output_state_c_handle.copy_to_cpu()

-                output_chunk_probs = paddle.to_tensor(output_chunk_probs)
-                output_chunk_lens = paddle.to_tensor(output_chunk_lens)
                 probs_chunk_list.append(output_chunk_probs)
                 probs_chunk_lens_list.append(output_chunk_lens)
-            output_probs = paddle.concat(probs_chunk_list, axis=1)
-            output_lens = paddle.add_n(probs_chunk_lens_list)
+            output_probs = np.concatenate(probs_chunk_list, axis=1)
+            output_lens = np.sum(probs_chunk_lens_list, axis=0)
             output_probs_padding_len = max_len_batch + batch_padding_len - output_probs.shape[1]
-            output_probs_padding = paddle.zeros(
+            output_probs_padding = np.zeros(
                 (1, output_probs_padding_len, output_probs.shape[2]),
-                dtype="float32")  # The prob padding for a piece of utterance
-            output_probs = paddle.concat(
+                dtype=np.float32)  # The prob padding for a piece of utterance
+            output_probs = np.concatenate(
                 [output_probs, output_probs_padding], axis=1)
             output_probs_list.append(output_probs)
             output_lens_list.append(output_lens)
         self.autolog.times.stamp()
         self.autolog.times.stamp()
         self.autolog.times.end()
-        output_probs_branch = paddle.concat(output_probs_list, axis=0)
-        output_lens_branch = paddle.concat(output_lens_list, axis=0)
+        output_probs_branch = np.concatenate(output_probs_list, axis=0)
+        output_lens_branch = np.concatenate(output_lens_list, axis=0)
+        output_probs_branch = paddle.to_tensor(output_probs_branch)
+        output_lens_branch = paddle.to_tensor(output_lens_branch)
         return output_probs_branch, output_lens_branch

     def static_forward_offline(self, audio, audio_len):
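The net effect of the change: DeepSpeech2Tester.compute_metrics now delegates decoding (together with the autolog timing calls) to a new compute_result_transcripts method, and DeepSpeech2ExportTester overrides only that method to run the exported static-graph model instead of duplicating the metric bookkeeping. A minimal sketch of that override pattern follows; the bodies are simplified placeholders, and only the class and method names come from the diff above.

# Sketch only: hypothetical, simplified bodies, not the project's implementation.
class DeepSpeech2Tester:
    def compute_metrics(self, audio, audio_len, vocab_list, cfg):
        # Shared scoring logic asks one overridable hook for transcripts.
        result_transcripts = self.compute_result_transcripts(
            audio, audio_len, vocab_list, cfg)
        return result_transcripts

    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
        # Dynamic-graph path: decode with the trained model (placeholder).
        return ["<decoded with self.model.decode>"]


class DeepSpeech2ExportTester(DeepSpeech2Tester):
    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
        # Exported-model path: run the static-graph predictor instead (placeholder).
        return ["<decoded with the exported predictor>"]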