Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ab92e2c9
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ab92e2c9
编写于
9月 06, 2022
作者:
T
tianhao zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix deepspeech2 decode_wav
上级
ed16f96a
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
35 addition
and
30 deletion
+35
-30
paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
+35
-30
未找到文件。
paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
浏览文件 @
ab92e2c9
...
...
@@ -20,8 +20,8 @@ import paddle
import
soundfile
from
yacs.config
import
CfgNode
from
paddlespeech.audio.transform.transformation
import
Transformation
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.io.collator
import
SpeechCollator
from
paddlespeech.s2t.models.ds2
import
DeepSpeech2Model
from
paddlespeech.s2t.training.cli
import
default_argument_parser
from
paddlespeech.s2t.utils
import
mp_tools
...
...
@@ -38,24 +38,24 @@ class DeepSpeech2Tester_hub():
self
.
args
=
args
self
.
config
=
config
self
.
audio_file
=
args
.
audio_file
self
.
collate_fn_test
=
SpeechCollator
.
from_config
(
config
)
self
.
_text_featurizer
=
TextFeaturizer
(
unit_type
=
config
.
unit_type
,
vocab
=
None
)
def
compute_result_transcripts
(
self
,
audio
,
audio_len
,
vocab_list
,
cfg
):
result_transcripts
=
self
.
model
.
decode
(
audio
,
audio_len
,
vocab_list
,
decoding_method
=
cfg
.
decoding_method
,
lang_model_path
=
cfg
.
lang_model_path
,
beam_alpha
=
cfg
.
alpha
,
beam_beta
=
cfg
.
beta
,
beam_size
=
cfg
.
beam_size
,
cutoff_prob
=
cfg
.
cutoff_prob
,
cutoff_top_n
=
cfg
.
cutoff_top_n
,
num_processes
=
cfg
.
num_proc_bsearch
)
self
.
preprocess_conf
=
config
.
preprocess_config
self
.
preprocess_args
=
{
"train"
:
False
}
self
.
preprocessing
=
Transformation
(
self
.
preprocess_conf
)
self
.
text_feature
=
TextFeaturizer
(
unit_type
=
config
.
unit_type
,
vocab
=
config
.
vocab_filepath
,
spm_model_prefix
=
config
.
spm_model_prefix
)
paddle
.
set_device
(
'gpu'
if
self
.
args
.
ngpu
>
0
else
'cpu'
)
def
compute_result_transcripts
(
self
,
audio
,
audio_len
,
vocab_list
,
cfg
):
decode_batch_size
=
cfg
.
decode_batch_size
self
.
model
.
decoder
.
init_decoder
(
decode_batch_size
,
vocab_list
,
cfg
.
decoding_method
,
cfg
.
lang_model_path
,
cfg
.
alpha
,
cfg
.
beta
,
cfg
.
beam_size
,
cfg
.
cutoff_prob
,
cfg
.
cutoff_top_n
,
cfg
.
num_proc_bsearch
)
result_transcripts
=
self
.
model
.
decode
(
audio
,
audio_len
)
return
result_transcripts
@
mp_tools
.
rank_zero_only
...
...
@@ -64,16 +64,23 @@ class DeepSpeech2Tester_hub():
self
.
model
.
eval
()
cfg
=
self
.
config
audio_file
=
self
.
audio_file
collate_fn_test
=
self
.
collate_fn_test
audio
,
_
=
collate_fn_test
.
process_utterance
(
audio_file
=
audio_file
,
transcript
=
" "
)
audio_len
=
audio
.
shape
[
0
]
audio
=
paddle
.
to_tensor
(
audio
,
dtype
=
'float32'
)
audio_len
=
paddle
.
to_tensor
(
audio_len
)
audio
=
paddle
.
unsqueeze
(
audio
,
axis
=
0
)
vocab_list
=
collate_fn_test
.
vocab_list
audio
,
sample_rate
=
soundfile
.
read
(
self
.
audio_file
,
dtype
=
"int16"
,
always_2d
=
True
)
audio
=
audio
[:,
0
]
logger
.
info
(
f
"audio shape:
{
audio
.
shape
}
"
)
# fbank
feat
=
self
.
preprocessing
(
audio
,
**
self
.
preprocess_args
)
logger
.
info
(
f
"feat shape:
{
feat
.
shape
}
"
)
audio_len
=
paddle
.
to_tensor
(
feat
.
shape
[
0
])
audio
=
paddle
.
to_tensor
(
feat
,
dtype
=
'float32'
).
unsqueeze
(
axis
=
0
)
result_transcripts
=
self
.
compute_result_transcripts
(
audio
,
audio_len
,
vocab_list
,
cfg
.
decode
)
audio
,
audio_len
,
self
.
text_feature
.
vocab_list
,
cfg
.
decode
)
logger
.
info
(
"result_transcripts: "
+
result_transcripts
[
0
])
def
run_test
(
self
):
...
...
@@ -109,11 +116,9 @@ class DeepSpeech2Tester_hub():
def
setup_model
(
self
):
config
=
self
.
config
.
clone
()
with
UpdateConfig
(
config
):
config
.
input_dim
=
self
.
collate_fn_test
.
feature_size
config
.
output_dim
=
self
.
collate_fn_test
.
vocab_size
config
.
input_dim
=
config
.
feat_dim
config
.
output_dim
=
self
.
text_feature
.
vocab_size
model
=
DeepSpeech2Model
.
from_config
(
config
)
self
.
model
=
model
def
setup_checkpointer
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录