Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
7fe0c5fa
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7fe0c5fa
编写于
10月 22, 2021
作者:
J
Jackwaterveg
提交者:
GitHub
10月 22, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #920 from Jackwaterveg/Hub
add the Hub: conformer for aishell
上级
a5745168
2b7fdf51
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
260 additions
and
3 deletions
+260
-3
deepspeech/exps/deepspeech2/bin/test_hub.py
deepspeech/exps/deepspeech2/bin/test_hub.py
+1
-1
deepspeech/exps/u2/bin/test_hub.py
deepspeech/exps/u2/bin/test_hub.py
+186
-0
deepspeech/frontend/featurizer/text_featurizer.py
deepspeech/frontend/featurizer/text_featurizer.py
+18
-1
examples/aishell/s1/local/test_hub.sh
examples/aishell/s1/local/test_hub.sh
+47
-0
examples/aishell/s1/run.sh
examples/aishell/s1/run.sh
+8
-1
未找到文件。
deepspeech/exps/deepspeech2/bin/test_hub.py
浏览文件 @
7fe0c5fa
...
...
@@ -58,7 +58,7 @@ class DeepSpeech2Tester_hub():
num_processes
=
cfg
.
num_proc_bsearch
)
#replace the '<space>' with ' '
result_transcripts
=
[
self
.
_text_featurizer
.
detokenize
(
sentence
)
self
.
_text_featurizer
.
detokenize
2
(
sentence
)
for
sentence
in
result_transcripts
]
...
...
deepspeech/exps/u2/bin/test_hub.py
0 → 100644
浏览文件 @
7fe0c5fa
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for U2 model."""
import
cProfile
import
os
import
sys
import
paddle
import
soundfile
from
deepspeech.exps.u2.config
import
get_cfg_defaults
from
deepspeech.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
deepspeech.io.collator
import
SpeechCollator
from
deepspeech.models.u2
import
U2Model
from
deepspeech.training.cli
import
default_argument_parser
from
deepspeech.training.trainer
import
Trainer
from
deepspeech.utils
import
layer_tools
from
deepspeech.utils
import
mp_tools
from
deepspeech.utils.log
import
Log
from
deepspeech.utils.utility
import
print_arguments
from
deepspeech.utils.utility
import
UpdateConfig
logger
=
Log
(
__name__
).
getlog
()
# TODO(hui zhang): dynamic load
class U2Tester_Hub(Trainer):
    """Decode one audio file with a U2 model restored from a checkpoint.

    The class derives from ``Trainer`` but does not call
    ``Trainer.__init__``; it only stores the configuration, the parsed
    command-line arguments and the objects needed to featurize one audio
    file and decode it.
    """

    def __init__(self, config, args):
        """Store the settings and build the collator and text featurizer.

        Args:
            config: experiment configuration. ``config.collator``,
                ``config.model`` and ``config.decoding`` are read by this
                class.
            args: parsed command-line arguments. ``audio_file``, ``nprocs``
                and ``checkpoint_path`` are read by this class.
        """
        # NOTE: Trainer.__init__ is not invoked; only the attributes below
        # are initialised here.
        self.args = args
        self.config = config
        self.audio_file = args.audio_file
        self.collate_fn_test = SpeechCollator.from_config(config)
        self._text_featurizer = TextFeaturizer(
            unit_type=config.collator.unit_type,
            vocab_filepath=None,
            spm_model_prefix=config.collator.spm_model_prefix)

    def setup_model(self):
        """Build the U2 model from ``config.model`` and store it on ``self``.

        The model's input and output dimensions are taken from the collator
        (``feature_size`` and ``vocab_size``) before the model is created.
        """
        model_conf = self.config.model
        with UpdateConfig(model_conf):
            model_conf.input_dim = self.collate_fn_test.feature_size
            model_conf.output_dim = self.collate_fn_test.vocab_size
        model = U2Model.from_config(model_conf)

        # ``parallel`` is not defined in this class; presumably it is
        # provided by ``Trainer`` -- TODO confirm.
        if self.parallel:
            model = paddle.DataParallel(model)

        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        self.model = model
        logger.info("Setup model")

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Featurize ``self.audio_file``, decode it and log the first result.

        Decoding options are read from ``self.config.decoding``.
        """
        self.model.eval()
        cfg = self.config.decoding
        collate_fn_test = self.collate_fn_test

        # The transcript is only a placeholder: the second return value of
        # process_utterance is discarded.
        audio, _ = collate_fn_test.process_utterance(
            audio_file=self.audio_file, transcript="Hello")
        # Size of the first axis of the processed features (presumably the
        # number of frames -- TODO confirm).
        audio_len = audio.shape[0]
        audio = paddle.to_tensor(audio, dtype='float32')
        audio_len = paddle.to_tensor(audio_len)
        # Add a leading axis so the single utterance forms a batch of one.
        audio = paddle.unsqueeze(audio, axis=0)

        result_transcripts = self.model.decode(
            audio,
            audio_len,
            text_feature=collate_fn_test.text_feature,
            decoding_method=cfg.decoding_method,
            lang_model_path=cfg.lang_model_path,
            beam_alpha=cfg.alpha,
            beam_beta=cfg.beta,
            beam_size=cfg.beam_size,
            cutoff_prob=cfg.cutoff_prob,
            cutoff_top_n=cfg.cutoff_top_n,
            num_processes=cfg.num_proc_bsearch,
            ctc_weight=cfg.ctc_weight,
            decoding_chunk_size=cfg.decoding_chunk_size,
            num_decoding_left_chunks=cfg.num_decoding_left_chunks,
            simulate_streaming=cfg.simulate_streaming)
        # Formatting (rather than ``str +``) keeps the log line working even
        # if the first result is not a plain string.
        logger.info(f"The result_transcripts: {result_transcripts[0]}")

    def run_test(self):
        """Load the checkpoint and run ``test``; exit with -1 on Ctrl-C."""
        self.resume()
        try:
            self.test()
        except KeyboardInterrupt:
            sys.exit(-1)

    def setup(self):
        """Setup the experiment.

        Selects the device (``'gpu'`` when ``args.nprocs > 0``, otherwise
        ``'cpu'``), builds the model and resets the iteration and epoch
        counters. Output directory, checkpointer and dataloader setup are
        not performed here.
        """
        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')

        self.setup_model()

        self.iteration = 0
        self.epoch = 0

    def resume(self):
        """Load model parameters from ``<args.checkpoint_path>.pdparams``.

        Raises:
            ValueError: if ``args.checkpoint_path`` is ``None``.
        """
        checkpoint_path = self.args.checkpoint_path
        if checkpoint_path is None:
            # Without this guard the concatenation below fails with an
            # opaque ``TypeError`` (None + str).
            raise ValueError(
                "checkpoint_path is required to load the model parameters")
        params_path = checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)
def check(audio_file):
    """Verify that ``audio_file`` can be read and is sampled at 16000 Hz.

    The process exits with status -1 when the file cannot be read by
    ``soundfile`` or when its sample rate is not 16000.

    Args:
        audio_file (str): path of the audio file to validate.
    """
    logger.info("checking the audio file format......")
    try:
        # Only the sample rate is needed; the decoded signal is discarded.
        _, sample_rate = soundfile.read(audio_file)
    except Exception as e:
        logger.error(str(e))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    logger.info("The sample rate is %d" % sample_rate)
    # Explicit check instead of ``assert``: assertions are removed when
    # Python runs with -O, which would silently skip this validation.
    if sample_rate != 16000:
        logger.error("The sample rate of the audio file must be 16000")
        sys.exit(-1)
    logger.info("The audio file format is right")
def main_sp(config, args):
    """Create a ``U2Tester_Hub``, set it up and run its single-file test.

    Args:
        config: experiment configuration passed to ``U2Tester_Hub``.
        args: parsed command-line arguments passed to ``U2Tester_Hub``.
    """
    tester = U2Tester_Hub(config, args)
    tester.setup()
    tester.run_test()
def main(config, args):
    """Forward ``config`` and ``args`` to ``main_sp``.

    Args:
        config: experiment configuration.
        args: parsed command-line arguments.
    """
    main_sp(config, args)
if __name__ == "__main__":
    # Script entry: parse arguments, validate the input audio, assemble the
    # configuration and run ``main`` under cProfile.
    parser = default_argument_parser()
    # save asr result to
    # NOTE(review): --result_file is accepted but not read anywhere in this
    # file -- confirm whether the result should be written to it.
    parser.add_argument(
        "--result_file", type=str, help="path of save the asr result")
    parser.add_argument(
        "--audio_file", type=str, help="path of the input audio file")
    args = parser.parse_args()
    print_arguments(args, globals())

    # ``args.audio_file`` is None when --audio_file is omitted, and
    # ``os.path.isfile(None)`` raises TypeError; test for it first so the
    # user sees the intended message.
    if not args.audio_file or not os.path.isfile(args.audio_file):
        print("Please input the right audio file path")
        sys.exit(-1)
    check(args.audio_file)

    # https://yaml.org/type/float.html
    config = get_cfg_defaults()
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling: the statistics are written to 'test.profile'
    # in the current working directory.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')
deepspeech/frontend/featurizer/text_featurizer.py
浏览文件 @
7fe0c5fa
...
...
@@ -63,6 +63,11 @@ class TextFeaturizer():
tokens
=
self
.
spm_tokenize
(
text
)
return
tokens
def detokenize2(self, tokens):
    """Detokenize ``tokens`` for the ``'char'`` unit type.

    The work is delegated to ``self.char_detokenize2``.

    Args:
        tokens: value forwarded unchanged to ``char_detokenize2``.

    Returns:
        Whatever ``char_detokenize2`` returns for ``tokens``.

    Raises:
        ValueError: if ``self.unit_type`` is not ``'char'``.
    """
    if self.unit_type != 'char':
        # Previously this path reached ``return text`` with ``text`` never
        # assigned, which surfaced as an UnboundLocalError.
        raise ValueError(
            "detokenize2 only supports unit_type 'char', got %r" %
            (self.unit_type, ))
    return self.char_detokenize2(tokens)
def
detokenize
(
self
,
tokens
):
if
self
.
unit_type
==
'char'
:
text
=
self
.
char_detokenize
(
tokens
)
...
...
@@ -123,6 +128,18 @@ class TextFeaturizer():
text_list
=
list
(
text
)
return
text_list
def char_detokenize2(self, tokens):
    """Replace every ``SPACE`` marker in ``tokens`` with a blank.

    Args:
        tokens (str): text that may contain ``SPACE`` markers.

    Returns:
        str: ``tokens`` with each ``SPACE`` occurrence replaced by ``" "``.
    """
    text = tokens.replace(SPACE, " ")
    return text
def
char_detokenize
(
self
,
tokens
):
"""Character detokenizer.
...
...
@@ -132,7 +149,7 @@ class TextFeaturizer():
Returns:
str: text string.
"""
tokens
=
tokens
.
replace
(
SPACE
,
" "
)
tokens
=
[
token
.
replace
(
SPACE
,
" "
)
for
token
in
tokens
]
return
""
.
join
(
tokens
)
def
word_tokenize
(
self
,
text
):
...
...
examples/aishell/s1/local/test_hub.sh
0 → 100755
浏览文件 @
7fe0c5fa
#!/bin/bash
# Decode a single audio file with ${BIN_DIR}/test_hub.py.
#
# Usage: test_hub.sh config_path ckpt_path_prefix audio_file
# BIN_DIR is expected to be set by the calling environment.

if [ $# != 3 ]; then
    echo "usage: ${0} config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo "$CUDA_VISIBLE_DEVICES" | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_prefix=$2
audio_file=$3

# Set to true when the config file name contains "chunk_" and ends in "yaml".
# NOTE: the flag is not consumed anywhere below.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]]; then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    # All expansions are quoted so that paths containing spaces survive.
    #
    # NOTE(review): the overrides are passed through a single trailing
    # --opts. test_hub.py feeds args.opts to merge_from_list as one flat
    # list, so if the parser stores --opts with the default argparse action
    # a repeated flag would keep only its last occurrence -- confirm against
    # default_argument_parser.
    python3 -u "${BIN_DIR}/test_hub.py" \
    --nproc "${ngpu}" \
    --config "${config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --audio_file "${audio_file}" \
    --opts decoding.decoding_method "${type}" decoding.batch_size "${batch_size}"

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0
examples/aishell/s1/run.sh
浏览文件 @
7fe0c5fa
...
...
@@ -13,6 +13,8 @@ avg_ckpt=avg_${avg_num}
ckpt
=
$(
basename
${
conf_path
}
|
awk
-F
'.'
'{print $1}'
)
echo
"checkpoint name
${
ckpt
}
"
audio_file
=
"data/tmp.wav"
if
[
${
stage
}
-le
0
]
&&
[
${
stop_stage
}
-ge
0
]
;
then
# prepare data
bash ./local/data.sh
||
exit
-1
...
...
@@ -46,5 +48,10 @@ fi
# Optionally, you can add LM and test it with runtime.
if
[
${
stage
}
-le
6
]
&&
[
${
stop_stage
}
-ge
6
]
;
then
# train lm and build TLG
./local/tlg.sh
--corpus
aishell
--lmtype
srilm
./local/tlg.sh
--corpus
aishell
--lmtype
srilm
fi
if
[
${
stage
}
-le
7
]
&&
[
${
stop_stage
}
-ge
7
]
;
then
# test a single .wav file
CUDA_VISIBLE_DEVICES
=
3 ./local/test_hub.sh
${
conf_path
}
exp/
${
ckpt
}
/checkpoints/
${
avg_ckpt
}
${
audio_file
}
||
exit
-1
fi
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录