Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
90d6b6f1
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
90d6b6f1
编写于
4月 11, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add aishell wfst eg script
上级
18b3225b
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
65 addition
and
19 deletion
+65
-19
speechx/examples/aishell/run.sh
speechx/examples/aishell/run.sh
+37
-5
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
...hx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+5
-3
speechx/examples/decoder/offline_wfst_decoder_main.cc
speechx/examples/decoder/offline_wfst_decoder_main.cc
+23
-5
speechx/speechx/nnet/paddle_nnet.cc
speechx/speechx/nnet/paddle_nnet.cc
+0
-6
未找到文件。
speechx/examples/aishell/run.sh
浏览文件 @
90d6b6f1
...
@@ -48,7 +48,7 @@ wer=./aishell_wer
...
@@ -48,7 +48,7 @@ wer=./aishell_wer
nj
=
40
nj
=
40
export
GLOG_logtostderr
=
1
export
GLOG_logtostderr
=
1
./local/split_data.sh
$data
$data
/
$aishell_wav_scp
$aishell_wav_scp
$nj
#
./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
data
=
$PWD
/data
data
=
$PWD
/data
# 3. gen linear feat
# 3. gen linear feat
...
@@ -72,10 +72,42 @@ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
...
@@ -72,10 +72,42 @@ utils/run.pl JOB=1:$nj $data/split${nj}/JOB/log \
--param_path
=
$aishell_online_model
/avg_1.jit.pdiparams
\
--param_path
=
$aishell_online_model
/avg_1.jit.pdiparams
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--dict_file
=
$lm_model_dir
/vocab.txt
\
--dict_file
=
$lm_model_dir
/vocab.txt
\
--lm_path
=
$lm_model_dir
/avg_1.jit.klm
\
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result
cat
$data
/split
${
nj
}
/
*
/result
>
$label_file
cat
$data
/split
${
nj
}
/
*
/result
>
${
label_file
}
local
/compute-wer.py
--char
=
1
--v
=
1
${
label_file
}
$text
>
${
wer
}
# 4. decode with lm
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/log_lm
\
offline_decoder_sliding_chunk_main
\
--feature_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/feat.scp
\
--model_path
=
$aishell_online_model
/avg_1.jit.pdmodel
\
--param_path
=
$aishell_online_model
/avg_1.jit.pdiparams
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--dict_file
=
$lm_model_dir
/vocab.txt
\
--lm_path
=
$lm_model_dir
/avg_1.jit.klm
\
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result_lm
cat
$data
/split
${
nj
}
/
*
/result_lm
>
${
label_file
}
_lm
local
/compute-wer.py
--char
=
1
--v
=
1
${
label_file
}
_lm
$text
>
${
wer
}
_lm
graph_dir
=
./aishell_graph
if
[
!
-d
$
]
;
then
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
unzip
-d
aishell_graph.zip
fi
# 5. test TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/log_tlg
\
offline_wfst_decoder_main
\
--feature_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/feat.scp
\
--model_path
=
$aishell_online_model
/avg_1.jit.pdmodel
\
--param_path
=
$aishell_online_model
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$graph_dir
/words.txt
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--graph_path
=
$graph_dir
/TLG.fst
--max_active
=
7500
\
--acoustic_scale
=
1.2
\
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result_tlg
local
/compute-wer.py
--char
=
1
--v
=
1
$label_file
$text
>
$wer
cat
$data
/split
${
nj
}
/
*
/result_tlg
>
${
label_file
}
_tlg
tail
$wer
local
/compute-wer.py
--char
=
1
--v
=
1
${
label_file
}
_tlg
$text
>
${
wer
}
_tlg
\ No newline at end of file
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
浏览文件 @
90d6b6f1
...
@@ -27,7 +27,7 @@ DEFINE_string(result_wspecifier, "", "test result wspecifier");
...
@@ -27,7 +27,7 @@ DEFINE_string(result_wspecifier, "", "test result wspecifier");
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
"
lm.klm
"
,
"language model"
);
DEFINE_string
(
lm_path
,
""
,
"language model"
);
DEFINE_int32
(
receptive_field_length
,
DEFINE_int32
(
receptive_field_length
,
7
,
7
,
"receptive field of two CNN(kernel=5) downsampling module."
);
"receptive field of two CNN(kernel=5) downsampling module."
);
...
@@ -45,7 +45,6 @@ using kaldi::BaseFloat;
...
@@ -45,7 +45,6 @@ using kaldi::BaseFloat;
using
kaldi
::
Matrix
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
using
std
::
vector
;
// test ds2 online decoder by feeding speech feature
// test ds2 online decoder by feeding speech feature
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
...
@@ -63,7 +62,6 @@ int main(int argc, char* argv[]) {
...
@@ -63,7 +62,6 @@ int main(int argc, char* argv[]) {
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
CTCBeamSearchOptions
opts
;
ppspeech
::
CTCBeamSearchOptions
opts
;
...
@@ -139,6 +137,10 @@ int main(int argc, char* argv[]) {
...
@@ -139,6 +137,10 @@ int main(int argc, char* argv[]) {
std
::
string
result
;
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
if
(
result
.
empty
())
{
// the TokenWriter can not write empty string.
result
=
" "
;
}
result_writer
.
Write
(
utt
,
result
);
result_writer
.
Write
(
utt
,
result
);
decodable
->
Reset
();
decodable
->
Reset
();
decoder
.
Reset
();
decoder
.
Reset
();
...
...
speechx/examples/decoder/offline_wfst_decoder_main.cc
浏览文件 @
90d6b6f1
...
@@ -22,10 +22,11 @@
...
@@ -22,10 +22,11 @@
#include "nnet/decodable.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
#include "nnet/paddle_nnet.h"
DEFINE_string
(
feature_respecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
feature_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
word_symbol_table
,
"
vocab
.txt"
,
"word symbol table"
);
DEFINE_string
(
word_symbol_table
,
"
words
.txt"
,
"word symbol table"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_int32
(
max_active
,
7500
,
"decoder graph"
);
DEFINE_int32
(
max_active
,
7500
,
"decoder graph"
);
...
@@ -35,22 +36,33 @@ DEFINE_int32(receptive_field_length,
...
@@ -35,22 +36,33 @@ DEFINE_int32(receptive_field_length,
DEFINE_int32
(
downsampling_rate
,
DEFINE_int32
(
downsampling_rate
,
4
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
"two CNN(kernel=5) module downsampling rate."
);
DEFINE_string
(
model_output_names
,
"save_infer_model/scale_0.tmp_1,save_infer_model/"
"scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/"
"scale_3.tmp_1"
,
"model output names"
);
DEFINE_string
(
model_cache_names
,
"5-1-1024,5-1-1024"
,
"model cache names"
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
using
std
::
vector
;
// test
clg
decoder by feeding speech feature.
// test
TLG
decoder by feeding speech feature.
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
google
::
InitGoogleLogging
(
argv
[
0
]);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_respecifier
);
FLAGS_feature_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
std
::
string
model_graph
=
FLAGS_model_path
;
std
::
string
model_graph
=
FLAGS_model_path
;
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
word_symbol_table
=
FLAGS_word_symbol_table
;
std
::
string
word_symbol_table
=
FLAGS_word_symbol_table
;
std
::
string
graph_path
=
FLAGS_graph_path
;
std
::
string
graph_path
=
FLAGS_graph_path
;
LOG
(
INFO
)
<<
"model path: "
<<
model_graph
;
LOG
(
INFO
)
<<
"model param: "
<<
model_params
;
LOG
(
INFO
)
<<
"word symbol path: "
<<
word_symbol_table
;
LOG
(
INFO
)
<<
"graph path: "
<<
graph_path
;
int32
num_done
=
0
,
num_err
=
0
;
int32
num_done
=
0
,
num_err
=
0
;
...
@@ -65,7 +77,8 @@ int main(int argc, char* argv[]) {
...
@@ -65,7 +77,8 @@ int main(int argc, char* argv[]) {
ppspeech
::
ModelOptions
model_opts
;
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
params_path
=
model_params
;
model_opts
.
params_path
=
model_params
;
model_opts
.
cache_shape
=
"5-1-1024,5-1-1024"
;
model_opts
.
cache_shape
=
FLAGS_model_cache_names
;
model_opts
.
output_names
=
FLAGS_model_output_names
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
...
@@ -127,6 +140,11 @@ int main(int argc, char* argv[]) {
...
@@ -127,6 +140,11 @@ int main(int argc, char* argv[]) {
std
::
string
result
;
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
if
(
result
.
empty
())
{
// the TokenWriter can not write empty string.
result
=
" "
;
}
result_writer
.
Write
(
utt
,
result
);
decodable
->
Reset
();
decodable
->
Reset
();
decoder
.
Reset
();
decoder
.
Reset
();
++
num_done
;
++
num_done
;
...
...
speechx/speechx/nnet/paddle_nnet.cc
浏览文件 @
90d6b6f1
...
@@ -94,7 +94,6 @@ PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
...
@@ -94,7 +94,6 @@ PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
void
PaddleNnet
::
Reset
()
{
InitCacheEncouts
(
opts_
);
}
void
PaddleNnet
::
Reset
()
{
InitCacheEncouts
(
opts_
);
}
paddle_infer
::
Predictor
*
PaddleNnet
::
GetPredictor
()
{
paddle_infer
::
Predictor
*
PaddleNnet
::
GetPredictor
()
{
LOG
(
INFO
)
<<
"attempt to get a new predictor instance "
<<
std
::
endl
;
paddle_infer
::
Predictor
*
predictor
=
nullptr
;
paddle_infer
::
Predictor
*
predictor
=
nullptr
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
pool_mutex
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
pool_mutex
);
int
pred_id
=
0
;
int
pred_id
=
0
;
...
@@ -110,7 +109,6 @@ paddle_infer::Predictor* PaddleNnet::GetPredictor() {
...
@@ -110,7 +109,6 @@ paddle_infer::Predictor* PaddleNnet::GetPredictor() {
if
(
predictor
)
{
if
(
predictor
)
{
pool_usages
[
pred_id
]
=
true
;
pool_usages
[
pred_id
]
=
true
;
predictor_to_thread_id
[
predictor
]
=
pred_id
;
predictor_to_thread_id
[
predictor
]
=
pred_id
;
LOG
(
INFO
)
<<
pred_id
<<
" predictor create success"
;
}
else
{
}
else
{
LOG
(
INFO
)
<<
"Failed to get predictor from pool !!!"
;
LOG
(
INFO
)
<<
"Failed to get predictor from pool !!!"
;
}
}
...
@@ -119,7 +117,6 @@ paddle_infer::Predictor* PaddleNnet::GetPredictor() {
...
@@ -119,7 +117,6 @@ paddle_infer::Predictor* PaddleNnet::GetPredictor() {
}
}
int
PaddleNnet
::
ReleasePredictor
(
paddle_infer
::
Predictor
*
predictor
)
{
int
PaddleNnet
::
ReleasePredictor
(
paddle_infer
::
Predictor
*
predictor
)
{
LOG
(
INFO
)
<<
"attempt to releae a predictor"
;
std
::
lock_guard
<
std
::
mutex
>
guard
(
pool_mutex
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
pool_mutex
);
auto
iter
=
predictor_to_thread_id
.
find
(
predictor
);
auto
iter
=
predictor_to_thread_id
.
find
(
predictor
);
...
@@ -128,10 +125,8 @@ int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
...
@@ -128,10 +125,8 @@ int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
return
0
;
return
0
;
}
}
LOG
(
INFO
)
<<
iter
->
second
<<
" predictor will be release"
;
pool_usages
[
iter
->
second
]
=
false
;
pool_usages
[
iter
->
second
]
=
false
;
predictor_to_thread_id
.
erase
(
predictor
);
predictor_to_thread_id
.
erase
(
predictor
);
LOG
(
INFO
)
<<
"release success"
;
return
0
;
return
0
;
}
}
...
@@ -152,7 +147,6 @@ void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
...
@@ -152,7 +147,6 @@ void PaddleNnet::FeedForward(const Vector<BaseFloat>& features,
int
feat_row
=
features
.
Dim
()
/
feature_dim
;
int
feat_row
=
features
.
Dim
()
/
feature_dim
;
std
::
vector
<
std
::
string
>
input_names
=
predictor
->
GetInputNames
();
std
::
vector
<
std
::
string
>
input_names
=
predictor
->
GetInputNames
();
std
::
vector
<
std
::
string
>
output_names
=
predictor
->
GetOutputNames
();
std
::
vector
<
std
::
string
>
output_names
=
predictor
->
GetOutputNames
();
LOG
(
INFO
)
<<
"feat info: rows, cols: "
<<
feat_row
<<
", "
<<
feature_dim
;
std
::
unique_ptr
<
paddle_infer
::
Tensor
>
input_tensor
=
std
::
unique_ptr
<
paddle_infer
::
Tensor
>
input_tensor
=
predictor
->
GetInputHandle
(
input_names
[
0
]);
predictor
->
GetInputHandle
(
input_names
[
0
]);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录