Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ae629e2f
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ae629e2f
编写于
5月 27, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rm ds2_ol test dir
上级
418cc37f
变更
32
显示空白变更内容
内联
并排
Showing
32 changed file
with
5 addition
and
1465 deletion
+5
-1465
speechx/examples/README.md
speechx/examples/README.md
+2
-9
speechx/examples/ds2_ol/CMakeLists.txt
speechx/examples/ds2_ol/CMakeLists.txt
+0
-6
speechx/examples/ds2_ol/aishell/run_build_tlg.sh
speechx/examples/ds2_ol/aishell/run_build_tlg.sh
+1
-1
speechx/examples/ds2_ol/aishell/run_fbank.sh
speechx/examples/ds2_ol/aishell/run_fbank.sh
+1
-1
speechx/examples/ds2_ol/decoder/.gitignore
speechx/examples/ds2_ol/decoder/.gitignore
+0
-2
speechx/examples/ds2_ol/decoder/CMakeLists.txt
speechx/examples/ds2_ol/decoder/CMakeLists.txt
+0
-22
speechx/examples/ds2_ol/decoder/README.md
speechx/examples/ds2_ol/decoder/README.md
+0
-12
speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
...mples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+0
-167
speechx/examples/ds2_ol/decoder/local/model.sh
speechx/examples/ds2_ol/decoder/local/model.sh
+0
-3
speechx/examples/ds2_ol/decoder/nnet-logprob-decoder-test.cc
speechx/examples/ds2_ol/decoder/nnet-logprob-decoder-test.cc
+0
-74
speechx/examples/ds2_ol/decoder/path.sh
speechx/examples/ds2_ol/decoder/path.sh
+0
-14
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
+0
-99
speechx/examples/ds2_ol/decoder/run.sh
speechx/examples/ds2_ol/decoder/run.sh
+0
-78
speechx/examples/ds2_ol/decoder/valgrind.sh
speechx/examples/ds2_ol/decoder/valgrind.sh
+0
-26
speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
+0
-169
speechx/examples/ds2_ol/feat/.gitignore
speechx/examples/ds2_ol/feat/.gitignore
+0
-2
speechx/examples/ds2_ol/feat/CMakeLists.txt
speechx/examples/ds2_ol/feat/CMakeLists.txt
+0
-16
speechx/examples/ds2_ol/feat/README.md
speechx/examples/ds2_ol/feat/README.md
+0
-7
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
+0
-85
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
+0
-143
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
.../examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
+0
-147
speechx/examples/ds2_ol/feat/path.sh
speechx/examples/ds2_ol/feat/path.sh
+0
-14
speechx/examples/ds2_ol/feat/run.sh
speechx/examples/ds2_ol/feat/run.sh
+0
-57
speechx/examples/ds2_ol/feat/valgrind.sh
speechx/examples/ds2_ol/feat/valgrind.sh
+0
-24
speechx/examples/ds2_ol/nnet/.gitignore
speechx/examples/ds2_ol/nnet/.gitignore
+0
-2
speechx/examples/ds2_ol/nnet/CMakeLists.txt
speechx/examples/ds2_ol/nnet/CMakeLists.txt
+0
-6
speechx/examples/ds2_ol/nnet/README.md
speechx/examples/ds2_ol/nnet/README.md
+0
-3
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
+0
-203
speechx/examples/ds2_ol/nnet/path.sh
speechx/examples/ds2_ol/nnet/path.sh
+0
-14
speechx/examples/ds2_ol/nnet/run.sh
speechx/examples/ds2_ol/nnet/run.sh
+0
-38
speechx/examples/ds2_ol/nnet/valgrind.sh
speechx/examples/ds2_ol/nnet/valgrind.sh
+0
-20
utils/README.md
utils/README.md
+1
-1
未找到文件。
speechx/examples/README.md
浏览文件 @
ae629e2f
...
...
@@ -22,14 +22,7 @@ netron exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel --port 8022 --host
## For Developer
>
Warning
: Only for developer, make sure you know what's it.
>
Reminder
: Only for developer, make sure you know what's it.
*
dev
- for speechx developer, using for test.
*
codelab
- for speechx developer, using for test.
## Build WFST
> Warning: Using below example when you know what's it.
*
text_lm - process text for build lm
*
ngram - using to build NGram ARPA lm.
*
wfst - build wfst for TLG.
speechx/examples/ds2_ol/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
418cc37f
cmake_minimum_required
(
VERSION 3.14 FATAL_ERROR
)
add_subdirectory
(
feat
)
add_subdirectory
(
nnet
)
add_subdirectory
(
decoder
)
add_subdirectory
(
websocket
)
speechx/examples/ds2_ol/aishell/run_build_tlg.sh
浏览文件 @
ae629e2f
...
...
@@ -118,7 +118,7 @@ export GLOG_logtostderr=1
if
[
${
stage
}
-le
4
]
&&
[
${
stop_stage
}
-ge
4
]
;
then
# TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/check_tlg.log
\
recognizer_
test_
main
\
recognizer_main
\
--wav_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/
${
aishell_wav_scp
}
\
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_5.jit.pdmodel
\
...
...
speechx/examples/ds2_ol/aishell/run_fbank.sh
浏览文件 @
ae629e2f
...
...
@@ -148,7 +148,7 @@ fi
if
[
${
stage
}
-le
5
]
&&
[
${
stop_stage
}
-ge
5
]
;
then
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/fbank_recognizer.log
\
recognizer_
test_
main
\
recognizer_main
\
--wav_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/
${
aishell_wav_scp
}
\
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_5.jit.pdmodel
\
...
...
speechx/examples/ds2_ol/decoder/.gitignore
已删除
100644 → 0
浏览文件 @
418cc37f
data
exp
speechx/examples/ds2_ol/decoder/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
418cc37f
cmake_minimum_required
(
VERSION 3.14 FATAL_ERROR
)
set
(
bin_name ctc-prefix-beam-search-decoder-ol
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
set
(
bin_name wfst-decoder-ol
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder
${
DEPS
}
)
set
(
bin_name nnet-logprob-decoder-test
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
PUBLIC nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
${
DEPS
}
)
add_executable
(
recognizer_test_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/recognizer_test_main.cc
)
target_include_directories
(
recognizer_test_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
recognizer_test_main PUBLIC frontend kaldi-feat-common nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder
${
DEPS
}
)
speechx/examples/ds2_ol/decoder/README.md
已删除
100644 → 0
浏览文件 @
418cc37f
# ASR Decoder
ASR Decoder test bins. We using theses bins to test CTC BeamSearch decoder and WFST decoder.
*
decoder_test_main.cc
feed nnet output logprob, and only test decoder
*
offline_decoder_sliding_chunk_main.cc
feed streaming audio feature, decode as streaming manner.
*
offline_wfst_decoder_main.cc
feed streaming audio feature, decode using WFST as streaming manner.
speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// todo refactor, repalce with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "frontend/audio/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
DEFINE_string
(
feature_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
""
,
"language model"
);
DEFINE_int32
(
receptive_field_length
,
7
,
"receptive field of two CNN(kernel=5) downsampling module."
);
DEFINE_int32
(
downsampling_rate
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
DEFINE_string
(
model_input_names
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
"model input names"
);
DEFINE_string
(
model_output_names
,
"softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0"
,
"model output names"
);
DEFINE_string
(
model_cache_names
,
"chunk_state_h_box,chunk_state_c_box"
,
"model cache names"
);
DEFINE_string
(
model_cache_shapes
,
"5-1-1024,5-1-1024"
,
"model cache shapes"
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test ds2 online decoder by feeding speech feature
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
CHECK
(
FLAGS_result_wspecifier
!=
""
);
CHECK
(
FLAGS_feature_rspecifier
!=
""
);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
std
::
string
model_path
=
FLAGS_model_path
;
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
LOG
(
INFO
)
<<
"model path: "
<<
model_path
;
LOG
(
INFO
)
<<
"model param: "
<<
model_params
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
CTCBeamSearchOptions
opts
;
opts
.
dict_file
=
dict_file
;
opts
.
lm_path
=
lm_path
;
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
model_path
;
model_opts
.
param_path
=
model_params
;
model_opts
.
cache_names
=
FLAGS_model_cache_names
;
model_opts
.
cache_shape
=
FLAGS_model_cache_shapes
;
model_opts
.
input_names
=
FLAGS_model_input_names
;
model_opts
.
output_names
=
FLAGS_model_output_names
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
));
int32
chunk_size
=
FLAGS_receptive_field_length
;
int32
chunk_stride
=
FLAGS_downsampling_rate
;
int32
receptive_field_length
=
FLAGS_receptive_field_length
;
LOG
(
INFO
)
<<
"chunk size (frame): "
<<
chunk_size
;
LOG
(
INFO
)
<<
"chunk stride (frame): "
<<
chunk_stride
;
LOG
(
INFO
)
<<
"receptive field (frame): "
<<
receptive_field_length
;
decoder
.
InitDecoder
();
kaldi
::
Timer
timer
;
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
raw_data
->
SetDim
(
feature
.
NumCols
());
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
LOG
(
INFO
)
<<
"rows: "
<<
feature
.
NumRows
();
LOG
(
INFO
)
<<
"cols: "
<<
feature
.
NumCols
();
int32
row_idx
=
0
;
int32
padding_len
=
0
;
int32
ori_feature_len
=
feature
.
NumRows
();
if
((
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
!=
0
)
{
padding_len
=
chunk_stride
-
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
;
feature
.
Resize
(
feature
.
NumRows
()
+
padding_len
,
feature
.
NumCols
(),
kaldi
::
kCopyData
);
}
int32
num_chunks
=
(
feature
.
NumRows
()
-
chunk_size
)
/
chunk_stride
+
1
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunks
;
++
chunk_idx
)
{
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feature_chunk
(
chunk_size
*
feature
.
NumCols
());
int32
feature_chunk_size
=
0
;
if
(
ori_feature_len
>
chunk_idx
*
chunk_stride
)
{
feature_chunk_size
=
std
::
min
(
ori_feature_len
-
chunk_idx
*
chunk_stride
,
chunk_size
);
}
if
(
feature_chunk_size
<
receptive_field_length
)
break
;
int32
start
=
chunk_idx
*
chunk_stride
;
for
(
int
row_id
=
0
;
row_id
<
chunk_size
;
++
row_id
)
{
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
tmp
(
feature
,
start
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
f_chunk_tmp
(
feature_chunk
.
Data
()
+
row_id
*
feature
.
NumCols
(),
feature
.
NumCols
());
f_chunk_tmp
.
CopyFromVec
(
tmp
);
++
start
;
}
raw_data
->
Accept
(
feature_chunk
);
if
(
chunk_idx
==
num_chunks
-
1
)
{
raw_data
->
SetFinished
();
}
decoder
.
AdvanceDecode
(
decodable
);
}
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
decodable
->
Reset
();
decoder
.
Reset
();
if
(
result
.
empty
())
{
// the TokenWriter can not write empty string.
++
num_err
;
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is empty"
;
continue
;
}
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
result_writer
.
Write
(
utt
,
result
);
++
num_done
;
}
KALDI_LOG
<<
"Done "
<<
num_done
<<
" utterances, "
<<
num_err
<<
" with errors."
;
double
elapsed
=
timer
.
Elapsed
();
KALDI_LOG
<<
" cost:"
<<
elapsed
<<
" s"
;
return
(
num_done
!=
0
?
0
:
1
);
}
speechx/examples/ds2_ol/decoder/local/model.sh
已删除
100644 → 0
浏览文件 @
418cc37f
#!/bin/bash
speechx/examples/ds2_ol/decoder/nnet-logprob-decoder-test.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// todo refactor, repalce with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
DEFINE_string
(
nnet_prob_respecifier
,
""
,
"test nnet prob rspecifier"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test decoder by feeding nnet posterior probability
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
kaldi
::
SequentialBaseFloatMatrixReader
likelihood_reader
(
FLAGS_nnet_prob_respecifier
);
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
CTCBeamSearchOptions
opts
;
opts
.
dict_file
=
dict_file
;
opts
.
lm_path
=
lm_path
;
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nullptr
,
nullptr
));
decoder
.
InitDecoder
();
for
(;
!
likelihood_reader
.
Done
();
likelihood_reader
.
Next
())
{
string
utt
=
likelihood_reader
.
Key
();
const
kaldi
::
Matrix
<
BaseFloat
>
likelihood
=
likelihood_reader
.
Value
();
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
LOG
(
INFO
)
<<
"rows: "
<<
likelihood
.
NumRows
();
LOG
(
INFO
)
<<
"cols: "
<<
likelihood
.
NumCols
();
decodable
->
Acceptlikelihood
(
likelihood
);
decoder
.
AdvanceDecode
(
decodable
);
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
decodable
->
Reset
();
decoder
.
Reset
();
++
num_done
;
}
KALDI_LOG
<<
"Done "
<<
num_done
<<
" utterances, "
<<
num_err
<<
" with errors."
;
return
(
num_done
!=
0
?
0
:
1
);
}
speechx/examples/ds2_ol/decoder/path.sh
已删除
100644 → 0
浏览文件 @
418cc37f
# This contains the locations of binarys build required for running the examples.
SPEECHX_ROOT
=
$PWD
/../../../
SPEECHX_EXAMPLES
=
$SPEECHX_ROOT
/build/examples
SPEECHX_TOOLS
=
$SPEECHX_ROOT
/tools
TOOLS_BIN
=
$SPEECHX_TOOLS
/valgrind/install/bin
[
-d
$SPEECHX_EXAMPLES
]
||
{
echo
"Error: 'build/examples' directory not found. please ensure that the project build successfully"
;
}
export
LC_AL
=
C
SPEECHX_BIN
=
$SPEECHX_EXAMPLES
/ds2_ol/decoder:
$SPEECHX_EXAMPLES
/ds2_ol/feat
export
PATH
=
$PATH
:
$SPEECHX_BIN
:
$TOOLS_BIN
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "decoder/recognizer.h"
#include "decoder/param.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/table-types.h"
DEFINE_string
(
wav_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_int32
(
sample_rate
,
16000
,
"sample rate"
);
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
ppspeech
::
RecognizerResource
resource
=
ppspeech
::
InitRecognizerResoure
();
ppspeech
::
Recognizer
recognizer
(
resource
);
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
FLAGS_wav_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
int
sample_rate
=
FLAGS_sample_rate
;
float
streaming_chunk
=
FLAGS_streaming_chunk
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
LOG
(
INFO
)
<<
"sr: "
<<
sample_rate
;
LOG
(
INFO
)
<<
"chunk size (s): "
<<
streaming_chunk
;
LOG
(
INFO
)
<<
"chunk size (sample): "
<<
chunk_sample_size
;
int32
num_done
=
0
,
num_err
=
0
;
double
tot_wav_duration
=
0.0
;
kaldi
::
Timer
timer
;
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
std
::
string
utt
=
wav_reader
.
Key
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
int32
this_channel
=
0
;
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
waveform
(
wave_data
.
Data
(),
this_channel
);
int
tot_samples
=
waveform
.
Dim
();
tot_wav_duration
+=
tot_samples
*
1.0
/
sample_rate
;
LOG
(
INFO
)
<<
"wav len (sample): "
<<
tot_samples
;
int
sample_offset
=
0
;
std
::
vector
<
kaldi
::
Vector
<
BaseFloat
>>
feats
;
int
feature_rows
=
0
;
while
(
sample_offset
<
tot_samples
)
{
int
cur_chunk_size
=
std
::
min
(
chunk_sample_size
,
tot_samples
-
sample_offset
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wav_chunk
(
cur_chunk_size
);
for
(
int
i
=
0
;
i
<
cur_chunk_size
;
++
i
)
{
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
}
// wav_chunk = waveform.Range(sample_offset + i, cur_chunk_size);
recognizer
.
Accept
(
wav_chunk
);
if
(
cur_chunk_size
<
chunk_sample_size
)
{
recognizer
.
SetFinished
();
}
recognizer
.
Decode
();
// no overlap
sample_offset
+=
cur_chunk_size
;
}
std
::
string
result
;
result
=
recognizer
.
GetFinalResult
();
recognizer
.
Reset
();
if
(
result
.
empty
())
{
// the TokenWriter can not write empty string.
++
num_err
;
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is empty"
;
continue
;
}
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
result_writer
.
Write
(
utt
,
result
);
++
num_done
;
}
double
elapsed
=
timer
.
Elapsed
();
KALDI_LOG
<<
"Done "
<<
num_done
<<
" out of "
<<
(
num_err
+
num_done
);
KALDI_LOG
<<
" cost:"
<<
elapsed
<<
" s"
;
KALDI_LOG
<<
"total wav duration is: "
<<
tot_wav_duration
<<
" s"
;
KALDI_LOG
<<
"the RTF is: "
<<
elapsed
/
tot_wav_duration
;
}
\ No newline at end of file
speechx/examples/ds2_ol/decoder/run.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash
set
+x
set
-e
.
path.sh
# 1. compile
if
[
!
-d
${
SPEECHX_EXAMPLES
}
]
;
then
pushd
${
SPEECHX_ROOT
}
bash build.sh
popd
fi
# input
mkdir
-p
data
data
=
$PWD
/data
ckpt_dir
=
$data
/model
model_dir
=
$ckpt_dir
/exp/deepspeech2_online/checkpoints/
vocb_dir
=
$ckpt_dir
/data/lang_char/
lm
=
$data
/zh_giga.no_cna_cmn.prune01244.klm
# output
exp_dir
=
./exp
mkdir
-p
$exp_dir
# 2. download model
if
[[
!
-f
data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
]]
;
then
mkdir
-p
data/model
pushd
data/model
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
tar
xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
popd
fi
# produce wav scp
if
[
!
-f
data/wav.scp
]
;
then
pushd
data
wget
-c
https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
echo
"utt1 "
$PWD
/zh.wav
>
wav.scp
popd
fi
# download lm
if
[
!
-f
$lm
]
;
then
pushd
data
wget
-c
https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
popd
fi
feat_wspecifier
=
$exp_dir
/feats.ark
cmvn
=
$exp_dir
/cmvn.ark
export
GLOG_logtostderr
=
1
# dump json cmvn to kaldi
cmvn-json2kaldi
\
--json_file
$ckpt_dir
/data/mean_std.json
\
--cmvn_write_path
$cmvn
\
--binary
=
false
echo
"convert json cmvn to kaldi ark."
# generate linear feature as streaming
linear-spectrogram-wo-db-norm-ol
\
--wav_rspecifier
=
scp:
$data
/wav.scp
\
--feature_wspecifier
=
ark,t:
$feat_wspecifier
\
--cmvn_file
=
$cmvn
echo
"compute linear spectrogram feature."
# run ctc beam search decoder as streaming
ctc-prefix-beam-search-decoder-ol
\
--result_wspecifier
=
ark,t:
$exp_dir
/result.txt
\
--feature_rspecifier
=
ark:
$feat_wspecifier
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--dict_file
=
$vocb_dir
/vocab.txt
\
--lm_path
=
$lm
\ No newline at end of file
speechx/examples/ds2_ol/decoder/valgrind.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash
# this script is for memory check, so please run ./run.sh first.
set
+x
set
-e
.
./path.sh
if
[
!
-d
${
SPEECHX_TOOLS
}
/valgrind/install
]
;
then
echo
"please install valgrind in the speechx tools dir.
\n
"
exit
1
fi
model_dir
=
../paddle_asr_model
feat_wspecifier
=
./feats.ark
cmvn
=
./cmvn.ark
valgrind
--tool
=
memcheck
--track-origins
=
yes
--leak-check
=
full
--show-leak-kinds
=
all
\
offline_decoder_main
\
--feature_respecifier
=
ark:
$feat_wspecifier
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--param_path
=
$model_dir
/avg_1.jit.pdparams
\
--dict_file
=
$model_dir
/vocab.txt
\
--lm_path
=
$model_dir
/avg_1.jit.klm
speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// todo refactor, repalce with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_tlg_decoder.h"
#include "frontend/audio/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
DEFINE_string
(
feature_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_int32
(
max_active
,
7500
,
"decoder graph"
);
DEFINE_int32
(
receptive_field_length
,
7
,
"receptive field of two CNN(kernel=5) downsampling module."
);
DEFINE_int32
(
downsampling_rate
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
DEFINE_string
(
model_input_names
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
"model input names"
);
DEFINE_string
(
model_output_names
,
"softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0"
,
"model output names"
);
DEFINE_string
(
model_cache_names
,
"chunk_state_h_box,chunk_state_c_box"
,
"model cache names"
);
DEFINE_string
(
model_cache_shapes
,
"5-1-1024,5-1-1024"
,
"model cache shapes"
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test TLG decoder by feeding speech feature.
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
std
::
string
model_graph
=
FLAGS_model_path
;
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
word_symbol_table
=
FLAGS_word_symbol_table
;
std
::
string
graph_path
=
FLAGS_graph_path
;
LOG
(
INFO
)
<<
"model path: "
<<
model_graph
;
LOG
(
INFO
)
<<
"model param: "
<<
model_params
;
LOG
(
INFO
)
<<
"word symbol path: "
<<
word_symbol_table
;
LOG
(
INFO
)
<<
"graph path: "
<<
graph_path
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
TLGDecoderOptions
opts
;
opts
.
word_symbol_table
=
word_symbol_table
;
opts
.
fst_path
=
graph_path
;
opts
.
opts
.
max_active
=
FLAGS_max_active
;
opts
.
opts
.
beam
=
15.0
;
opts
.
opts
.
lattice_beam
=
7.5
;
ppspeech
::
TLGDecoder
decoder
(
opts
);
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
param_path
=
model_params
;
model_opts
.
cache_names
=
FLAGS_model_cache_names
;
model_opts
.
cache_shape
=
FLAGS_model_cache_shapes
;
model_opts
.
input_names
=
FLAGS_model_input_names
;
model_opts
.
output_names
=
FLAGS_model_output_names
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
,
FLAGS_acoustic_scale
));
int32
chunk_size
=
FLAGS_receptive_field_length
;
int32
chunk_stride
=
FLAGS_downsampling_rate
;
int32
receptive_field_length
=
FLAGS_receptive_field_length
;
LOG
(
INFO
)
<<
"chunk size (frame): "
<<
chunk_size
;
LOG
(
INFO
)
<<
"chunk stride (frame): "
<<
chunk_stride
;
LOG
(
INFO
)
<<
"receptive field (frame): "
<<
receptive_field_length
;
decoder
.
InitDecoder
();
kaldi
::
Timer
timer
;
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
raw_data
->
SetDim
(
feature
.
NumCols
());
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
LOG
(
INFO
)
<<
"rows: "
<<
feature
.
NumRows
();
LOG
(
INFO
)
<<
"cols: "
<<
feature
.
NumCols
();
int32
row_idx
=
0
;
int32
padding_len
=
0
;
int32
ori_feature_len
=
feature
.
NumRows
();
if
((
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
!=
0
)
{
padding_len
=
chunk_stride
-
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
;
feature
.
Resize
(
feature
.
NumRows
()
+
padding_len
,
feature
.
NumCols
(),
kaldi
::
kCopyData
);
}
int32
num_chunks
=
(
feature
.
NumRows
()
-
chunk_size
)
/
chunk_stride
+
1
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunks
;
++
chunk_idx
)
{
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feature_chunk
(
chunk_size
*
feature
.
NumCols
());
int32
feature_chunk_size
=
0
;
if
(
ori_feature_len
>
chunk_idx
*
chunk_stride
)
{
feature_chunk_size
=
std
::
min
(
ori_feature_len
-
chunk_idx
*
chunk_stride
,
chunk_size
);
}
if
(
feature_chunk_size
<
receptive_field_length
)
break
;
int32
start
=
chunk_idx
*
chunk_stride
;
for
(
int
row_id
=
0
;
row_id
<
chunk_size
;
++
row_id
)
{
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
tmp
(
feature
,
start
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
f_chunk_tmp
(
feature_chunk
.
Data
()
+
row_id
*
feature
.
NumCols
(),
feature
.
NumCols
());
f_chunk_tmp
.
CopyFromVec
(
tmp
);
++
start
;
}
raw_data
->
Accept
(
feature_chunk
);
if
(
chunk_idx
==
num_chunks
-
1
)
{
raw_data
->
SetFinished
();
}
decoder
.
AdvanceDecode
(
decodable
);
}
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
decodable
->
Reset
();
decoder
.
Reset
();
if
(
result
.
empty
())
{
// the TokenWriter can not write empty string.
++
num_err
;
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is empty"
;
continue
;
}
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
result_writer
.
Write
(
utt
,
result
);
++
num_done
;
}
double
elapsed
=
timer
.
Elapsed
();
KALDI_LOG
<<
" cost:"
<<
elapsed
<<
" s"
;
KALDI_LOG
<<
"Done "
<<
num_done
<<
" utterances, "
<<
num_err
<<
" with errors."
;
return
(
num_done
!=
0
?
0
:
1
);
}
speechx/examples/ds2_ol/feat/.gitignore
已删除
100644 → 0
浏览文件 @
418cc37f
exp
data
speechx/examples/ds2_ol/feat/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
418cc37f
cmake_minimum_required
(
VERSION 3.14 FATAL_ERROR
)
set
(
bin_name linear-spectrogram-wo-db-norm-ol
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
frontend kaldi-util kaldi-feat-common gflags glog
)
set
(
bin_name compute_fbank_main
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
frontend kaldi-util kaldi-feat-common gflags glog
)
set
(
bin_name cmvn-json2kaldi
)
add_executable
(
${
bin_name
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
bin_name
}
.cc
)
target_include_directories
(
${
bin_name
}
PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
${
bin_name
}
utils kaldi-util kaldi-matrix gflags glog
)
speechx/examples/ds2_ol/feat/README.md
已删除
100644 → 0
浏览文件 @
418cc37f
# Deepspeech2 Straming Audio Feature
ASR audio feature test bins. We using theses bins to test linaer/fbank/mfcc asr feature as streaming manner.
*
linear_spectrogram_without_db_norm_main.cc
compute linear spectrogram w/o db norm in streaming manner.
speechx/examples/ds2_ol/feat/cmvn-json2kaldi.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Note: Do not print/log ondemand object.

#include "base/common.h"
#include "base/flags.h"
#include "base/log.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/kaldi-io.h"
#include "utils/file_utils.h"
// #include "boost/json.hpp"
#include <boost/json/src.hpp>

DEFINE_string(json_file, "", "cmvn json file");
DEFINE_string(cmvn_write_path, "./cmvn.ark", "write cmvn");
DEFINE_bool(binary, true, "write cmvn in binary (true) or text(false)");

using namespace boost::json;  // from <boost/json.hpp>

// Convert CMVN stats stored as json ({"mean_stat": [...], "var_stat": [...],
// "frame_num": N}) into the kaldi 2 x (dim + 1) stats matrix layout
// (row 0: mean sums + frame count, row 1: variance sums) and write it with
// WriteKaldiObject to --cmvn_write_path.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    LOG(INFO) << "cmvn json path: " << FLAGS_json_file;  // fixed typo "josn"

    // fixed: removed an unused std::ifstream of the same file; the content is
    // read exactly once through ReadFile2String below.
    std::string json_str = ppspeech::ReadFile2String(FLAGS_json_file);
    auto value = boost::json::parse(json_str);

    if (!value.is_object()) {
        LOG(ERROR) << "Input json file format error.";
        // fixed: bail out instead of falling through and calling as_object()/
        // at() on a non-object value below.
        return 1;
    }

    // Log the three expected stats entries for debugging.
    for (auto obj : value.as_object()) {
        if (obj.key() == "mean_stat") {
            LOG(INFO) << "mean_stat:" << obj.value();
        }
        if (obj.key() == "var_stat") {
            LOG(INFO) << "var_stat: " << obj.value();
        }
        if (obj.key() == "frame_num") {
            LOG(INFO) << "frame_num: " << obj.value();
        }
    }

    boost::json::array mean_stat = value.at("mean_stat").as_array();
    std::vector<kaldi::BaseFloat> mean_stat_vec;
    for (auto it = mean_stat.begin(); it != mean_stat.end(); it++) {
        mean_stat_vec.push_back(it->as_double());
    }

    boost::json::array var_stat = value.at("var_stat").as_array();
    std::vector<kaldi::BaseFloat> var_stat_vec;
    for (auto it = var_stat.begin(); it != var_stat.end(); it++) {
        var_stat_vec.push_back(it->as_double());
    }

    kaldi::int32 frame_num = uint64_t(value.at("frame_num").as_int64());
    LOG(INFO) << "nframe: " << frame_num;

    // Kaldi CMVN stats layout: (0, dim) holds the frame count.
    size_t mean_size = mean_stat_vec.size();
    kaldi::Matrix<double> cmvn_stats(2, mean_size + 1);
    for (size_t idx = 0; idx < mean_size; ++idx) {
        cmvn_stats(0, idx) = mean_stat_vec[idx];
        cmvn_stats(1, idx) = var_stat_vec[idx];
    }
    cmvn_stats(0, mean_size) = frame_num;
    LOG(INFO) << cmvn_stats;

    kaldi::WriteKaldiObject(cmvn_stats, FLAGS_cmvn_write_path, FLAGS_binary);
    LOG(INFO) << "cmvn stats have write into: " << FLAGS_cmvn_write_path;
    LOG(INFO) << "Binary: " << FLAGS_binary;
    return 0;
}
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO: refactor, replace with gtest

#include "base/flags.h"
#include "base/log.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"

#include "frontend/audio/audio_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/fbank.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/normalizer.h"

DEFINE_string(wav_rspecifier, "", "test wav scp path");
DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
DEFINE_string(cmvn_file, "", "read cmvn");
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
DEFINE_int32(num_bins, 161, "fbank num bins");

// Streaming fbank test driver: reads wavs from --wav_rspecifier, feeds them
// chunk by chunk through audio cache -> fbank -> global CMVN -> feature cache,
// and writes the collected per-utterance feature matrix to
// --feature_wspecifier.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);

    // NOTE(review): num_err is reported at the end but never incremented in
    // this loop — confirm whether error counting was intended.
    int32 num_done = 0, num_err = 0;

    // feature pipeline: wave cache --> povey window
    // -->fbank --> global cmvn -> feat cache
    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::AudioCache(3600 * 1600, false));

    ppspeech::FbankOptions opt;
    opt.fbank_opts.frame_opts.frame_length_ms = 25;
    opt.fbank_opts.frame_opts.frame_shift_ms = 10;
    opt.streaming_chunk = FLAGS_streaming_chunk;
    opt.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
    // Disable dither so the output is deterministic across runs.
    opt.fbank_opts.frame_opts.dither = 0.0;

    std::unique_ptr<ppspeech::FrontendInterface> fbank(
        new ppspeech::Fbank(opt, std::move(data_source)));

    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
        new ppspeech::CMVN(FLAGS_cmvn_file, std::move(fbank)));

    ppspeech::FeatureCacheOptions feat_cache_opts;
    // the feature cache output feature chunk by chunk.
    // frame_chunk_size : num frame of a chunk.
    // frame_chunk_stride: chunk sliding window stride.
    feat_cache_opts.frame_chunk_stride = 1;
    feat_cache_opts.frame_chunk_size = 1;
    ppspeech::FeatureCache feature_cache(feat_cache_opts, std::move(cmvn));

    LOG(INFO) << "fbank: " << true;
    LOG(INFO) << "feat dim: " << feature_cache.Dim();

    // Audio is pushed in fixed-size chunks to emulate streaming input.
    int sample_rate = 16000;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    LOG(INFO) << "sr: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    for (; !wav_reader.Done(); wav_reader.Next()) {
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        LOG(INFO) << "process utt: " << utt;

        int32 this_channel = 0;  // only the first channel is used
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        std::vector<kaldi::Vector<BaseFloat>> feats;
        int feature_rows = 0;
        // Feed the waveform chunk by chunk and drain all features produced
        // after each chunk.
        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk(i) = waveform(sample_offset + i);
            }

            kaldi::Vector<BaseFloat> features;
            feature_cache.Accept(wav_chunk);
            // A short final chunk marks end-of-stream so the pipeline flushes.
            if (cur_chunk_size < chunk_sample_size) {
                feature_cache.SetFinished();
            }

            bool flag = true;
            do {
                flag = feature_cache.Read(&features);
                feats.push_back(features);
                // features is flattened (rows * dim); count rows produced.
                feature_rows += features.Dim() / feature_cache.Dim();
            } while (flag == true && features.Dim() != 0);

            sample_offset += cur_chunk_size;
        }

        // Re-assemble the flattened chunks into one (feature_rows x dim)
        // matrix for this utterance.
        int cur_idx = 0;
        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows,
                                                 feature_cache.Dim());
        for (auto feat : feats) {
            int num_rows = feat.Dim() / feature_cache.Dim();
            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
                for (size_t col_idx = 0; col_idx < feature_cache.Dim();
                     ++col_idx) {
                    features(cur_idx, col_idx) =
                        feat(row_idx * feature_cache.Dim() + col_idx);
                }
                ++cur_idx;
            }
        }
        feat_writer.Write(utt, features);
        // Reset per-utterance state before the next wav.
        feature_cache.Reset();

        if (num_done % 50 == 0 && num_done != 0)
            KALDI_VLOG(2) << "Processed " << num_done << " utterances";
        num_done++;
    }
    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO: refactor, replace with gtest

#include "base/flags.h"
#include "base/log.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"

#include "frontend/audio/audio_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/normalizer.h"

DEFINE_string(wav_rspecifier, "", "test wav scp path");
DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
DEFINE_string(cmvn_file, "./cmvn.ark", "read cmvn");
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");

// Streaming linear-spectrogram (without db norm) test driver: reads wavs from
// --wav_rspecifier, feeds them chunk by chunk through audio cache -> hanning
// window -> linear spectrogram -> global CMVN -> feature cache, and writes the
// per-utterance feature matrix to --feature_wspecifier.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);

    // NOTE(review): num_err is reported at the end but never incremented in
    // this loop — confirm whether error counting was intended.
    int32 num_done = 0, num_err = 0;

    // feature pipeline: wave cache --> hanning window
    // -->linear_spectrogram --> global cmvn -> feat cache
    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::AudioCache(3600 * 1600, true));

    ppspeech::LinearSpectrogramOptions opt;
    opt.frame_opts.frame_length_ms = 20;
    opt.frame_opts.frame_shift_ms = 10;
    opt.streaming_chunk = FLAGS_streaming_chunk;
    // Deterministic framing: no dither, no DC-offset removal, no pre-emphasis,
    // hanning window (matches the deepspeech2 training-time frontend).
    opt.frame_opts.dither = 0.0;
    opt.frame_opts.remove_dc_offset = false;
    opt.frame_opts.window_type = "hanning";
    opt.frame_opts.preemph_coeff = 0.0;

    LOG(INFO) << "linear feature: " << true;
    LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
    LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;

    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
        new ppspeech::LinearSpectrogram(opt, std::move(data_source)));

    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
        new ppspeech::CMVN(FLAGS_cmvn_file, std::move(linear_spectrogram)));

    ppspeech::FeatureCacheOptions feat_cache_opts;
    // the feature cache output feature chunk by chunk.
    // frame_chunk_size : num frame of a chunk.
    // frame_chunk_stride: chunk sliding window stride.
    feat_cache_opts.frame_chunk_stride = 1;
    feat_cache_opts.frame_chunk_size = 1;
    ppspeech::FeatureCache feature_cache(feat_cache_opts, std::move(cmvn));

    LOG(INFO) << "feat dim: " << feature_cache.Dim();

    // Audio is pushed in fixed-size chunks to emulate streaming input.
    int sample_rate = 16000;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    LOG(INFO) << "sample rate: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    for (; !wav_reader.Done(); wav_reader.Next()) {
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        LOG(INFO) << "process utt: " << utt;

        int32 this_channel = 0;  // only the first channel is used
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        std::vector<kaldi::Vector<BaseFloat>> feats;
        int feature_rows = 0;
        // Feed the waveform chunk by chunk and drain all features produced
        // after each chunk.
        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk(i) = waveform(sample_offset + i);
            }

            kaldi::Vector<BaseFloat> features;
            feature_cache.Accept(wav_chunk);
            // A short final chunk marks end-of-stream so the pipeline flushes.
            if (cur_chunk_size < chunk_sample_size) {
                feature_cache.SetFinished();
            }

            bool flag = true;
            do {
                flag = feature_cache.Read(&features);
                feats.push_back(features);
                // features is flattened (rows * dim); count rows produced.
                feature_rows += features.Dim() / feature_cache.Dim();
            } while (flag == true && features.Dim() != 0);

            sample_offset += cur_chunk_size;
        }

        // Re-assemble the flattened chunks into one (feature_rows x dim)
        // matrix for this utterance.
        int cur_idx = 0;
        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows,
                                                 feature_cache.Dim());
        for (auto feat : feats) {
            int num_rows = feat.Dim() / feature_cache.Dim();
            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
                for (size_t col_idx = 0; col_idx < feature_cache.Dim();
                     ++col_idx) {
                    features(cur_idx, col_idx) =
                        feat(row_idx * feature_cache.Dim() + col_idx);
                }
                ++cur_idx;
            }
        }
        feat_writer.Write(utt, features);
        // Reset per-utterance state before the next wav.
        feature_cache.Reset();

        if (num_done % 50 == 0 && num_done != 0)
            KALDI_VLOG(2) << "Processed " << num_done << " utterances";
        num_done++;
    }
    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}
speechx/examples/ds2_ol/feat/path.sh
已删除
100644 → 0
浏览文件 @
418cc37f
# This contains the locations of binaries built, required for running the
# examples.
SPEECHX_ROOT=$PWD/../../../
SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }

# fixed: was `export LC_AL=C` — a typo for LC_ALL, so the locale override had
# no effect.
export LC_ALL=C

SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/feat
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
speechx/examples/ds2_ol/feat/run.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash
set +x
set -e

. ./path.sh

# 1. compile the project if the build tree is missing
if [ ! -d ${SPEECHX_EXAMPLES} ]; then
    pushd ${SPEECHX_ROOT}
    bash build.sh
    popd
fi

# 2. fetch and unpack the pretrained ds2 online model (idempotent)
if [ ! -e data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    popd
fi

# produce a one-utterance wav scp for the test
if [ ! -f data/wav.scp ]; then
    mkdir -p data
    pushd data
    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 " $PWD/zh.wav > wav.scp
    popd
fi

# input / output locations
data_dir=./data
exp_dir=./exp
model_dir=$data_dir/model/

mkdir -p $exp_dir

# 3. run feature extraction
export GLOG_logtostderr=1

# convert the model's json CMVN stats into a kaldi ark
cmvn-json2kaldi \
    --json_file $model_dir/data/mean_std.json \
    --cmvn_write_path $exp_dir/cmvn.ark \
    --binary=false
echo "convert json cmvn to kaldi ark."

# compute the streaming linear spectrogram features
linear-spectrogram-wo-db-norm-ol \
    --wav_rspecifier=scp:$data_dir/wav.scp \
    --feature_wspecifier=ark,t:$exp_dir/feats.ark \
    --cmvn_file=$exp_dir/cmvn.ark
echo "compute linear spectrogram feature."
speechx/examples/ds2_ol/feat/valgrind.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash

# this script is for memory check, so please run ./run.sh first.
set +x
set -e

. ./path.sh

if [ ! -d ${SPEECHX_TOOLS}/valgrind/install ]; then
    echo "please install valgrind in the speechx tools dir.\n"
    exit 1
fi

model_dir=../paddle_asr_model
feat_wspecifier=./feats.ark
cmvn=./cmvn.ark

# fixed: the binary built by CMakeLists.txt is `linear-spectrogram-wo-db-norm-ol`
# (there is no `linear_spectrogram_main` target), and that binary reads the
# CMVN stats through --cmvn_file (not --cmvn_write_path).
valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all \
    linear-spectrogram-wo-db-norm-ol \
    --wav_rspecifier=scp:$model_dir/wav.scp \
    --feature_wspecifier=ark,t:$feat_wspecifier \
    --cmvn_file=$cmvn
speechx/examples/ds2_ol/nnet/.gitignore
已删除
100644 → 0
浏览文件 @
418cc37f
data
exp
speechx/examples/ds2_ol/nnet/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
418cc37f
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Smoke-test binary for streaming DeepSpeech2 nnet inference.
add_executable(ds2-model-ol-test
               ${CMAKE_CURRENT_SOURCE_DIR}/ds2-model-ol-test.cc)
target_include_directories(ds2-model-ol-test PRIVATE ${SPEECHX_ROOT}
                                                     ${SPEECHX_ROOT}/kaldi)
target_link_libraries(ds2-model-ol-test PUBLIC nnet gflags glog ${DEPS})
\ No newline at end of file
speechx/examples/ds2_ol/nnet/README.md
已删除
100644 → 0
浏览文件 @
418cc37f
# Deepspeech2 Streaming NNet Test
Used for testing DeepSpeech2 streaming NNet inference.
speechx/examples/ds2_ol/nnet/ds2-model-ol-test.cc
已删除
100644 → 0
浏览文件 @
418cc37f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// deepspeech2 online model info
#include <algorithm>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <thread>
#include "base/flags.h"
#include "base/log.h"
#include "paddle_inference_api.h"
using
std
::
cout
;
using
std
::
endl
;
DEFINE_string
(
model_path
,
""
,
"xxx.pdmodel"
);
DEFINE_string
(
param_path
,
""
,
"xxx.pdiparams"
);
DEFINE_int32
(
chunk_size
,
35
,
"feature chunk size, unit:frame"
);
DEFINE_int32
(
feat_dim
,
161
,
"feature dim"
);
void
produce_data
(
std
::
vector
<
std
::
vector
<
float
>>*
data
);
void
model_forward_test
();
// Fill *data with a synthetic FLAGS_chunk_size x FLAGS_feat_dim feature chunk
// (every value 0.201) used to drive the model forward test.
void produce_data(std::vector<std::vector<float>>* data) {
    int chunk_size = FLAGS_chunk_size;  // chunk_size in frame
    int col_size = FLAGS_feat_dim;      // feat dim
    cout << "chunk size: " << chunk_size << endl;
    cout << "feat dim: " << col_size << endl;

    data->reserve(chunk_size);
    // fixed: the original called data->back().reserve(col_size) here, before
    // any row was pushed — calling back() on an empty vector is undefined
    // behavior. Reserve each row right after it is created instead.
    for (int row = 0; row < chunk_size; ++row) {
        data->push_back(std::vector<float>());
        data->back().reserve(col_size);
        for (int col_idx = 0; col_idx < col_size; ++col_idx) {
            data->back().push_back(0.201);
        }
    }
}
// Run one forward pass of the DeepSpeech2 online model with synthetic input:
// build a constant feature chunk, load the inference model from
// --model_path/--param_path, bind the four inputs (feats, feat length, LSTM
// state_h, LSTM state_c), execute, and print the output probabilities plus
// the returned states' metadata.
void model_forward_test() {
    std::cout << "1. read the data" << std::endl;
    std::vector<std::vector<float>> feats;
    produce_data(&feats);

    std::cout << "2. load the model" << std::endl;
    ;
    std::string model_graph = FLAGS_model_path;
    std::string model_params = FLAGS_param_path;
    CHECK(model_graph != "");
    CHECK(model_params != "");
    cout << "model path: " << model_graph << endl;
    cout << "model param path : " << model_params << endl;

    paddle_infer::Config config;
    config.SetModel(model_graph, model_params);
    // Keep the graph as-is so the run reflects the exported model exactly.
    config.SwitchIrOptim(false);
    cout << "SwitchIrOptim: " << false << endl;
    config.DisableFCPadding();
    cout << "DisableFCPadding: " << endl;
    auto predictor = paddle_infer::CreatePredictor(config);

    std::cout << "3. feat shape, row=" << feats.size()
              << ",col=" << feats[0].size() << std::endl;
    // Flatten the row-major 2-D feats into one contiguous buffer for the
    // tensor copy below.
    std::vector<float> pp_input_mat;
    for (const auto& item : feats) {
        pp_input_mat.insert(pp_input_mat.end(), item.begin(), item.end());
    }

    std::cout << "4. fead the data to model" << std::endl;
    int row = feats.size();
    int col = feats[0].size();
    std::vector<std::string> input_names = predictor->GetInputNames();
    std::vector<std::string> output_names = predictor->GetOutputNames();
    for (auto name : input_names) {
        cout << "model input names: " << name << endl;
    }
    for (auto name : output_names) {
        cout << "model output names: " << name << endl;
    }

    // input: feature tensor, shape (1, row, col)
    std::unique_ptr<paddle_infer::Tensor> input_tensor =
        predictor->GetInputHandle(input_names[0]);
    std::vector<int> INPUT_SHAPE = {1, row, col};
    input_tensor->Reshape(INPUT_SHAPE);
    input_tensor->CopyFromCpu(pp_input_mat.data());

    // input length: number of feature frames, shape (1,)
    std::unique_ptr<paddle_infer::Tensor> input_len =
        predictor->GetInputHandle(input_names[1]);
    std::vector<int> input_len_size = {1};
    input_len->Reshape(input_len_size);
    std::vector<int64_t> audio_len;
    audio_len.push_back(row);
    input_len->CopyFromCpu(audio_len.data());

    // state_h: zero-initialized, shape (5, 1, 1024)
    // NOTE(review): the (5, 1, 1024) state shapes are hard-coded for this
    // particular exported model — confirm against the checkpoint config.
    std::unique_ptr<paddle_infer::Tensor> chunk_state_h_box =
        predictor->GetInputHandle(input_names[2]);
    std::vector<int> chunk_state_h_box_shape = {5, 1, 1024};
    chunk_state_h_box->Reshape(chunk_state_h_box_shape);
    int chunk_state_h_box_size =
        std::accumulate(chunk_state_h_box_shape.begin(),
                        chunk_state_h_box_shape.end(),
                        1,
                        std::multiplies<int>());
    std::vector<float> chunk_state_h_box_data(chunk_state_h_box_size, 0.0f);
    chunk_state_h_box->CopyFromCpu(chunk_state_h_box_data.data());

    // state_c: zero-initialized, shape (5, 1, 1024)
    std::unique_ptr<paddle_infer::Tensor> chunk_state_c_box =
        predictor->GetInputHandle(input_names[3]);
    std::vector<int> chunk_state_c_box_shape = {5, 1, 1024};
    chunk_state_c_box->Reshape(chunk_state_c_box_shape);
    int chunk_state_c_box_size =
        std::accumulate(chunk_state_c_box_shape.begin(),
                        chunk_state_c_box_shape.end(),
                        1,
                        std::multiplies<int>());
    std::vector<float> chunk_state_c_box_data(chunk_state_c_box_size, 0.0f);
    chunk_state_c_box->CopyFromCpu(chunk_state_c_box_data.data());

    // run
    // NOTE(review): the return value is stored but never checked.
    bool success = predictor->Run();

    // state_h out
    std::unique_ptr<paddle_infer::Tensor> h_out =
        predictor->GetOutputHandle(output_names[2]);
    std::vector<int> h_out_shape = h_out->shape();
    int h_out_size = std::accumulate(
        h_out_shape.begin(), h_out_shape.end(), 1, std::multiplies<int>());
    std::vector<float> h_out_data(h_out_size);
    h_out->CopyToCpu(h_out_data.data());

    // stage_c out
    std::unique_ptr<paddle_infer::Tensor> c_out =
        predictor->GetOutputHandle(output_names[3]);
    std::vector<int> c_out_shape = c_out->shape();
    int c_out_size = std::accumulate(
        c_out_shape.begin(), c_out_shape.end(), 1, std::multiplies<int>());
    std::vector<float> c_out_data(c_out_size);
    c_out->CopyToCpu(c_out_data.data());

    // output tensor: probabilities, shape (batch, row, col)
    std::unique_ptr<paddle_infer::Tensor> output_tensor =
        predictor->GetOutputHandle(output_names[0]);
    std::vector<int> output_shape = output_tensor->shape();
    std::vector<float> output_probs;
    int output_size = std::accumulate(
        output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
    output_probs.resize(output_size);
    output_tensor->CopyToCpu(output_probs.data());
    row = output_shape[1];
    col = output_shape[2];

    // probs: un-flatten into row x col
    std::vector<std::vector<float>> probs;
    probs.reserve(row);
    for (int i = 0; i < row; i++) {
        probs.push_back(std::vector<float>());
        probs.back().reserve(col);
        for (int j = 0; j < col; j++) {
            probs.back().push_back(output_probs[i * col + j]);
        }
    }

    // Print every probability value for manual inspection.
    std::vector<std::vector<float>> log_feat = probs;
    std::cout << "probs, row: " << log_feat.size()
              << " col: " << log_feat[0].size() << std::endl;
    for (size_t row_idx = 0; row_idx < log_feat.size(); ++row_idx) {
        for (size_t col_idx = 0; col_idx < log_feat[row_idx].size();
             ++col_idx) {
            std::cout << log_feat[row_idx][col_idx] << " ";
        }
        std::cout << std::endl;
    }
}
// Entry point: parse gflags, initialize glog, then run the single model
// forward smoke test.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    model_forward_test();
    return 0;
}
speechx/examples/ds2_ol/nnet/path.sh
已删除
100644 → 0
浏览文件 @
418cc37f
# This contains the locations of binaries built, required for running the
# examples.
SPEECHX_ROOT=$PWD/../../../
SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }

# fixed: was `export LC_AL=C` — a typo for LC_ALL, so the locale override had
# no effect.
export LC_ALL=C

SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/nnet
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
speechx/examples/ds2_ol/nnet/run.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash
set +x
set -e

. path.sh

# 1. compile the project if the build tree is missing
if [ ! -d ${SPEECHX_EXAMPLES} ]; then
    pushd ${SPEECHX_ROOT}
    bash build.sh
    popd
fi

# 2. fetch and unpack the pretrained ds2 online model (idempotent)
if [ ! -f data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    popd
fi

# produce a one-utterance wav scp for the test
if [ ! -f data/wav.scp ]; then
    mkdir -p data
    pushd data
    wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 " $PWD/zh.wav > wav.scp
    popd
fi

# checkpoint layout inside the downloaded archive
ckpt_dir=./data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/

# run one forward pass against the exported inference model
ds2-model-ol-test \
    --model_path=$model_dir/avg_1.jit.pdmodel \
    --param_path=$model_dir/avg_1.jit.pdiparams
speechx/examples/ds2_ol/nnet/valgrind.sh
已删除
100755 → 0
浏览文件 @
418cc37f
#!/bin/bash

# this script is for memory check, so please run ./run.sh first.
set +x
set -e

. ./path.sh

if [ ! -d ${SPEECHX_TOOLS}/valgrind/install ]; then
    echo "please install valgrind in the speechx tools dir.\n"
    exit 1
fi

model_dir=../paddle_asr_model

# fixed: the binary built by CMakeLists.txt is `ds2-model-ol-test` (there is
# no `pp-model-test` target), and the params file is `avg_1.jit.pdiparams`
# (matching run.sh and the --param_path flag help text "xxx.pdiparams").
valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all \
    ds2-model-ol-test \
    --model_path=$model_dir/avg_1.jit.pdmodel \
    --param_path=$model_dir/avg_1.jit.pdiparams
utils/README.md
浏览文件 @
ae629e2f
# Utils
*
[
kaldi utils
](
https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/egs/wsj/s5/utils
)
* [espnet utils](https://github.com/espnet/espnet/tree/master/utils)
*
[
espnet utils
]
(
https://github.com/espnet/espnet/tree/master/utils
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录