Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
93c3e03b
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
93c3e03b
编写于
4月 01, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
more comment
上级
92d699c1
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
43 addition
and
16 deletion
+43
-16
speechx/examples/decoder/decoder_test_main.cc
speechx/examples/decoder/decoder_test_main.cc
+6
-1
speechx/examples/decoder/offline_decoder_main.cc
speechx/examples/decoder/offline_decoder_main.cc
+1
-1
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
...hx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+35
-13
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+1
-1
未找到文件。
speechx/examples/decoder/decoder_test_main.cc
浏览文件 @
93c3e03b
...
...
@@ -24,11 +24,11 @@ DEFINE_string(nnet_prob_respecifier, "", "test nnet prob rspecifier");
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test decoder by feeding nnet posterior probability
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
...
...
@@ -37,6 +37,8 @@ int main(int argc, char* argv[]) {
FLAGS_nnet_prob_respecifier
);
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
...
...
@@ -53,6 +55,9 @@ int main(int argc, char* argv[]) {
for
(;
!
likelihood_reader
.
Done
();
likelihood_reader
.
Next
())
{
string
utt
=
likelihood_reader
.
Key
();
const
kaldi
::
Matrix
<
BaseFloat
>
likelihood
=
likelihood_reader
.
Value
();
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
LOG
(
INFO
)
<<
"rows: "
<<
likelihood
.
NumRows
();
LOG
(
INFO
)
<<
"cols: "
<<
likelihood
.
NumCols
();
decodable
->
Acceptlikelihood
(
likelihood
);
decoder
.
AdvanceDecode
(
decodable
);
std
::
string
result
;
...
...
speechx/examples/decoder/offline_decoder_main.cc
浏览文件 @
93c3e03b
...
...
@@ -34,6 +34,7 @@ using kaldi::BaseFloat;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test decoder by feeding speech feature, deprecated.
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
...
...
@@ -55,7 +56,6 @@ int main(int argc, char* argv[]) {
// frontend + nnet is decodable
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
cache_shape
=
"5-1-1024,5-1-1024"
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
params_path
=
model_params
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
...
...
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
浏览文件 @
93c3e03b
...
...
@@ -27,12 +27,19 @@ DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
DEFINE_int32
(
receptive_field_length
,
7
,
"receptive field of two CNN(kernel=5) downsampling module."
);
DEFINE_int32
(
downsampling_rate
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
using
kaldi
::
BaseFloat
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
// test ds2 online decoder by feeding speech feature
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
...
...
@@ -43,6 +50,11 @@ int main(int argc, char* argv[]) {
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
LOG
(
INFO
)
<<
"model path: "
<<
model_graph
;
LOG
(
INFO
)
<<
"model param: "
<<
model_params
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
int32
num_done
=
0
,
num_err
=
0
;
...
...
@@ -57,34 +69,44 @@ int main(int argc, char* argv[]) {
model_opts
.
cache_shape
=
"5-1-1024,5-1-1024"
;
std
::
shared_ptr
<
ppspeech
::
PaddleNnet
>
nnet
(
new
ppspeech
::
PaddleNnet
(
model_opts
));
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
DataCache
>
raw_data
(
new
ppspeech
::
DataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
));
int32
chunk_size
=
7
;
int32
chunk_stride
=
4
;
int32
receptive_field_length
=
7
;
int32
chunk_size
=
FLAGS_receptive_field_length
;
int32
chunk_stride
=
FLAGS_downsampling_rate
;
int32
receptive_field_length
=
FLAGS_receptive_field_length
;
LOG
(
INFO
)
<<
"chunk size (frame): "
<<
chunk_size
;
LOG
(
INFO
)
<<
"chunk stride (frame): "
<<
chunk_stride
;
LOG
(
INFO
)
<<
"receptive field (frame): "
<<
receptive_field_length
;
decoder
.
InitDecoder
();
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
raw_data
->
SetDim
(
feature
.
NumCols
());
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
LOG
(
INFO
)
<<
"rows: "
<<
feature
.
NumRows
();
LOG
(
INFO
)
<<
"cols: "
<<
feature
.
NumCols
();
int32
row_idx
=
0
;
int32
padding_len
=
0
;
int32
ori_feature_len
=
feature
.
NumRows
();
if
(
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
!=
0
)
{
padding_len
=
chunk_stride
-
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
;
feature
.
Resize
(
feature
.
NumRows
()
+
padding_len
,
feature
.
NumCols
(),
kaldi
::
kCopyData
);
int32
ori_feature_len
=
feature
.
NumRows
();
if
((
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
!=
0
)
{
padding_len
=
chunk_stride
-
(
feature
.
NumRows
()
-
chunk_size
)
%
chunk_stride
;
feature
.
Resize
(
feature
.
NumRows
()
+
padding_len
,
feature
.
NumCols
(),
kaldi
::
kCopyData
);
}
int32
num_chunks
=
(
feature
.
NumRows
()
-
chunk_size
)
/
chunk_stride
+
1
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunks
;
++
chunk_idx
)
{
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feature_chunk
(
chunk_size
*
feature
.
NumCols
());
int32
feature_chunk_size
=
0
;
if
(
ori_feature_len
>
chunk_idx
*
chunk_stride
)
{
feature_chunk_size
=
std
::
min
(
ori_feature_len
-
chunk_idx
*
chunk_stride
,
chunk_size
);
int32
feature_chunk_size
=
0
;
if
(
ori_feature_len
>
chunk_idx
*
chunk_stride
)
{
feature_chunk_size
=
std
::
min
(
ori_feature_len
-
chunk_idx
*
chunk_stride
,
chunk_size
);
}
if
(
feature_chunk_size
<
receptive_field_length
)
break
;
...
...
speechx/speechx/nnet/decodable.cc
浏览文件 @
93c3e03b
...
...
@@ -82,7 +82,7 @@ void Decodable::Reset() {
if
(
nnet_
!=
nullptr
)
nnet_
->
Reset
();
frame_offset_
=
0
;
frames_ready_
=
0
;
nnet_cache_
.
Resize
(
0
,
0
);
nnet_cache_
.
Resize
(
0
,
0
);
}
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录