Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
28dafea0
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
28dafea0
编写于
10月 21, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add fill zero opt for frontend
上级
83f885c6
变更
7
显示空白变更内容
内联
并排
Showing
7 changed files
with
18 additions
and
13 deletions
+18
-13
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+1
-0
speechx/speechx/frontend/audio/assembler.cc
speechx/speechx/frontend/audio/assembler.cc
+5
-6
speechx/speechx/frontend/audio/feature_pipeline.h
speechx/speechx/frontend/audio/feature_pipeline.h
+7
-3
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+1
-1
speechx/speechx/recognizer/recognizer.h
speechx/speechx/recognizer/recognizer.h
+2
-0
speechx/speechx/recognizer/u2_recognizer.h
speechx/speechx/recognizer/u2_recognizer.h
+2
-0
speechx/speechx/recognizer/u2_recognizer_main.cc
speechx/speechx/recognizer/u2_recognizer_main.cc
+0
-3
未找到文件。
speechx/speechx/decoder/param.h
浏览文件 @
28dafea0
...
...
@@ -20,6 +20,7 @@
// feature
DEFINE_bool
(
use_fbank
,
false
,
"False for fbank; or linear feature"
);
DEFINE_bool
(
fill_zero
,
false
,
"fill zero at last chunk, when chunk < chunk_size"
);
// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
// feature, or fbank");
DEFINE_int32
(
num_bins
,
161
,
"num bins of mel"
);
...
...
speechx/speechx/frontend/audio/assembler.cc
浏览文件 @
28dafea0
...
...
@@ -47,17 +47,16 @@ bool Assembler::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
// read frame by frame from base_feature_extractor_ into cache_
bool
Assembler
::
Compute
(
Vector
<
BaseFloat
>*
feats
)
{
// compute and feed frame by frame
bool
result
=
false
;
while
(
feature_cache_
.
size
()
<
frame_chunk_size_
)
{
Vector
<
BaseFloat
>
feature
;
result
=
base_extractor_
->
Read
(
&
feature
);
bool
result
=
base_extractor_
->
Read
(
&
feature
);
if
(
result
==
false
||
feature
.
Dim
()
==
0
)
{
VLOG
(
1
)
<<
"result: "
<<
result
<<
"feature dim: "
<<
feature
.
Dim
();
VLOG
(
1
)
<<
"result: "
<<
result
<<
"
feature dim: "
<<
feature
.
Dim
();
if
(
IsFinished
()
==
false
)
{
LOG
(
INFO
)
<<
"finished reading feature. cache size: "
<<
feature_cache_
.
size
();
VLOG
(
1
)
<<
"finished reading feature. cache size: "
<<
feature_cache_
.
size
();
return
false
;
}
else
{
LOG
(
INFO
)
<<
"break"
;
VLOG
(
1
)
<<
"break"
;
break
;
}
}
...
...
@@ -103,7 +102,7 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
counter
++
;
}
CHECK
(
feature_cache_
.
size
()
==
cache_size_
);
CHECK
(
feature_cache_
.
size
()
==
cache_size_
);
return
true
;
}
...
...
speechx/speechx/frontend/audio/feature_pipeline.h
浏览文件 @
28dafea0
...
...
@@ -27,6 +27,7 @@
// feature
DECLARE_bool
(
use_fbank
);
DECLARE_bool
(
fill_zero
);
DECLARE_int32
(
num_bins
);
DECLARE_string
(
cmvn_file
);
...
...
@@ -80,15 +81,18 @@ struct FeaturePipelineOptions {
// assembler opts
opts
.
assembler_opts
.
subsampling_rate
=
FLAGS_subsampling_rate
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
assembler_opts
.
subsampling_rate
;
opts
.
assembler_opts
.
receptive_filed_length
=
FLAGS_receptive_field_length
;
opts
.
assembler_opts
.
nnet_decoder_chunk
=
FLAGS_nnet_decoder_chunk
;
opts
.
assembler_opts
.
fill_zero
=
FLAGS_fill_zero
;
LOG
(
INFO
)
<<
"subsampling rate: "
<<
opts
.
assembler_opts
.
subsampling_rate
;
LOG
(
INFO
)
<<
"nnet receptive filed length: "
<<
opts
.
assembler_opts
.
receptive_filed_length
;
opts
.
assembler_opts
.
nnet_decoder_chunk
=
FLAGS_nnet_decoder_chunk
;
LOG
(
INFO
)
<<
"nnet chunk size: "
<<
opts
.
assembler_opts
.
nnet_decoder_chunk
;
LOG
(
INFO
)
<<
"frontend fill zeros: "
<<
opts
.
assembler_opts
.
fill_zero
;
return
opts
;
}
};
...
...
speechx/speechx/nnet/decodable.cc
浏览文件 @
28dafea0
...
...
@@ -114,7 +114,7 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
// read one frame likelihood
bool
Decodable
::
FrameLikelihood
(
int32
frame
,
vector
<
BaseFloat
>*
likelihood
)
{
if
(
EnsureFrameHaveComputed
(
frame
)
==
false
)
{
LOG
(
INFO
)
<<
"framelikehood exit."
;
VLOG
(
1
)
<<
"framelikehood exit."
;
return
false
;
}
...
...
speechx/speechx/recognizer/recognizer.h
浏览文件 @
28dafea0
...
...
@@ -38,6 +38,8 @@ struct RecognizerResource {
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
feature_pipeline_opts
.
assembler_opts
.
fill_zero
=
true
;
LOG
(
INFO
)
<<
"ds2 need fill zero be true: "
<<
resource
.
feature_pipeline_opts
.
assembler_opts
.
fill_zero
;
resource
.
model_opts
=
ModelOptions
::
InitFromFlags
();
resource
.
tlg_opts
=
TLGDecoderOptions
::
InitFromFlags
();
return
resource
;
...
...
speechx/speechx/recognizer/u2_recognizer.h
浏览文件 @
28dafea0
...
...
@@ -101,6 +101,8 @@ struct U2RecognizerResource {
resource
.
feature_pipeline_opts
=
ppspeech
::
FeaturePipelineOptions
::
InitFromFlags
();
resource
.
feature_pipeline_opts
.
assembler_opts
.
fill_zero
=
false
;
LOG
(
INFO
)
<<
"u2 need fill zero be false: "
<<
resource
.
feature_pipeline_opts
.
assembler_opts
.
fill_zero
;
resource
.
model_opts
=
ppspeech
::
ModelOptions
::
InitFromFlags
();
resource
.
decoder_opts
=
ppspeech
::
DecodeOptions
::
InitFromFlags
();
return
resource
;
...
...
speechx/speechx/recognizer/u2_recognizer_main.cc
浏览文件 @
28dafea0
...
...
@@ -85,9 +85,6 @@ int main(int argc, char* argv[]) {
cnt
++
;
}
CHECK
(
sample_offset
==
tot_samples
);
VLOG
(
1
)
<<
"num decode: "
<<
cnt
;
// recognizer.SetFinished();
// second pass decoding
recognizer
.
Rescoring
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录