Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
7e334ce8
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7e334ce8
编写于
10月 19, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix the assembler buffer, which did not clear its cache on reset, and add a fill_zero option (default: false)
上级
f9fc32e8
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
77 additions
and
34 deletions
+77
-34
speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh
speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh
+1
-1
speechx/speechx/frontend/audio/assembler.cc
speechx/speechx/frontend/audio/assembler.cc
+38
-12
speechx/speechx/frontend/audio/assembler.h
speechx/speechx/frontend/audio/assembler.h
+16
-15
speechx/speechx/frontend/audio/audio_cache.cc
speechx/speechx/frontend/audio/audio_cache.cc
+4
-0
speechx/speechx/frontend/audio/audio_cache.h
speechx/speechx/frontend/audio/audio_cache.h
+3
-1
speechx/speechx/frontend/audio/feature_cache.cc
speechx/speechx/frontend/audio/feature_cache.cc
+3
-0
speechx/speechx/frontend/audio/feature_cache.h
speechx/speechx/frontend/audio/feature_cache.h
+6
-4
speechx/speechx/nnet/u2_nnet.cc
speechx/speechx/nnet/u2_nnet.cc
+1
-0
speechx/speechx/recognizer/u2_recognizer_main.cc
speechx/speechx/recognizer/u2_recognizer_main.cc
+5
-1
未找到文件。
speechx/examples/u2pp_ol/wenetspeech/local/recognizer.sh
浏览文件 @
7e334ce8
...
@@ -5,7 +5,7 @@ set -e
...
@@ -5,7 +5,7 @@ set -e
data
=
data
data
=
data
exp
=
exp
exp
=
exp
nj
=
2
0
nj
=
4
0
mkdir
-p
$exp
mkdir
-p
$exp
...
...
speechx/speechx/frontend/audio/assembler.cc
浏览文件 @
7e334ce8
...
@@ -23,9 +23,11 @@ using std::unique_ptr;
...
@@ -23,9 +23,11 @@ using std::unique_ptr;
Assembler
::
Assembler
(
AssemblerOptions
opts
,
Assembler
::
Assembler
(
AssemblerOptions
opts
,
unique_ptr
<
FrontendInterface
>
base_extractor
)
{
unique_ptr
<
FrontendInterface
>
base_extractor
)
{
fill_zero_
=
opts
.
fill_zero
;
frame_chunk_stride_
=
opts
.
subsampling_rate
*
opts
.
nnet_decoder_chunk
;
frame_chunk_stride_
=
opts
.
subsampling_rate
*
opts
.
nnet_decoder_chunk
;
frame_chunk_size_
=
(
opts
.
nnet_decoder_chunk
-
1
)
*
opts
.
subsampling_rate
+
frame_chunk_size_
=
(
opts
.
nnet_decoder_chunk
-
1
)
*
opts
.
subsampling_rate
+
opts
.
receptive_filed_length
;
opts
.
receptive_filed_length
;
cache_size_
=
frame_chunk_size_
-
frame_chunk_stride_
;
receptive_filed_length_
=
opts
.
receptive_filed_length
;
receptive_filed_length_
=
opts
.
receptive_filed_length
;
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
dim_
=
base_extractor_
->
Dim
();
dim_
=
base_extractor_
->
Dim
();
...
@@ -38,14 +40,13 @@ void Assembler::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
...
@@ -38,14 +40,13 @@ void Assembler::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
// pop feature chunk
// pop feature chunk
bool
Assembler
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
bool
Assembler
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
feats
->
Resize
(
dim_
*
frame_chunk_size_
);
bool
result
=
Compute
(
feats
);
bool
result
=
Compute
(
feats
);
return
result
;
return
result
;
}
}
// read
all data
from base_feature_extractor_ into cache_
// read
frame by frame
from base_feature_extractor_ into cache_
bool
Assembler
::
Compute
(
Vector
<
BaseFloat
>*
feats
)
{
bool
Assembler
::
Compute
(
Vector
<
BaseFloat
>*
feats
)
{
// compute and feed
// compute and feed
frame by frame
bool
result
=
false
;
bool
result
=
false
;
while
(
feature_cache_
.
size
()
<
frame_chunk_size_
)
{
while
(
feature_cache_
.
size
()
<
frame_chunk_size_
)
{
Vector
<
BaseFloat
>
feature
;
Vector
<
BaseFloat
>
feature
;
...
@@ -54,33 +55,58 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
...
@@ -54,33 +55,58 @@ bool Assembler::Compute(Vector<BaseFloat>* feats) {
if
(
IsFinished
()
==
false
)
return
false
;
if
(
IsFinished
()
==
false
)
return
false
;
break
;
break
;
}
}
CHECK
(
feature
.
Dim
()
==
dim_
);
nframes_
+=
1
;
VLOG
(
1
)
<<
"nframes: "
<<
nframes_
;
feature_cache_
.
push
(
feature
);
feature_cache_
.
push
(
feature
);
}
}
if
(
feature_cache_
.
size
()
<
receptive_filed_length_
)
{
if
(
feature_cache_
.
size
()
<
receptive_filed_length_
)
{
VLOG
(
1
)
<<
"feature_cache less than receptive_filed_lenght. "
<<
feature_cache_
.
size
()
<<
": "
<<
receptive_filed_length_
;
return
false
;
return
false
;
}
}
while
(
feature_cache_
.
size
()
<
frame_chunk_size_
)
{
Vector
<
BaseFloat
>
feature
(
dim_
,
kaldi
::
kSetZero
);
if
(
fill_zero_
){
feature_cache_
.
push
(
feature
);
while
(
feature_cache_
.
size
()
<
frame_chunk_size_
)
{
Vector
<
BaseFloat
>
feature
(
dim_
,
kaldi
::
kSetZero
);
nframes_
+=
1
;
feature_cache_
.
push
(
feature
);
}
}
}
int32
this_chunk_size
=
std
::
min
(
static_cast
<
int32
>
(
feature_cache_
.
size
()),
frame_chunk_size_
);
feats
->
Resize
(
dim_
*
this_chunk_size
);
int32
counter
=
0
;
int32
counter
=
0
;
int32
cache_size
=
frame_chunk_size_
-
frame_chunk_stride_
;
while
(
counter
<
this_chunk_size
)
{
int32
elem_dim
=
base_extractor_
->
Dim
();
while
(
counter
<
frame_chunk_size_
)
{
Vector
<
BaseFloat
>&
val
=
feature_cache_
.
front
();
Vector
<
BaseFloat
>&
val
=
feature_cache_
.
front
();
int32
start
=
counter
*
elem_dim
;
CHECK
(
val
.
Dim
()
==
dim_
)
<<
val
.
Dim
();
feats
->
Range
(
start
,
elem_dim
).
CopyFromVec
(
val
);
if
(
frame_chunk_size_
-
counter
<=
cache_size
)
{
int32
start
=
counter
*
dim_
;
feats
->
Range
(
start
,
dim_
).
CopyFromVec
(
val
);
if
(
this_chunk_size
-
counter
<=
cache_size_
)
{
feature_cache_
.
push
(
val
);
feature_cache_
.
push
(
val
);
}
}
// val is reference, so we should pop here
feature_cache_
.
pop
();
feature_cache_
.
pop
();
counter
++
;
counter
++
;
}
}
return
result
;
return
result
;
}
}
void
Assembler
::
Reset
()
{
std
::
queue
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>
empty
;
std
::
swap
(
feature_cache_
,
empty
);
nframes_
=
0
;
base_extractor_
->
Reset
();
}
}
// namespace ppspeech
}
// namespace ppspeech
speechx/speechx/frontend/audio/assembler.h
浏览文件 @
7e334ce8
...
@@ -22,14 +22,10 @@ namespace ppspeech {
...
@@ -22,14 +22,10 @@ namespace ppspeech {
struct
AssemblerOptions
{
struct
AssemblerOptions
{
// refer:https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/paddlespeech/s2t/exps/deepspeech2/model.py
// refer:https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/paddlespeech/s2t/exps/deepspeech2/model.py
// the nnet batch forward
// the nnet batch forward
int32
receptive_filed_length
;
int32
receptive_filed_length
{
1
};
int32
subsampling_rate
;
int32
subsampling_rate
{
1
};
int32
nnet_decoder_chunk
;
int32
nnet_decoder_chunk
{
1
};
bool
fill_zero
{
false
};
// whether to pad the last chunk with zero frames when it is shorter than frame_chunk_size_
AssemblerOptions
()
:
receptive_filed_length
(
1
),
subsampling_rate
(
1
),
nnet_decoder_chunk
(
1
)
{}
};
};
class
Assembler
:
public
FrontendInterface
{
class
Assembler
:
public
FrontendInterface
{
...
@@ -39,29 +35,34 @@ class Assembler : public FrontendInterface {
...
@@ -39,29 +35,34 @@ class Assembler : public FrontendInterface {
std
::
unique_ptr
<
FrontendInterface
>
base_extractor
=
NULL
);
std
::
unique_ptr
<
FrontendInterface
>
base_extractor
=
NULL
);
// Feed feats or waves
// Feed feats or waves
v
irtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
;
v
oid
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
override
;
// feats size = num_frames * feat_dim
// feats size = num_frames * feat_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
;
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
override
;
// feat dim
// feat dim
virtual
size_t
Dim
()
const
{
return
dim_
;
}
size_t
Dim
()
const
override
{
return
dim_
;
}
v
irtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
v
oid
SetFinished
()
override
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
bool
IsFinished
()
const
override
{
return
base_extractor_
->
IsFinished
();
}
v
irtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
v
oid
Reset
()
override
;
private:
private:
bool
Compute
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
bool
Compute
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
int32
dim_
;
bool
fill_zero_
{
false
};
int32
dim_
;
// feat dim
int32
frame_chunk_size_
;
// window
int32
frame_chunk_size_
;
// window
int32
frame_chunk_stride_
;
// stride
int32
frame_chunk_stride_
;
// stride
int32
cache_size_
;
// window - stride
int32
receptive_filed_length_
;
int32
receptive_filed_length_
;
std
::
queue
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>
feature_cache_
;
std
::
queue
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>
feature_cache_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
int32
nframes_
;
// num frame computed
DISALLOW_COPY_AND_ASSIGN
(
Assembler
);
DISALLOW_COPY_AND_ASSIGN
(
Assembler
);
};
};
...
...
speechx/speechx/frontend/audio/audio_cache.cc
浏览文件 @
7e334ce8
...
@@ -83,6 +83,10 @@ bool AudioCache::Read(Vector<BaseFloat>* waves) {
...
@@ -83,6 +83,10 @@ bool AudioCache::Read(Vector<BaseFloat>* waves) {
}
}
size_
-=
chunk_size
;
size_
-=
chunk_size
;
offset_
=
(
offset_
+
chunk_size
)
%
ring_buffer_
.
size
();
offset_
=
(
offset_
+
chunk_size
)
%
ring_buffer_
.
size
();
nsamples_
+=
chunk_size
;
VLOG
(
1
)
<<
"nsamples readed: "
<<
nsamples_
;
ready_feed_condition_
.
notify_one
();
ready_feed_condition_
.
notify_one
();
return
true
;
return
true
;
}
}
...
...
speechx/speechx/frontend/audio/audio_cache.h
浏览文件 @
7e334ce8
...
@@ -41,10 +41,11 @@ class AudioCache : public FrontendInterface {
...
@@ -41,10 +41,11 @@ class AudioCache : public FrontendInterface {
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
v
irtual
void
Reset
()
{
v
oid
Reset
()
override
{
offset_
=
0
;
offset_
=
0
;
size_
=
0
;
size_
=
0
;
finished_
=
false
;
finished_
=
false
;
nsamples_
=
0
;
}
}
private:
private:
...
@@ -61,6 +62,7 @@ class AudioCache : public FrontendInterface {
...
@@ -61,6 +62,7 @@ class AudioCache : public FrontendInterface {
kaldi
::
int32
timeout_
;
// millisecond
kaldi
::
int32
timeout_
;
// millisecond
bool
to_float32_
;
// int16 -> float32. used in linear_spectrogram
bool
to_float32_
;
// int16 -> float32. used in linear_spectrogram
int32
nsamples_
;
// number of samples read.
DISALLOW_COPY_AND_ASSIGN
(
AudioCache
);
DISALLOW_COPY_AND_ASSIGN
(
AudioCache
);
};
};
...
...
speechx/speechx/frontend/audio/feature_cache.cc
浏览文件 @
7e334ce8
...
@@ -73,6 +73,9 @@ bool FeatureCache::Compute() {
...
@@ -73,6 +73,9 @@ bool FeatureCache::Compute() {
if
(
result
==
false
||
feature
.
Dim
()
==
0
)
return
false
;
if
(
result
==
false
||
feature
.
Dim
()
==
0
)
return
false
;
int32
num_chunk
=
feature
.
Dim
()
/
dim_
;
int32
num_chunk
=
feature
.
Dim
()
/
dim_
;
nframe_
+=
num_chunk
;
VLOG
(
1
)
<<
"nframe computed: "
<<
nframe_
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunk
;
++
chunk_idx
)
{
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunk
;
++
chunk_idx
)
{
int32
start
=
chunk_idx
*
dim_
;
int32
start
=
chunk_idx
*
dim_
;
Vector
<
BaseFloat
>
feature_chunk
(
dim_
);
Vector
<
BaseFloat
>
feature_chunk
(
dim_
);
...
...
speechx/speechx/frontend/audio/feature_cache.h
浏览文件 @
7e334ce8
...
@@ -51,11 +51,12 @@ class FeatureCache : public FrontendInterface {
...
@@ -51,11 +51,12 @@ class FeatureCache : public FrontendInterface {
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
void
Reset
()
override
{
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
empty
;
std
::
swap
(
cache_
,
empty
);
nframe_
=
0
;
base_extractor_
->
Reset
();
base_extractor_
->
Reset
();
while
(
!
cache_
.
empty
())
{
VLOG
(
1
)
<<
"feature cache reset: cache size: "
<<
cache_
.
size
();
cache_
.
pop
();
}
}
}
private:
private:
...
@@ -74,6 +75,7 @@ class FeatureCache : public FrontendInterface {
...
@@ -74,6 +75,7 @@ class FeatureCache : public FrontendInterface {
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
std
::
condition_variable
ready_read_condition_
;
int32
nframe_
;
// num of feature computed
DISALLOW_COPY_AND_ASSIGN
(
FeatureCache
);
DISALLOW_COPY_AND_ASSIGN
(
FeatureCache
);
};
};
...
...
speechx/speechx/nnet/u2_nnet.cc
浏览文件 @
7e334ce8
...
@@ -153,6 +153,7 @@ void U2Nnet::Reset() {
...
@@ -153,6 +153,7 @@ void U2Nnet::Reset() {
std
::
move
(
paddle
::
zeros
({
0
,
0
,
0
,
0
},
paddle
::
DataType
::
FLOAT32
));
std
::
move
(
paddle
::
zeros
({
0
,
0
,
0
,
0
},
paddle
::
DataType
::
FLOAT32
));
encoder_outs_
.
clear
();
encoder_outs_
.
clear
();
VLOG
(
1
)
<<
"u2nnet reset"
;
}
}
// Debug API
// Debug API
...
...
speechx/speechx/recognizer/u2_recognizer_main.cc
浏览文件 @
7e334ce8
...
@@ -82,9 +82,13 @@ int main(int argc, char* argv[]) {
...
@@ -82,9 +82,13 @@ int main(int argc, char* argv[]) {
// no overlap
// no overlap
sample_offset
+=
cur_chunk_size
;
sample_offset
+=
cur_chunk_size
;
}
}
CHECK
(
sample_offset
==
tot_samples
);
// recognizer.SetFinished();
// second pass decoding
// second pass decoding
recognizer
.
Rescoring
();
recognizer
.
Rescoring
();
std
::
string
result
=
recognizer
.
GetFinalResult
();
std
::
string
result
=
recognizer
.
GetFinalResult
();
recognizer
.
Reset
();
recognizer
.
Reset
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录