Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ba812854
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
ba812854
编写于
4月 22, 2022
作者:
Y
YangZhou
提交者:
GitHub
4月 22, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1757 from zh794390558/ws
[speechx] change opt convert2PCM32 to to_float32, fix shell script
上级
3ad43431
972f2dd6
变更
16
隐藏空白更改
内联
并排
Showing
16 changed files
with
125 additions
and
84 deletions
+125
-84
speechx/examples/README.md
speechx/examples/README.md
+2
-2
speechx/examples/ds2_ol/README.md
speechx/examples/ds2_ol/README.md
+5
-5
speechx/examples/ds2_ol/aishell/run.sh
speechx/examples/ds2_ol/aishell/run.sh
+10
-29
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
.../examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
+1
-1
speechx/examples/ds2_ol/websocket/websocket_client.sh
speechx/examples/ds2_ol/websocket/websocket_client.sh
+2
-4
speechx/examples/ds2_ol/websocket/websocket_server.sh
speechx/examples/ds2_ol/websocket/websocket_server.sh
+22
-8
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+2
-2
speechx/speechx/frontend/audio/audio_cache.cc
speechx/speechx/frontend/audio/audio_cache.cc
+6
-6
speechx/speechx/frontend/audio/audio_cache.h
speechx/speechx/frontend/audio/audio_cache.h
+6
-5
speechx/speechx/frontend/audio/cmvn.cc
speechx/speechx/frontend/audio/cmvn.cc
+3
-0
speechx/speechx/frontend/audio/data_cache.h
speechx/speechx/frontend/audio/data_cache.h
+1
-0
speechx/speechx/frontend/audio/fbank.h
speechx/speechx/frontend/audio/fbank.h
+43
-10
speechx/speechx/frontend/audio/feature_cache.cc
speechx/speechx/frontend/audio/feature_cache.cc
+8
-3
speechx/speechx/frontend/audio/feature_cache.h
speechx/speechx/frontend/audio/feature_cache.h
+11
-6
speechx/speechx/frontend/audio/feature_pipeline.cc
speechx/speechx/frontend/audio/feature_pipeline.cc
+1
-1
speechx/speechx/frontend/audio/feature_pipeline.h
speechx/speechx/frontend/audio/feature_pipeline.h
+2
-2
未找到文件。
speechx/examples/README.md
浏览文件 @
ba812854
# Examples for SpeechX
*
ds2_ol - ds2 streaming test under
`aishell-1`
test dataset.
The entrypoint is
`ds2_ol/aishell/run.sh`
*
ds2_ol - ds2 streaming test under
`aishell-1`
test dataset.
The entrypoint is
`ds2_ol/aishell/run.sh`
## How to run
...
...
speechx/examples/ds2_ol/README.md
浏览文件 @
ba812854
# Deepspeech2 Streaming
# Deepspeech2 Streaming
ASR
Please go to
`aishell`
to test it.
*
aishell
Deepspeech2 Streaming Decoding under aishell dataset.
*
websocket
Streaming ASR with websocket.
*
aishell
Streaming Decoding under aishell dataset, for local WER test and so on.
## More
The following are for development and offline testing:
*
nnet
*
feat
...
...
speechx/examples/ds2_ol/aishell/run.sh
浏览文件 @
ba812854
...
...
@@ -112,8 +112,8 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
utils/compute-wer.py
--char
=
1
--v
=
1
$text
$exp
/
${
label_file
}
_lm
>
$exp
/
${
wer
}
.lm
fi
wfst
=
$data
/wfst/
if
[
${
stage
}
-le
4
]
&&
[
${
stop_stage
}
-ge
4
]
;
then
wfst
=
$data
/wfst/
mkdir
-p
$wfst
if
[
!
-f
$wfst
/aishell_graph.zip
]
;
then
pushd
$wfst
...
...
@@ -122,18 +122,18 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
mv
aishell_graph/
*
$wfst
popd
fi
fi
graph_dir
=
$wfst
/
if
[
${
stage
}
-le
4
]
&&
[
${
stop_stage
}
-ge
4
]
;
then
# TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/recog.wfst.log
\
wfst-decoder-ol
\
--feature_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/feat.scp
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$
graph_dir
/words.txt
\
--word_symbol_table
=
$
wfst
/words.txt
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--graph_path
=
$
graph_dir
/TLG.fst
--max_active
=
7500
\
--graph_path
=
$
wfst
/TLG.fst
--max_active
=
7500
\
--acoustic_scale
=
1.2
\
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result_tlg
...
...
@@ -142,40 +142,21 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
fi
if
[
${
stage
}
-le
5
]
&&
[
${
stop_stage
}
-ge
5
]
;
then
cmvn
=
$data
/cmvn.ark
if
[
!
-f
$data
/split
${
nj
}
/1/
${
aishell_wav_scp
}
]
;
then
cmvn-json2kaldi
--json_file
=
$ckpt_dir
/data/mean_std.json
--cmvn_write_path
=
$cmvn
./local/split_data.sh
$data
${
data
}
/
${
aishell_wav_scp
}
$aishell_wav_scp
$nj
fi
wfst
=
$data
/wfst/
mkdir
-p
$wfst
if
[
!
-f
$wfst
/aishell_graph.zip
]
;
then
pushd
$wfst
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
unzip aishell_graph.zip
popd
fi
graph_dir
=
$wfst
/aishell_graph
# TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/recognizer.log
\
recognizer_test_main
\
--wav_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/
${
aishell_wav_scp
}
\
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--
convert2PCM
32
=
true
\
--
to_float
32
=
true
\
--streaming_chunk
=
30
\
--param
s
_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$
graph_dir
/words.txt
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$
wfst
/words.txt
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--graph_path
=
$
graph_dir
/TLG.fst
--max_active
=
7500
\
--graph_path
=
$
wfst
/TLG.fst
--max_active
=
7500
\
--acoustic_scale
=
1.2
\
--result_wspecifier
=
ark,t:
$data
/split
${
nj
}
/JOB/result_recognizer
cat
$data
/split
${
nj
}
/
*
/result_recognizer
>
$exp
/
${
label_file
}
_recognizer
utils/compute-wer.py
--char
=
1
--v
=
1
$text
$exp
/
${
label_file
}
_recognizer
>
$exp
/
${
wer
}
.recognizer
fi
fi
\ No newline at end of file
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
浏览文件 @
ba812854
...
...
@@ -115,7 +115,7 @@ int main(int argc, char* argv[]) {
flag
=
feature_cache
.
Read
(
&
features
);
feats
.
push_back
(
features
);
feature_rows
+=
features
.
Dim
()
/
feature_cache
.
Dim
();
}
while
(
flag
==
true
&&
features
.
Dim
()
!=
0
);
}
while
(
flag
==
true
&&
features
.
Dim
()
!=
0
);
sample_offset
+=
cur_chunk_size
;
}
...
...
speechx/examples/ds2_ol/websocket/websocket_client.sh
浏览文件 @
ba812854
...
...
@@ -14,9 +14,7 @@ fi
# input
mkdir
-p
data
data
=
$PWD
/data
ckpt_dir
=
$data
/model
model_dir
=
$ckpt_dir
/exp/deepspeech2_online/checkpoints/
vocb_dir
=
$ckpt_dir
/data/lang_char
# output
aishell_wav_scp
=
aishell_test.scp
if
[
!
-d
$data
/test
]
;
then
...
...
@@ -34,4 +32,4 @@ export GLOG_logtostderr=1
# websocket client
websocket_client_main
\
--wav_rspecifier
=
scp:
$data
/
$aishell_wav_scp
--streaming_chunk
=
0.36
--wav_rspecifier
=
scp:
$data
/
$aishell_wav_scp
--streaming_chunk
=
0.36
\ No newline at end of file
speechx/examples/ds2_ol/websocket/websocket_server.sh
浏览文件 @
ba812854
...
...
@@ -19,12 +19,26 @@ ckpt_dir=$data/model
model_dir
=
$ckpt_dir
/exp/deepspeech2_online/checkpoints/
vocb_dir
=
$ckpt_dir
/data/lang_char/
# output
aishell_wav_scp
=
aishell_test.scp
if
[
!
-d
$data
/test
]
;
then
pushd
$data
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
unzip aishell_test.zip
popd
realpath
$data
/test/
*
/
*
.wav
>
$data
/wavlist
awk
-F
'/'
'{ print $(NF) }'
$data
/wavlist |
awk
-F
'.'
'{ print $1 }'
>
$data
/utt_id
paste
$data
/utt_id
$data
/wavlist
>
$data
/
$aishell_wav_scp
fi
if
[
!
-f
$ckpt_dir
/data/mean_std.json
]
;
then
mkdir
-p
$ckpt_dir
pushd
$ckpt_dir
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
tar
xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
popd
mkdir
-p
$ckpt_dir
pushd
$ckpt_dir
wget
-c
https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
tar
xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
popd
fi
export
GLOG_logtostderr
=
1
...
...
@@ -49,9 +63,9 @@ websocket_server_main \
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--streaming_chunk
=
0.1
\
--
convert2PCM
32
=
true
\
--
to_float
32
=
true
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$
data
/
wfst/words.txt
\
--word_symbol_table
=
$wfst
/words.txt
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--graph_path
=
$
data
/
wfst/TLG.fst
--max_active
=
7500
\
--graph_path
=
$wfst
/TLG.fst
--max_active
=
7500
\
--acoustic_scale
=
1.2
speechx/speechx/decoder/param.h
浏览文件 @
ba812854
...
...
@@ -21,7 +21,7 @@
DEFINE_string
(
cmvn_file
,
""
,
"read cmvn"
);
DEFINE_double
(
streaming_chunk
,
0.1
,
"streaming feature chunk size"
);
DEFINE_bool
(
convert2PCM
32
,
true
,
"audio convert to pcm32"
);
DEFINE_bool
(
to_float
32
,
true
,
"audio convert to pcm32"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
...
...
@@ -52,7 +52,7 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
FeaturePipelineOptions
opts
;
opts
.
cmvn_file
=
FLAGS_cmvn_file
;
opts
.
linear_spectrogram_opts
.
streaming_chunk
=
FLAGS_streaming_chunk
;
opts
.
convert2PCM32
=
FLAGS_convert2PCM
32
;
opts
.
to_float32
=
FLAGS_to_float
32
;
kaldi
::
FrameExtractionOptions
frame_opts
;
frame_opts
.
frame_length_ms
=
20
;
frame_opts
.
frame_shift_ms
=
10
;
...
...
speechx/speechx/frontend/audio/audio_cache.cc
浏览文件 @
ba812854
...
...
@@ -21,17 +21,18 @@ using kaldi::BaseFloat;
using
kaldi
::
VectorBase
;
using
kaldi
::
Vector
;
AudioCache
::
AudioCache
(
int
buffer_size
,
bool
convert2PCM
32
)
AudioCache
::
AudioCache
(
int
buffer_size
,
bool
to_float
32
)
:
finished_
(
false
),
capacity_
(
buffer_size
),
capacity_
(
buffer_size
),
// unit: sample
size_
(
0
),
offset_
(
0
),
timeout_
(
1
),
convert2PCM32_
(
convert2PCM
32
)
{
timeout_
(
1
),
// ms
to_float32_
(
to_float
32
)
{
ring_buffer_
.
resize
(
capacity_
);
}
BaseFloat
AudioCache
::
Convert2PCM32
(
BaseFloat
val
)
{
// sample type int16, int16->float32
return
val
*
(
1.
/
std
::
pow
(
2.0
,
15
));
}
...
...
@@ -43,8 +44,7 @@ void AudioCache::Accept(const VectorBase<BaseFloat>& waves) {
for
(
size_t
idx
=
0
;
idx
<
waves
.
Dim
();
++
idx
)
{
int32
buffer_idx
=
(
idx
+
offset_
+
size_
)
%
ring_buffer_
.
size
();
ring_buffer_
[
buffer_idx
]
=
waves
(
idx
);
if
(
convert2PCM32_
)
ring_buffer_
[
buffer_idx
]
=
Convert2PCM32
(
waves
(
idx
));
if
(
to_float32_
)
ring_buffer_
[
buffer_idx
]
=
Convert2PCM32
(
waves
(
idx
));
}
size_
+=
waves
.
Dim
();
}
...
...
speechx/speechx/frontend/audio/audio_cache.h
浏览文件 @
ba812854
...
...
@@ -24,7 +24,7 @@ namespace ppspeech {
class
AudioCache
:
public
FrontendInterface
{
public:
explicit
AudioCache
(
int
buffer_size
=
1000
*
kint16max
,
bool
convert2PCM
32
=
true
);
bool
to_float
32
=
true
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
);
...
...
@@ -50,14 +50,15 @@ class AudioCache : public FrontendInterface {
kaldi
::
BaseFloat
Convert2PCM32
(
kaldi
::
BaseFloat
val
);
std
::
vector
<
kaldi
::
BaseFloat
>
ring_buffer_
;
size_t
offset_
;
// offset in ring_buffer_
size_t
size_
;
// samples in ring_buffer_ now
size_t
capacity_
;
// capacity of ring_buffer_
size_t
offset_
;
// offset in ring_buffer_, begin of data
size_t
size_
;
// samples in ring_buffer_, size of valid data
size_t
capacity_
;
// capacity of ring_buffer_, full size of data buffer,
// unit: sample
bool
finished_
;
// reach audio end
std
::
mutex
mutex_
;
std
::
condition_variable
ready_feed_condition_
;
kaldi
::
int32
timeout_
;
// millisecond
bool
convert2PCM
32_
;
bool
to_float
32_
;
DISALLOW_COPY_AND_ASSIGN
(
AudioCache
);
};
...
...
speechx/speechx/frontend/audio/cmvn.cc
浏览文件 @
ba812854
...
...
@@ -37,14 +37,17 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
}
void
CMVN
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
// feed waves/feats to compute feature
base_extractor_
->
Accept
(
inputs
);
return
;
}
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feats
)
{
// compute feature
if
(
base_extractor_
->
Read
(
feats
)
==
false
||
feats
->
Dim
()
==
0
)
{
return
false
;
}
// apply cmvn
Compute
(
feats
);
return
true
;
}
...
...
speechx/speechx/frontend/audio/data_cache.h
浏览文件 @
ba812854
...
...
@@ -27,6 +27,7 @@ class DataCache : public FrontendInterface {
public:
explicit
DataCache
()
{
finished_
=
false
;
}
// accept waves/feats
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
data_
=
inputs
;
}
...
...
speechx/speechx/frontend/audio/fbank.h
浏览文件 @
ba812854
...
...
@@ -15,23 +15,56 @@
// wrap the fbank feat of kaldi, todo (SmileGoat)
#include "kaldi/feat/feature-mfcc.h"
#incldue "kaldi/matrix/kaldi-vector.h"
namespace
ppspeech
{
class
FbankExtractor
:
FrontendInterface
{
struct
FbankOptions
{
kaldi
::
FrameExtractionOptions
frame_opts
;
kaldi
::
BaseFloat
streaming_chunk
;
// second
LinearSpectrogramOptions
()
:
streaming_chunk
(
0.1
),
frame_opts
()
{}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"streaming-chunk"
,
&
streaming_chunk
,
"streaming chunk size, default: 0.1 sec"
);
frame_opts
.
Register
(
opts
);
}
};
class
Fbank
:
FrontendInterface
{
public:
explicit
FbankExtractor
(
const
FbankOptions
&
opts
,
share_ptr
<
FrontendInterface
>
pre_extractor
);
virtual
void
AcceptWaveform
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
virtual
size_t
Dim
()
const
=
0
;
explicit
Fbank
(
const
FbankOptions
&
opts
,
unique_ptr
<
FrontendInterface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// the dim_ is the dim of single frame feature
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
remained_wav_
.
Resize
(
0
);
}
private:
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
wave
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
const
;
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// kaldi::FeatureWindowFunction feature_window_funtion_;
// kaldi::BaseFloat hanning_window_energy_;
size_t
dim_
;
FbankOptions
opts_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
remained_wav_
;
int
chunk_sample_size_
;
DISALLOW_COPY_AND_ASSIGN
(
Fbank
);
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/audio/feature_cache.cc
浏览文件 @
ba812854
...
...
@@ -28,11 +28,13 @@ FeatureCache::FeatureCache(FeatureCacheOptions opts,
max_size_
=
opts
.
max_size
;
frame_chunk_stride_
=
opts
.
frame_chunk_stride
;
frame_chunk_size_
=
opts
.
frame_chunk_size
;
timeout_
=
opts
.
timeout
;
// ms
base_extractor_
=
std
::
move
(
base_extractor
);
dim_
=
base_extractor_
->
Dim
();
}
void
FeatureCache
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
{
// read inputs
base_extractor_
->
Accept
(
inputs
);
// feed current data
bool
result
=
false
;
...
...
@@ -49,9 +51,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feats) {
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
// todo refactor: wait
// ready_read_condition_.wait(lock);
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
// todo replace 1 with timeout_, 1 ms
if
(
elapsed
>
1
)
{
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
// ms
if
(
elapsed
>
timeout_
)
{
return
false
;
}
usleep
(
100
);
// sleep 0.1 ms
...
...
@@ -70,6 +71,8 @@ bool FeatureCache::Compute() {
Vector
<
BaseFloat
>
feature
;
bool
result
=
base_extractor_
->
Read
(
&
feature
);
if
(
result
==
false
||
feature
.
Dim
()
==
0
)
return
false
;
// join with remained
int32
joint_len
=
feature
.
Dim
()
+
remained_feature_
.
Dim
();
int32
num_chunk
=
((
joint_len
/
dim_
)
-
frame_chunk_size_
)
/
frame_chunk_stride_
+
1
;
...
...
@@ -82,6 +85,7 @@ bool FeatureCache::Compute() {
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunk
;
++
chunk_idx
)
{
int32
start
=
chunk_idx
*
frame_chunk_stride_
*
dim_
;
Vector
<
BaseFloat
>
feature_chunk
(
frame_chunk_size_
*
dim_
);
SubVector
<
BaseFloat
>
tmp
(
joint_feature
.
Data
()
+
start
,
frame_chunk_size_
*
dim_
);
...
...
@@ -89,6 +93,7 @@ bool FeatureCache::Compute() {
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
size
()
>=
max_size_
)
{
// cache full, wait
ready_feed_condition_
.
wait
(
lock
);
}
...
...
speechx/speechx/frontend/audio/feature_cache.h
浏览文件 @
ba812854
...
...
@@ -23,8 +23,12 @@ struct FeatureCacheOptions {
int32
max_size
;
int32
frame_chunk_size
;
int32
frame_chunk_stride
;
int32
timeout
;
// ms
FeatureCacheOptions
()
:
max_size
(
kint16max
),
frame_chunk_size
(
1
),
frame_chunk_stride
(
1
)
{}
:
max_size
(
kint16max
),
frame_chunk_size
(
1
),
frame_chunk_stride
(
1
),
timeout
(
1
)
{}
};
class
FeatureCache
:
public
FrontendInterface
{
...
...
@@ -64,14 +68,15 @@ class FeatureCache : public FrontendInterface {
bool
Compute
();
int32
dim_
;
size_t
max_size_
;
int32
frame_chunk_size_
;
int32
frame_chunk_stride_
;
size_t
max_size_
;
// cache capacity
int32
frame_chunk_size_
;
// window
int32
frame_chunk_stride_
;
// stride
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
kaldi
::
int32
timeout_
;
// ms
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
remained_feature_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
// feature cache
std
::
mutex
mutex_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
...
...
speechx/speechx/frontend/audio/feature_pipeline.cc
浏览文件 @
ba812854
...
...
@@ -20,7 +20,7 @@ using std::unique_ptr;
FeaturePipeline
::
FeaturePipeline
(
const
FeaturePipelineOptions
&
opts
)
{
unique_ptr
<
FrontendInterface
>
data_source
(
new
ppspeech
::
AudioCache
(
1000
*
kint16max
,
opts
.
convert2PCM
32
));
new
ppspeech
::
AudioCache
(
1000
*
kint16max
,
opts
.
to_float
32
));
unique_ptr
<
FrontendInterface
>
linear_spectrogram
(
new
ppspeech
::
LinearSpectrogram
(
opts
.
linear_spectrogram_opts
,
...
...
speechx/speechx/frontend/audio/feature_pipeline.h
浏览文件 @
ba812854
...
...
@@ -27,12 +27,12 @@ namespace ppspeech {
struct
FeaturePipelineOptions
{
std
::
string
cmvn_file
;
bool
convert2PCM
32
;
bool
to_float
32
;
LinearSpectrogramOptions
linear_spectrogram_opts
;
FeatureCacheOptions
feature_cache_opts
;
FeaturePipelineOptions
()
:
cmvn_file
(
""
),
convert2PCM
32
(
false
),
to_float
32
(
false
),
linear_spectrogram_opts
(),
feature_cache_opts
()
{}
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录