Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c769d907
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c769d907
编写于
3月 09, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
make feature cache& raw audio work
上级
ac0e4170
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
610 addition
and
518 deletion
+610
-518
speechx/examples/feat/CMakeLists.txt
speechx/examples/feat/CMakeLists.txt
+3
-3
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+34
-42
speechx/examples/feat/streaming_feat_main.cc
speechx/examples/feat/streaming_feat_main.cc
+0
-109
speechx/speechx/base/common.h
speechx/speechx/base/common.h
+7
-6
speechx/speechx/frontend/CMakeLists.txt
speechx/speechx/frontend/CMakeLists.txt
+2
-1
speechx/speechx/frontend/feature_cache.cc
speechx/speechx/frontend/feature_cache.cc
+62
-16
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+39
-12
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+6
-2
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+108
-109
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+28
-12
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+153
-127
speechx/speechx/frontend/normalizer.h
speechx/speechx/frontend/normalizer.h
+39
-18
speechx/speechx/frontend/raw_audio.cc
speechx/speechx/frontend/raw_audio.cc
+72
-47
speechx/speechx/frontend/raw_audio.h
speechx/speechx/frontend/raw_audio.h
+57
-14
未找到文件。
speechx/examples/feat/CMakeLists.txt
浏览文件 @
c769d907
...
@@ -5,6 +5,6 @@ add_executable(mfcc-test ${CMAKE_CURRENT_SOURCE_DIR}/feature-mfcc-test.cc)
...
@@ -5,6 +5,6 @@ add_executable(mfcc-test ${CMAKE_CURRENT_SOURCE_DIR}/feature-mfcc-test.cc)
target_include_directories
(
mfcc-test PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_include_directories
(
mfcc-test PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
mfcc-test kaldi-mfcc
)
target_link_libraries
(
mfcc-test kaldi-mfcc
)
add_executable
(
linear-spectrogram-main
${
CMAKE_CURRENT_SOURCE_DIR
}
/linear-spectrogram-main.cc
)
add_executable
(
linear_spectrogram_main
${
CMAKE_CURRENT_SOURCE_DIR
}
/linear_spectrogram_main.cc
)
target_include_directories
(
linear-spectrogram-main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_include_directories
(
linear_spectrogram_main PRIVATE
${
SPEECHX_ROOT
}
${
SPEECHX_ROOT
}
/kaldi
)
target_link_libraries
(
linear-spectrogram-main frontend kaldi-util kaldi-feat-common gflags glog
)
target_link_libraries
(
linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog
)
\ No newline at end of file
\ No newline at end of file
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
c769d907
...
@@ -14,11 +14,13 @@
...
@@ -14,11 +14,13 @@
// todo refactor, replace with gtest
// todo refactor, replace with gtest
#include "frontend/linear_spectrogram.h"
#include "base/flags.h"
#include "base/flags.h"
#include "base/log.h"
#include "base/log.h"
#include "frontend/feature_cache.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/linear_spectrogram.h"
#include "frontend/normalizer.h"
#include "frontend/normalizer.h"
#include "frontend/raw_audio.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include "kaldi/util/table-types.h"
...
@@ -158,38 +160,37 @@ int main(int argc, char* argv[]) {
...
@@ -158,38 +160,37 @@ int main(int argc, char* argv[]) {
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
FLAGS_wav_rspecifier
);
FLAGS_wav_rspecifier
);
kaldi
::
BaseFloatMatrixWriter
feat_writer
(
FLAGS_feature_wspecifier
);
kaldi
::
BaseFloatMatrixWriter
feat_writer
(
FLAGS_feature_wspecifier
);
kaldi
::
BaseFloatMatrixWriter
feat_cmvn_check_writer
(
FLAGS_feature_check_wspecifier
);
WriteMatrix
();
WriteMatrix
();
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> cmvn
// window -->linear_spectrogram --> cmvn
int32
num_done
=
0
,
num_err
=
0
;
int32
num_done
=
0
,
num_err
=
0
;
// std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
// ppspeech::RawDataSource());
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
new
ppspeech
::
RawAudioSource
());
ppspeech
::
LinearSpectrogramOptions
opt
;
ppspeech
::
LinearSpectrogramOptions
opt
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_shift_ms
=
10
;
opt
.
frame_opts
.
frame_shift_ms
=
10
;
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
base_feature_extractor
(
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
base_feature_extractor
(
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
));
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
,
std
::
move
(
data_source
)));
ppspeech
::
LinearSpectrogram
linear_spectrogram
(
opt
,
std
::
move
(
base_feature_extractor
));
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
linear_spectrogram
(
new
ppspeech
::
LinearSpectrogram
(
opt
,
std
::
move
(
base_feature_extractor
)));
ppspeech
::
CMVN
cmvn
(
FLAGS_cmvn_write_path
);
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
FLAGS_cmvn_write_path
,
std
::
move
(
linear_spectrogram
)));
ppspeech
::
FeatureCache
feature_cache
(
kint16max
,
std
::
move
(
cmvn
));
float
streaming_chunk
=
0.36
;
float
streaming_chunk
=
0.36
;
int
sample_rate
=
16000
;
int
sample_rate
=
16000
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
LOG
(
INFO
)
<<
mean_
.
size
();
for
(
size_t
i
=
0
;
i
<
mean_
.
size
();
i
++
)
{
mean_
[
i
]
/=
count_
;
variance_
[
i
]
=
variance_
[
i
]
/
count_
-
mean_
[
i
]
*
mean_
[
i
];
if
(
variance_
[
i
]
<
1.0e-20
)
{
variance_
[
i
]
=
1.0e-20
;
}
variance_
[
i
]
=
1.0
/
std
::
sqrt
(
variance_
[
i
]);
}
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
std
::
string
utt
=
wav_reader
.
Key
();
std
::
string
utt
=
wav_reader
.
Key
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
...
@@ -199,54 +200,45 @@ int main(int argc, char* argv[]) {
...
@@ -199,54 +200,45 @@ int main(int argc, char* argv[]) {
this_channel
);
this_channel
);
int
tot_samples
=
waveform
.
Dim
();
int
tot_samples
=
waveform
.
Dim
();
int
sample_offset
=
0
;
int
sample_offset
=
0
;
std
::
vector
<
kaldi
::
Matrix
<
BaseFloat
>>
feats
;
std
::
vector
<
kaldi
::
Vector
<
BaseFloat
>>
feats
;
int
feature_rows
=
0
;
int
feature_rows
=
0
;
while
(
sample_offset
<
tot_samples
)
{
while
(
sample_offset
<
tot_samples
)
{
int
cur_chunk_size
=
int
cur_chunk_size
=
std
::
min
(
chunk_sample_size
,
tot_samples
-
sample_offset
);
std
::
min
(
chunk_sample_size
,
tot_samples
-
sample_offset
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wav_chunk
(
cur_chunk_size
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wav_chunk
(
cur_chunk_size
);
for
(
int
i
=
0
;
i
<
cur_chunk_size
;
++
i
)
{
for
(
int
i
=
0
;
i
<
cur_chunk_size
;
++
i
)
{
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
}
}
kaldi
::
Matrix
<
BaseFloat
>
features
;
kaldi
::
Vector
<
BaseFloat
>
features
;
linear_spectrogram
.
AcceptWaveform
(
wav_chunk
);
feature_cache
.
AcceptWaveform
(
wav_chunk
);
linear_spectrogram
.
ReadFeats
(
&
features
);
if
(
cur_chunk_size
<
chunk_sample_size
)
{
feature_cache
.
SetFinished
();
}
feature_cache
.
Read
(
&
features
);
if
(
features
.
Dim
()
==
0
)
break
;
feats
.
push_back
(
features
);
feats
.
push_back
(
features
);
sample_offset
+=
cur_chunk_size
;
sample_offset
+=
cur_chunk_size
;
feature_rows
+=
features
.
NumRows
();
feature_rows
+=
features
.
Dim
()
/
feature_cache
.
Dim
();
}
}
int
cur_idx
=
0
;
int
cur_idx
=
0
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
features
(
feature_rows
,
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
features
(
feature_rows
,
feat
s
[
0
].
NumCols
());
feat
ure_cache
.
Dim
());
for
(
auto
feat
:
feats
)
{
for
(
auto
feat
:
feats
)
{
for
(
int
row_idx
=
0
;
row_idx
<
feat
.
NumRows
();
++
row_idx
)
{
int
num_rows
=
feat
.
Dim
()
/
feature_cache
.
Dim
();
for
(
int
col_idx
=
0
;
col_idx
<
feat
.
NumCols
();
++
col_idx
)
{
for
(
int
row_idx
=
0
;
row_idx
<
num_rows
;
++
row_idx
)
{
for
(
size_t
col_idx
=
0
;
col_idx
<
feature_cache
.
Dim
();
++
col_idx
)
{
features
(
cur_idx
,
col_idx
)
=
features
(
cur_idx
,
col_idx
)
=
(
feat
(
row_idx
,
col_idx
)
-
mean_
[
col_idx
])
*
feat
(
row_idx
*
feature_cache
.
Dim
()
+
col_idx
);
variance_
[
col_idx
];
}
}
++
cur_idx
;
++
cur_idx
;
}
}
}
}
feat_writer
.
Write
(
utt
,
features
);
feat_writer
.
Write
(
utt
,
features
);
cur_idx
=
0
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
features_check
(
feature_rows
,
feats
[
0
].
NumCols
());
for
(
auto
feat
:
feats
)
{
for
(
int
row_idx
=
0
;
row_idx
<
feat
.
NumRows
();
++
row_idx
)
{
for
(
int
col_idx
=
0
;
col_idx
<
feat
.
NumCols
();
++
col_idx
)
{
features_check
(
cur_idx
,
col_idx
)
=
feat
(
row_idx
,
col_idx
);
}
kaldi
::
SubVector
<
BaseFloat
>
row_feat
(
features_check
,
cur_idx
);
cmvn
.
ApplyCMVN
(
true
,
&
row_feat
);
++
cur_idx
;
}
}
feat_cmvn_check_writer
.
Write
(
utt
,
features_check
);
if
(
num_done
%
50
==
0
&&
num_done
!=
0
)
if
(
num_done
%
50
==
0
&&
num_done
!=
0
)
KALDI_VLOG
(
2
)
<<
"Processed "
<<
num_done
<<
" utterances"
;
KALDI_VLOG
(
2
)
<<
"Processed "
<<
num_done
<<
" utterances"
;
num_done
++
;
num_done
++
;
...
...
speechx/examples/feat/streaming_feat_main.cc
已删除
100644 → 0
浏览文件 @
ac0e4170
// todo refactor, replace with gtest
#include "base/log.h"
#include "base/flags.h"
#include "frontend/linear_spectrogram.h"
#include "frontend/normalizer.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/raw_audio.h"
#include "kaldi/util/table-types.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
// Command-line flags of the streaming feature test.
//
// The help strings were copy-pasted ("test wav ark") and did not describe the
// flags they belong to; they now say what each flag is used for in this file.

// Kaldi rspecifier of the input waveforms; main() opens it with a
// SequentialTableReader<WaveHolder>.
DEFINE_string(wav_rspecifier, "", "test wav path");
// Kaldi wspecifier that receives the extracted feature matrices.
DEFINE_string(feature_wspecifier, "", "output feature ark");
// Declared for a feature cross-check output; not referenced by main() here.
DEFINE_string(feature_check_wspecifier, "", "output ark for the feature check");
// File WriteMatrix() stores the CMVN statistics in; the same path is handed
// to the CMVN stage in main().
DEFINE_string(cmvn_write_path, "./cmvn.ark", "path to write the cmvn stats to");
std
::
vector
<
float
>
mean_
{
-
13730251.531853663
,
-
12982852.199316509
,
-
13673844.299583456
,
-
13089406.559646806
,
-
12673095.524938712
,
-
12823859.223276224
,
-
13590267.158903603
,
-
14257618.467152044
,
-
14374605.116185192
,
-
14490009.21822485
,
-
14849827.158924166
,
-
15354435.470563512
,
-
15834149.206532761
,
-
16172971.985514281
,
-
16348740.496746974
,
-
16423536.699409386
,
-
16556246.263649225
,
-
16744088.772748645
,
-
16916184.08510357
,
-
17054034.840031497
,
-
17165612.509455364
,
-
17255955.470915023
,
-
17322572.527648456
,
-
17408943.862033736
,
-
17521554.799865916
,
-
17620623.254924215
,
-
17699792.395918526
,
-
17723364.411134344
,
-
17741483.4433254
,
-
17747426.888704527
,
-
17733315.928209435
,
-
17748780.160905756
,
-
17808336.883775543
,
-
17895918.671983004
,
-
18009812.59173023
,
-
18098188.66548325
,
-
18195798.958462656
,
-
18293617.62980999
,
-
18397432.92077201
,
-
18505834.787318766
,
-
18585451.8100908
,
-
18652438.235649142
,
-
18700960.306275308
,
-
18734944.58792185
,
-
18737426.313365128
,
-
18735347.165987637
,
-
18738813.444170244
,
-
18737086.848890636
,
-
18731576.2474336
,
-
18717405.44095871
,
-
18703089.25545657
,
-
18691014.546456724
,
-
18692460.568905357
,
-
18702119.628629155
,
-
18727710.621126678
,
-
18761582.72034647
,
-
18806745.835547544
,
-
18850674.8692112
,
-
18884431.510951452
,
-
18919999.992506847
,
-
18939303.799078144
,
-
18952946.273760635
,
-
18980289.22996379
,
-
19011610.17803294
,
-
19040948.61805145
,
-
19061021.429847397
,
-
19112055.53768819
,
-
19149667.414264943
,
-
19201127.05091321
,
-
19270250.82564605
,
-
19334606.883057203
,
-
19390513.336589377
,
-
19444176.259208687
,
-
19502755.000038862
,
-
19544333.014549147
,
-
19612668.183176614
,
-
19681902.19006569
,
-
19771969.951249883
,
-
19873329.723376893
,
-
19996752.59235844
,
-
20110031.131400537
,
-
20231658.612529557
,
-
20319378.894054495
,
-
20378534.45718066
,
-
20413332.089584175
,
-
20438147.844177883
,
-
20443710.248040095
,
-
20465457.02238927
,
-
20488610.969337028
,
-
20516295.16424432
,
-
20541423.795738827
,
-
20553192.874953747
,
-
20573605.50701977
,
-
20577871.61936797
,
-
20571807.008916274
,
-
20556242.38912231
,
-
20542199.30819195
,
-
20521239.063551214
,
-
20519150.80004532
,
-
20527204.80248933
,
-
20536933.769257784
,
-
20543470.522332076
,
-
20549700.089992985
,
-
20551525.24958494
,
-
20554873.406493705
,
-
20564277.65794227
,
-
20572211.740052115
,
-
20574305.69550465
,
-
20575494.450104576
,
-
20567092.577932164
,
-
20549302.929608088
,
-
20545445.11878376
,
-
20546625.326603737
,
-
20549190.03499401
,
-
20554824.947828256
,
-
20568341.378989458
,
-
20577582.331383612
,
-
20577980.519402675
,
-
20566603.03458152
,
-
20560131.592262644
,
-
20552166.469060015
,
-
20549063.06763577
,
-
20544490.562339947
,
-
20539817.82346569
,
-
20528747.715731595
,
-
20518026.24576161
,
-
20510977.844974525
,
-
20506874.36087992
,
-
20506731.11977665
,
-
20510482.133420516
,
-
20507760.92101862
,
-
20494644.834457114
,
-
20480107.89304893
,
-
20461312.091867123
,
-
20442941.75080173
,
-
20426123.02834838
,
-
20424607.675283
,
-
20426810.369107097
,
-
20434024.50097819
,
-
20437404.75544205
,
-
20447688.63916367
,
-
20460893.335563846
,
-
20482922.735127095
,
-
20503610.119434915
,
-
20527062.76448319
,
-
20557830.035128627
,
-
20593274.72068722
,
-
20632528.452965066
,
-
20673637.471334763
,
-
20733106.97143075
,
-
20842921.0447562
,
-
21054357.83621519
,
-
21416569.534189366
,
-
21978460.272811692
,
-
22753170.052172784
,
-
23671344.10563395
,
-
24613499.293358143
,
-
25406477.12230188
,
-
25884377.82156489
,
-
26049040.62791664
,
-
26996879.104431007
};
std
::
vector
<
float
>
variance_
{
213747175.10846674
,
188395815.34302503
,
212706429.10966414
,
199109025.81461075
,
189235901.23864496
,
194901336.53253657
,
217481594.29306737
,
238689869.12327808
,
243977501.24115244
,
248479623.6431067
,
259766741.47116545
,
275516766.7790273
,
291271202.3691234
,
302693239.8220509
,
308627358.3997694
,
311143911.38788426
,
315446105.07731867
,
321705430.9341829
,
327458907.4659941
,
332245072.43223983
,
336251717.5935284
,
339694069.7639722
,
342188204.4322228
,
345587110.31313115
,
349903086.2875232
,
353660214.20643026
,
356700344.5270885
,
357665362.3529641
,
358493352.05658793
,
358857951.620328
,
358375239.52774596
,
358899733.6342954
,
361051818.3511561
,
364361716.05025816
,
368750322.3771452
,
372047800.6462831
,
375655861.1349018
,
379358519.1980013
,
383327605.3935181
,
387458599.282341
,
390434692.3406868
,
392994486.35057056
,
394874418.04603153
,
396230525.79763395
,
396365592.0414835
,
396334819.8242737
,
396488353.19250053
,
396438877.00744957
,
396197980.4459586
,
395590921.6672991
,
395001107.62072515
,
394528291.7318225
,
394593110.424006
,
395018405.59353715
,
396110577.5415993
,
397506704.0371068
,
399400197.4657644
,
401243568.2468382
,
402687134.7805103
,
404136047.2872507
,
404883170.001883
,
405522253.219517
,
406660365.3626476
,
407919346.0991902
,
409045348.5384909
,
409759588.7889818
,
411974821.8564483
,
413489718.78201455
,
415535392.56684107
,
418466481.97674364
,
421104678.35678065
,
423405392.5200779
,
425550570.40798235
,
427929423.9579701
,
429585274.253478
,
432368493.55181056
,
435193587.13513297
,
438886855.20476013
,
443058876.8633751
,
448181232.5093362
,
452883835.6332396
,
458056721.77926534
,
461816531.22735566
,
464363620.1970998
,
465886343.5057493
,
466928872.0651
,
467180536.42647296
,
468111848.70714295
,
469138695.3071312
,
470378429.6930793
,
471517958.7132626
,
472109050.4262365
,
473087417.0177867
,
473381322.04648733
,
473220195.85483915
,
472666071.8998819
,
472124669.87879956
,
471298571.411737
,
471251033.2902761
,
471672676.43128747
,
472177147.2193172
,
472572361.7711908
,
472968783.7751127
,
473156295.4164052
,
473398034.82676554
,
473897703.5203811
,
474328271.33112127
,
474452670.98002136
,
474549003.99284613
,
474252887.13567275
,
473557462.909069
,
473483385.85193115
,
473609738.04855174
,
473746944.82085115
,
474016729.91696435
,
474617321.94138587
,
475045097.237122
,
475125402.586558
,
474664112.9824912
,
474426247.5800283
,
474104075.42796475
,
473978219.7273978
,
473773171.7798875
,
473578534.69508696
,
473102924.16904145
,
472651240.5232615
,
472374383.1810912
,
472209479.6956096
,
472202298.8921673
,
472370090.76781124
,
472220933.99374026
,
471625467.37106377
,
470994646.51883453
,
470182428.9637543
,
469348211.5939578
,
468570387.4467277
,
468540442.7225135
,
468672018.90414184
,
468994346.9533251
,
469138757.58201426
,
469553915.95710236
,
470134523.38582784
,
471082421.62055486
,
471962316.51804745
,
472939745.1708408
,
474250621.5944825
,
475773933.43199486
,
477465399.71087736
,
479218782.61382693
,
481752299.7930922
,
486608947.8984568
,
496119403.2067917
,
512730085.5704984
,
539048915.2641417
,
576285298.3548826
,
621610270.2240586
,
669308196.4436442
,
710656993.5957186
,
736344437.3725077
,
745481288.0241544
,
801121432.9925804
};
// Count that accompanies the mean_/variance_ tables; WriteMatrix() stores it
// in the last column of row 0 of the CMVN stats matrix.
// NOTE(review): presumably the number of frames the statistics were
// accumulated over -- confirm against the tool that produced these values.
int
count_
=
912592
;
void
WriteMatrix
()
{
kaldi
::
Matrix
<
double
>
cmvn_stats
(
2
,
mean_
.
size
()
+
1
);
for
(
size_t
idx
=
0
;
idx
<
mean_
.
size
();
++
idx
)
{
cmvn_stats
(
0
,
idx
)
=
mean_
[
idx
];
cmvn_stats
(
1
,
idx
)
=
variance_
[
idx
];
}
cmvn_stats
(
0
,
mean_
.
size
())
=
count_
;
kaldi
::
WriteKaldiObject
(
cmvn_stats
,
FLAGS_cmvn_write_path
,
true
);
}
// Streaming feature-extraction test driver.
//
// For every utterance in FLAGS_wav_rspecifier the waveform of channel 0 is
// cut into chunks of streaming_chunk * sample_rate samples, each chunk is
// pushed through the pipeline
//   RawDataSource -> DecibelNormalizer -> LinearSpectrogram -> CMVN
// and the flat feature vectors read back are reassembled into one
// (rows x cmvn.Dim()) matrix that is written to FLAGS_feature_wspecifier.
//
// Returns 0 when at least one utterance was processed, 1 otherwise.
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);
    // Write the built-in CMVN statistics first; the CMVN stage below is
    // constructed from the same path.
    WriteMatrix();

    // test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
    // window --> linear_spectrogram --> cmvn
    int32 num_done = 0, num_err = 0;
    std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(
        new ppspeech::RawDataSource());

    ppspeech::LinearSpectrogramOptions opt;
    opt.frame_opts.frame_length_ms = 20;
    opt.frame_opts.frame_shift_ms = 10;
    ppspeech::DecibelNormalizerOptions db_norm_opt;
    std::unique_ptr<ppspeech::FeatureExtractorInterface> base_feature_extractor(
        new ppspeech::DecibelNormalizer(db_norm_opt, std::move(data_source)));
    std::unique_ptr<ppspeech::FeatureExtractorInterface> linear_spectrogram(
        new ppspeech::LinearSpectrogram(opt,
                                        std::move(base_feature_extractor)));
    ppspeech::CMVN cmvn(FLAGS_cmvn_write_path, std::move(linear_spectrogram));

    float streaming_chunk = 0.36;
    int sample_rate = 16000;
    int chunk_sample_size = streaming_chunk * sample_rate;

    for (; !wav_reader.Done(); wav_reader.Next()) {
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();

        // Only the first channel is processed.
        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        int sample_offset = 0;
        std::vector<kaldi::Vector<BaseFloat>> feats;
        int feature_rows = 0;
        while (sample_offset < tot_samples) {
            // The last chunk may be shorter than chunk_sample_size.
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk(i) = waveform(sample_offset + i);
            }
            kaldi::Vector<BaseFloat> features;
            cmvn.AcceptWaveform(wav_chunk);
            cmvn.Read(&features);

            // Debug output. cur_chunk_size is >= 1 here, so wav_chunk(0) is
            // always valid; `features` however may come back empty, and
            // indexing an empty vector is out of bounds, so guard it.
            std::cout << wav_chunk(0) << std::endl;
            if (features.Dim() > 0) {
                std::cout << features(0) << std::endl;
            }

            feats.push_back(features);
            sample_offset += cur_chunk_size;
            // Each chunk is a flat vector of whole frames of cmvn.Dim() values.
            feature_rows += features.Dim() / cmvn.Dim();
        }

        // Stitch the per-chunk flat vectors into one row-per-frame matrix.
        int cur_idx = 0;
        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows, cmvn.Dim());
        // Iterate by const reference: copying every chunk is unnecessary.
        for (const auto& feat : feats) {
            int num_rows = feat.Dim() / cmvn.Dim();
            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
                for (int col_idx = 0; col_idx < cmvn.Dim(); ++col_idx) {
                    features(cur_idx, col_idx) =
                        feat(row_idx * cmvn.Dim() + col_idx);
                }
                ++cur_idx;
            }
        }
        feat_writer.Write(utt, features);

        if (num_done % 50 == 0 && num_done != 0)
            KALDI_VLOG(2) << "Processed " << num_done << " utterances";
        num_done++;
    }
    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}
speechx/speechx/base/common.h
浏览文件 @
c769d907
...
@@ -14,24 +14,25 @@
...
@@ -14,24 +14,25 @@
#pragma once
#pragma once
#include <condition_variable>
#include <deque>
#include <deque>
#include <fstream>
#include <iostream>
#include <iostream>
#include <istream>
#include <istream>
#include <fstream>
#include <map>
#include <map>
#include <memory>
#include <memory>
#include <mutex>
#include <ostream>
#include <ostream>
#include <queue>
#include <set>
#include <set>
#include <sstream>
#include <sstream>
#include <stack>
#include <stack>
#include <string>
#include <string>
#include <vector>
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#include <mutex>
#include <vector>
#include <condition_variable>
#include "base/log.h"
#include "base/flags.h"
#include "base/basic_types.h"
#include "base/basic_types.h"
#include "base/flags.h"
#include "base/log.h"
#include "base/macros.h"
#include "base/macros.h"
speechx/speechx/frontend/CMakeLists.txt
浏览文件 @
c769d907
...
@@ -4,6 +4,7 @@ add_library(frontend STATIC
...
@@ -4,6 +4,7 @@ add_library(frontend STATIC
normalizer.cc
normalizer.cc
linear_spectrogram.cc
linear_spectrogram.cc
raw_audio.cc
raw_audio.cc
feature_cache.cc
)
)
target_link_libraries
(
frontend PUBLIC kaldi-matrix
)
target_link_libraries
(
frontend PUBLIC kaldi-matrix
)
speechx/speechx/frontend/feature_cache.cc
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/feature_cache.h"
#include "frontend/feature_cache.h"
void
FeatureCache
::
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
namespace
ppspeech
{
using
kaldi
::
Vector
;
using
kaldi
::
VectorBase
;
using
kaldi
::
BaseFloat
;
using
std
::
vector
;
using
kaldi
::
SubVector
;
using
std
::
unique_ptr
;
FeatureCache
::
FeatureCache
(
int
max_size
,
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
max_size_
=
max_size
;
base_extractor_
=
std
::
move
(
base_extractor
);
}
void
FeatureCache
::
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
base_extractor_
->
AcceptWaveform
(
input
);
base_extractor_
->
AcceptWaveform
(
input
);
// feed current data
// feed current data
while
(
base_extractor_
->
IsLastFrame
())
{
bool
result
=
false
;
Compute
();
do
{
}
result
=
Compute
();
}
while
(
result
);
}
}
// pop feature chunk
// pop feature chunk
void
FeatureCache
::
Read
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
{
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
kaldi
::
Timer
timer
;
while
(
cache_
.
empty
())
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
ready_read_condition_
.
wait
(
lock
);
ready_read_condition_
.
wait
(
lock
);
BaseFloat
elapsed
=
timer
.
Elapsed
()
*
1000
;
// todo replace 1.0 with timeout_
if
(
elapsed
>
1.0
)
{
return
false
;
}
usleep
(
1000
);
// sleep 1 ms
}
}
if
(
cache_
.
empty
())
return
false
;
feat
->
Resize
(
cache_
.
front
().
Dim
());
feat
->
CopyFromVec
(
cache_
.
front
());
feat
->
CopyFromVec
(
cache_
.
front
());
cache_
.
pop
();
cache_
.
pop
();
ready_feed_condition_
.
notify_one
();
ready_feed_condition_
.
notify_one
();
return
true
;
}
}
// read all data from base_feature_extractor_ into cache_
// read all data from base_feature_extractor_ into cache_
void
FeatureCache
::
Compute
()
{
bool
FeatureCache
::
Compute
()
{
// compute and feed
// compute and feed
Vector
<
BaseFloat
>
feature_chunk
(
base_extractor_
->
Dim
())
;
Vector
<
BaseFloat
>
feature_chunk
;
base_extractor_
->
Read
(
&
feature_chunk
);
b
ool
result
=
b
ase_extractor_
->
Read
(
&
feature_chunk
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
size
()
>=
max_size_
)
{
while
(
cache_
.
size
()
>=
max_size_
)
{
ready_feed_condition_
.
wait
(
lock
);
ready_feed_condition_
.
wait
(
lock
);
}
}
if
(
feature_chunk
.
Dim
()
!=
0
)
{
cache_
.
push
(
feature_chunk
);
cache_
.
push
(
feature_chunk
);
}
ready_read_condition_
.
notify_one
();
ready_read_condition_
.
notify_one
();
return
result
;
}
}
// compute the last chunk data && set feed finished
void
Reset
()
{
void
FeatureCache
::
InputFinishd
()
{
// std::lock_guard<std::mutex> lock(mutex_);
Compute
()
;
return
;
}
}
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/feature_cache.h
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/feature_extractor_interface.h"
class
FeatureCache
{
namespace
ppspeech
{
class
FeatureCache
:
public
FeatureExtractorInterface
{
public:
public:
explicit
FeatureCache
(
FeatureExtractorInterface
base_extractor
);
explicit
FeatureCache
(
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
int32
max_size
=
kint16max
,
void
Read
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
void
Dim
()
{
return
base_extractor_
->
Dim
();
}
virtual
void
AcceptWaveform
(
void
SetFinished
();
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
bool
IsFinished
();
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
Compute
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
bool
Compute
();
bool
finished_
;
bool
finished_
;
mutable
std
::
mutex
mutex_
;
std
::
mutex
mutex_
;
size_t
max_size
;
size_t
max_size
_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
std
::
shared
_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
unique
_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
std
::
condition_variable
ready_read_condition_
;
DISALLOW_COPY_AND_ASSGIN
(
FeatureCache
);
//
DISALLOW_COPY_AND_ASSGIN(FeatureCache);
};
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
c769d907
...
@@ -21,9 +21,13 @@ namespace ppspeech {
...
@@ -21,9 +21,13 @@ namespace ppspeech {
class
FeatureExtractorInterface
{
class
FeatureExtractorInterface
{
public:
public:
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
AcceptWaveform
(
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
virtual
size_t
Dim
()
const
=
0
;
virtual
size_t
Dim
()
const
=
0
;
virtual
void
SetFinished
()
=
0
;
virtual
bool
IsFinished
()
const
=
0
;
// virtual void Reset();
};
};
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
c769d907
...
@@ -25,7 +25,7 @@ using kaldi::VectorBase;
...
@@ -25,7 +25,7 @@ using kaldi::VectorBase;
using
kaldi
::
Matrix
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
using
std
::
vector
;
//todo remove later
//
todo remove later
void
CopyVector2StdVector_
(
const
VectorBase
<
BaseFloat
>&
input
,
void
CopyVector2StdVector_
(
const
VectorBase
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
vector
<
BaseFloat
>*
output
)
{
if
(
input
.
Dim
()
==
0
)
return
;
if
(
input
.
Dim
()
==
0
)
return
;
...
@@ -70,14 +70,13 @@ void LinearSpectrogram::AcceptWaveform(const VectorBase<BaseFloat>& input) {
...
@@ -70,14 +70,13 @@ void LinearSpectrogram::AcceptWaveform(const VectorBase<BaseFloat>& input) {
base_extractor_
->
AcceptWaveform
(
input
);
base_extractor_
->
AcceptWaveform
(
input
);
}
}
void
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
bool
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
Vector
<
BaseFloat
>
input_feats
(
chunk_sample_size_
);
Vector
<
BaseFloat
>
input_feats
(
chunk_sample_size_
);
base_extractor_
->
Read
(
&
input_feats
);
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
vector
<
BaseFloat
>
input_feats_vec
(
input_feats
.
Dim
());
vector
<
BaseFloat
>
input_feats_vec
(
input_feats
.
Dim
());
CopyVector2StdVector_
(
input_feats
,
&
input_feats_vec
);
CopyVector2StdVector_
(
input_feats
,
&
input_feats_vec
);
//for (int idx = 0; idx < input_feats.Dim(); ++idx) {
// input_feats_vec[idx] = input_feats(idx);
//}
vector
<
vector
<
BaseFloat
>>
result
;
vector
<
vector
<
BaseFloat
>>
result
;
Compute
(
input_feats_vec
,
result
);
Compute
(
input_feats_vec
,
result
);
int32
feat_size
=
0
;
int32
feat_size
=
0
;
...
@@ -88,7 +87,7 @@ void LinearSpectrogram::Read(Vector<BaseFloat>* feat) {
...
@@ -88,7 +87,7 @@ void LinearSpectrogram::Read(Vector<BaseFloat>* feat) {
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
(
*
feat
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
(
*
feat
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
}
}
return
;
return
true
;
}
}
void
LinearSpectrogram
::
Hanning
(
vector
<
float
>*
data
)
const
{
void
LinearSpectrogram
::
Hanning
(
vector
<
float
>*
data
)
const
{
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#pragma once
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/feature_extractor_interface.h"
#include "kaldi/feat/feature-window.h"
#include "kaldi/feat/feature-window.h"
#include "base/common.h"
namespace
ppspeech
{
namespace
ppspeech
{
struct
LinearSpectrogramOptions
{
struct
LinearSpectrogramOptions
{
kaldi
::
FrameExtractionOptions
frame_opts
;
kaldi
::
FrameExtractionOptions
frame_opts
;
kaldi
::
BaseFloat
streaming_chunk
;
kaldi
::
BaseFloat
streaming_chunk
;
LinearSpectrogramOptions
()
:
LinearSpectrogramOptions
()
:
streaming_chunk
(
0.36
),
frame_opts
()
{}
streaming_chunk
(
0.36
),
frame_opts
()
{}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"streaming-chunk"
,
&
streaming_chunk
,
"streaming chunk size"
);
opts
->
Register
(
"streaming-chunk"
,
&
streaming_chunk
,
"streaming chunk size"
);
frame_opts
.
Register
(
opts
);
frame_opts
.
Register
(
opts
);
}
}
};
};
class
LinearSpectrogram
:
public
FeatureExtractorInterface
{
class
LinearSpectrogram
:
public
FeatureExtractorInterface
{
public:
public:
explicit
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
explicit
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
AcceptWaveform
(
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
void
ReadFeats
(
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>*
feats
);
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/normalizer.h"
#include "frontend/normalizer.h"
#include "kaldi/feat/cmvn.h"
#include "kaldi/feat/cmvn.h"
...
@@ -12,27 +26,32 @@ using std::vector;
...
@@ -12,27 +26,32 @@ using std::vector;
using
kaldi
::
SubVector
;
using
kaldi
::
SubVector
;
using
std
::
unique_ptr
;
using
std
::
unique_ptr
;
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
opts_
=
opts
;
opts_
=
opts
;
dim_
=
0
;
dim_
=
0
;
}
}
void
DecibelNormalizer
::
AcceptWaveform
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
)
{
void
DecibelNormalizer
::
AcceptWaveform
(
//dim_ = input.Dim();
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
)
{
//waveform_.Resize(input.Dim());
// dim_ = input.Dim();
//waveform_.CopyFromVec(input);
// waveform_.Resize(input.Dim());
// waveform_.CopyFromVec(input);
base_extractor_
->
AcceptWaveform
(
input
);
base_extractor_
->
AcceptWaveform
(
input
);
}
}
void
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
// if (waveform_.Dim() == 0) return;
// if (waveform_.Dim() == 0) return;
base_extractor_
->
Read
(
feat
);
if
(
base_extractor_
->
Read
(
feat
)
==
false
||
feat
->
Dim
()
==
0
)
{
return
false
;
}
Compute
(
feat
);
Compute
(
feat
);
return
true
;
}
}
//todo remove later
//
todo remove later
void
CopyVector2StdVector
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
,
void
CopyVector2StdVector
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
vector
<
BaseFloat
>*
output
)
{
if
(
input
.
Dim
()
==
0
)
return
;
if
(
input
.
Dim
()
==
0
)
return
;
...
@@ -65,7 +84,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
...
@@ -65,7 +84,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
}
}
// square
// square
for
(
auto
&
d
:
samples
)
{
for
(
auto
&
d
:
samples
)
{
if
(
opts_
.
convert_int_float
)
{
if
(
opts_
.
convert_int_float
)
{
d
=
d
*
wave_float_normlization
;
d
=
d
*
wave_float_normlization
;
}
}
...
@@ -78,14 +97,15 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
...
@@ -78,14 +97,15 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
gain
=
opts_
.
target_db
-
rms_db
;
gain
=
opts_
.
target_db
-
rms_db
;
if
(
gain
>
opts_
.
max_gain_db
)
{
if
(
gain
>
opts_
.
max_gain_db
)
{
LOG
(
ERROR
)
<<
"Unable to normalize segment to "
<<
opts_
.
target_db
<<
"dB,"
LOG
(
ERROR
)
<<
"Unable to normalize segment to "
<<
opts_
.
target_db
<<
"dB,"
<<
"because the the probable gain have exceeds opts_.max_gain_db"
<<
"because the the probable gain have exceeds opts_.max_gain_db"
<<
opts_
.
max_gain_db
<<
"dB."
;
<<
opts_
.
max_gain_db
<<
"dB."
;
return
false
;
return
false
;
}
}
// Note that this is an in-place transformation.
// Note that this is an in-place transformation.
for
(
auto
&
item
:
samples
)
{
for
(
auto
&
item
:
samples
)
{
// python item *= 10.0 ** (gain / 20.0)
// python item *= 10.0 ** (gain / 20.0)
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
}
...
@@ -109,23 +129,26 @@ void CMVN::AcceptWaveform(const kaldi::VectorBase<kaldi::BaseFloat>& input) {
...
@@ -109,23 +129,26 @@ void CMVN::AcceptWaveform(const kaldi::VectorBase<kaldi::BaseFloat>& input) {
return
;
return
;
}
}
void
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
base_extractor_
->
Read
(
feat
);
if
(
base_extractor_
->
Read
(
feat
)
==
false
)
{
return
false
;
}
Compute
(
feat
);
Compute
(
feat
);
return
;
return
true
;
}
}
// feats contain num_frames feature.
// feats contain num_frames feature.
void
CMVN
::
Compute
(
VectorBase
<
BaseFloat
>*
feats
)
const
{
void
CMVN
::
Compute
(
VectorBase
<
BaseFloat
>*
feats
)
const
{
KALDI_ASSERT
(
feats
!=
NULL
);
KALDI_ASSERT
(
feats
!=
NULL
);
int32
dim
=
stats_
.
NumCols
()
-
1
;
int32
dim
=
stats_
.
NumCols
()
-
1
;
if
(
stats_
.
NumRows
()
>
2
||
stats_
.
NumRows
()
<
1
||
feats
->
Dim
()
%
dim
!=
0
)
{
if
(
stats_
.
NumRows
()
>
2
||
stats_
.
NumRows
()
<
1
||
KALDI_ERR
<<
"Dim mismatch: cmvn "
feats
->
Dim
()
%
dim
!=
0
)
{
<<
stats_
.
NumRows
()
<<
'x'
<<
stats_
.
NumCols
()
KALDI_ERR
<<
"Dim mismatch: cmvn "
<<
stats_
.
NumRows
()
<<
'x'
<<
", feats "
<<
feats
->
Dim
()
<<
'x'
;
<<
stats_
.
NumCols
()
<<
", feats "
<<
feats
->
Dim
()
<<
'x'
;
}
}
if
(
stats_
.
NumRows
()
==
1
&&
var_norm_
)
{
if
(
stats_
.
NumRows
()
==
1
&&
var_norm_
)
{
KALDI_ERR
<<
"You requested variance normalization but no variance stats_ "
KALDI_ERR
<<
"You requested variance normalization but no variance stats_ "
<<
"are supplied."
;
<<
"are supplied."
;
}
}
...
@@ -133,17 +156,20 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
...
@@ -133,17 +156,20 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// computing an offset and representing it as stats_, we use a count of one.
// computing an offset and representing it as stats_, we use a count of one.
if
(
count
<
1.0
)
if
(
count
<
1.0
)
KALDI_ERR
<<
"Insufficient stats_ for cepstral mean and variance normalization: "
KALDI_ERR
<<
"Insufficient stats_ for cepstral mean and variance "
"normalization: "
<<
"count = "
<<
count
;
<<
"count = "
<<
count
;
if
(
!
var_norm_
)
{
if
(
!
var_norm_
)
{
Vector
<
BaseFloat
>
offset
(
feats
->
Dim
());
Vector
<
BaseFloat
>
offset
(
feats
->
Dim
());
SubVector
<
double
>
mean_stats
(
stats_
.
RowData
(
0
),
dim
);
SubVector
<
double
>
mean_stats
(
stats_
.
RowData
(
0
),
dim
);
Vector
<
double
>
mean_stats_apply
(
feats
->
Dim
());
Vector
<
double
>
mean_stats_apply
(
feats
->
Dim
());
//fill the datat of mean_stats in mean_stats_appy whose dim is equal with the dim of feature.
// fill the datat of mean_stats in mean_stats_appy whose dim is equal
//the dim of feats = dim * num_frames;
// with the dim of feature.
// the dim of feats = dim * num_frames;
for
(
int32
idx
=
0
;
idx
<
feats
->
Dim
()
/
dim
;
++
idx
)
{
for
(
int32
idx
=
0
;
idx
<
feats
->
Dim
()
/
dim
;
++
idx
)
{
SubVector
<
double
>
stats_tmp
(
mean_stats_apply
.
Data
()
+
dim
*
idx
,
dim
);
SubVector
<
double
>
stats_tmp
(
mean_stats_apply
.
Data
()
+
dim
*
idx
,
dim
);
stats_tmp
.
CopyFromVec
(
mean_stats
);
stats_tmp
.
CopyFromVec
(
mean_stats
);
}
}
offset
.
AddVec
(
-
1.0
/
count
,
mean_stats_apply
);
offset
.
AddVec
(
-
1.0
/
count
,
mean_stats_apply
);
...
@@ -155,18 +181,18 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
...
@@ -155,18 +181,18 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
kaldi
::
Matrix
<
BaseFloat
>
norm
(
2
,
feats
->
Dim
());
kaldi
::
Matrix
<
BaseFloat
>
norm
(
2
,
feats
->
Dim
());
for
(
int32
d
=
0
;
d
<
dim
;
d
++
)
{
for
(
int32
d
=
0
;
d
<
dim
;
d
++
)
{
double
mean
,
offset
,
scale
;
double
mean
,
offset
,
scale
;
mean
=
stats_
(
0
,
d
)
/
count
;
mean
=
stats_
(
0
,
d
)
/
count
;
double
var
=
(
stats_
(
1
,
d
)
/
count
)
-
mean
*
mean
,
double
var
=
(
stats_
(
1
,
d
)
/
count
)
-
mean
*
mean
,
floor
=
1.0e-20
;
floor
=
1.0e-20
;
if
(
var
<
floor
)
{
if
(
var
<
floor
)
{
KALDI_WARN
<<
"Flooring cepstral variance from "
<<
var
<<
" to "
KALDI_WARN
<<
"Flooring cepstral variance from "
<<
var
<<
" to "
<<
floor
;
<<
floor
;
var
=
floor
;
var
=
floor
;
}
}
scale
=
1.0
/
sqrt
(
var
);
scale
=
1.0
/
sqrt
(
var
);
if
(
scale
!=
scale
||
1
/
scale
==
0.0
)
if
(
scale
!=
scale
||
1
/
scale
==
0.0
)
KALDI_ERR
<<
"NaN or infinity in cepstral mean/variance computation"
;
KALDI_ERR
offset
=
-
(
mean
*
scale
);
<<
"NaN or infinity in cepstral mean/variance computation"
;
offset
=
-
(
mean
*
scale
);
for
(
int32
d_skip
=
d
;
d_skip
<
feats
->
Dim
();)
{
for
(
int32
d_skip
=
d
;
d_skip
<
feats
->
Dim
();)
{
norm
(
0
,
d_skip
)
=
offset
;
norm
(
0
,
d_skip
)
=
offset
;
norm
(
1
,
d_skip
)
=
scale
;
norm
(
1
,
d_skip
)
=
scale
;
...
...
speechx/speechx/frontend/normalizer.h
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#pragma once
#include "base/common.h"
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/feature_extractor_interface.h"
#include "kaldi/util/options-itf.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/options-itf.h"
namespace
ppspeech
{
namespace
ppspeech
{
...
@@ -12,15 +26,17 @@ struct DecibelNormalizerOptions {
...
@@ -12,15 +26,17 @@ struct DecibelNormalizerOptions {
float
target_db
;
float
target_db
;
float
max_gain_db
;
float
max_gain_db
;
bool
convert_int_float
;
bool
convert_int_float
;
DecibelNormalizerOptions
()
:
DecibelNormalizerOptions
()
target_db
(
-
20
),
:
target_db
(
-
20
),
max_gain_db
(
300.0
),
convert_int_float
(
false
)
{}
max_gain_db
(
300.0
),
convert_int_float
(
false
){}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"target-db"
,
&
target_db
,
"target db for db normalization"
);
opts
->
Register
(
opts
->
Register
(
"max-gain-db"
,
&
max_gain_db
,
"max gain db for db normalization"
);
"target-db"
,
&
target_db
,
"target db for db normalization"
);
opts
->
Register
(
"convert-int-float"
,
&
convert_int_float
,
"if convert int samples to float"
);
opts
->
Register
(
"max-gain-db"
,
&
max_gain_db
,
"max gain db for db normalization"
);
opts
->
Register
(
"convert-int-float"
,
&
convert_int_float
,
"if convert int samples to float"
);
}
}
};
};
...
@@ -29,9 +45,12 @@ class DecibelNormalizer : public FeatureExtractorInterface {
...
@@ -29,9 +45,12 @@ class DecibelNormalizer : public FeatureExtractorInterface {
explicit
DecibelNormalizer
(
explicit
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
AcceptWaveform
(
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
...
@@ -44,12 +63,14 @@ class DecibelNormalizer : public FeatureExtractorInterface {
...
@@ -44,12 +63,14 @@ class DecibelNormalizer : public FeatureExtractorInterface {
class
CMVN
:
public
FeatureExtractorInterface
{
class
CMVN
:
public
FeatureExtractorInterface
{
public:
public:
explicit
CMVN
(
explicit
CMVN
(
std
::
string
cmvn_file
,
std
::
string
cmvn_file
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
AcceptWaveform
(
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
...
...
speechx/speechx/frontend/raw_audio.cc
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/raw_audio.h"
#include "frontend/raw_audio.h"
#include "kaldi/base/timer.h"
#include "kaldi/base/timer.h"
namespace
ppspeech
{
namespace
ppspeech
{
RawAudioSource
::
RawAudioSource
(
int
buffer_size
=
65536
)
using
kaldi
::
BaseFloat
;
:
finished_
(
false
),
using
kaldi
::
VectorBase
;
data_length_
(
0
),
using
kaldi
::
Vector
;
start_
(
0
),
timeout_
(
5
)
{
RawAudioSource
::
RawAudioSource
(
int
buffer_size
)
:
finished_
(
false
),
data_length_
(
0
),
start_
(
0
),
timeout_
(
1
)
{
ring_buffer_
.
resize
(
buffer_size
);
ring_buffer_
.
resize
(
buffer_size
);
}
}
// todo length > buffer size, condition_var
void
RawAudioSource
::
AcceptWaveform
(
const
VectorBase
<
BaseFloat
>&
data
)
{
bool
RawAudioSource
::
AcceptWaveform
(
const
VectorBase
<
BaseFloat
>&
data
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
while
(
data_length_
+
data
.
Dim
()
>
ring_buffer_
.
size
())
{
ready_feed_condition_
.
wait
(
lock
);
}
for
(
size_t
idx
=
0
;
idx
<
data
.
Dim
();
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
data
.
Dim
();
++
idx
)
{
ring_buffer_
[
idx
%
ring_buffer_
.
size
()]
=
data
(
idx
);
ring_buffer_
[
idx
%
ring_buffer_
.
size
()]
=
data
(
idx
);
}
}
data_length_
+=
length
;
data_length_
+=
data
.
Dim
();
ready_read_condition_
.
notify_one
();
}
}
// todo length > buffer size
// bool RawAudioSource::AcceptWaveform(BaseFloat* data, int length) {
//bool RawAudioSource::AcceptWaveform(BaseFloat* data, int length) {
// std::unique_lock<std::mutex> lock(mutex_);
//std::lock_guard<std::mutex> lock(mutex_);
// for (size_t idx = 0; idx < length; ++idx) {
//for (size_t idx = 0; idx < length; ++idx) {
// ring_buffer_[idx % ring_buffer_.size()] = data[idx];
//ring_buffer_[idx % ring_buffer_.size()] = data[idx];
//}
//}
// data_length_ += length;
//data_length_ += length;
// finish_condition_.notify_one();
//finish_condition_.notify_one();
//}
//}
bool
RawAudioSource
::
Read
(
Vector
<
BaseFloat
>*
feats
)
{
bool
RawAudioSource
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
size_t
chunk_size
=
feats
->
Dim
();
size_t
chunk_size
=
feat
->
Dim
();
Timer
timer
;
kaldi
::
Timer
timer
;
if
(
chunk_size
>
data_length_
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
true
)
{
while
(
chunk_size
>
data_length_
)
{
int32
elapsed
=
static_cat
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
// when audio is empty and no more data feed
if
(
finished_
||
>
timeout_
)
{
// ready_read_condition will block in dead lock.
chunk_size
=
data_length_
;
// ready_read_condition_.wait(lock);
feats
->
Resize
(
chunk_size
);
int32
elapsed
=
static_cast
<
int32
>
(
timer
.
Elapsed
()
*
1000
);
if
(
elapsed
>
timeout_
)
{
if
(
finished_
==
true
)
{
// read last chunk data
break
;
break
;
}
}
sleep
(
1
);
if
(
chunk_size
>
data_length_
)
{
return
false
;
}
}
}
usleep
(
100
);
// sleep 0.1 ms
}
}
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// read last chunk data
if
(
chunk_size
>
data_length_
)
{
chunk_size
=
data_length_
;
feat
->
Resize
(
chunk_size
);
}
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
feats
->
Data
()[
idx
]
=
ring_buffer_
[
idx
];
feat
->
Data
()[
idx
]
=
ring_buffer_
[
idx
];
}
}
data_length_
-=
chunk_size
;
data_length_
-=
chunk_size
;
start_
=
(
start_
+
chunk_size
)
%
ring_buffer_
.
size
();
start_
=
(
start_
+
chunk_size
)
%
ring_buffer_
.
size
();
finish_condition_
.
notify_one
();
ready_feed_condition_
.
notify_one
();
return
true
;
}
}
//size_t RawAudioSource::GetDataLength() {
// return data_length_;
//}
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/raw_audio.h
浏览文件 @
c769d907
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#pragma once
#include "frontend/feature_extractor_interface.h"
#include "base/common.h"
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#pragma once
#pragma once
namespace
ppspeech
{
namespace
ppspeech
{
class
RawAudioSource
{
class
RawAudioSource
:
public
FeatureExtractorInterface
{
public:
public:
RawAudioSource
(
int
buffer_size
=
kint16max
);
explicit
RawAudioSource
(
int
buffer_size
=
kint16max
);
virtual
void
AcceptWaveform
(
kaldi
::
BaseFloat
*
data
,
int
length
);
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
data
);
v
oid
AcceptWaveformByByte
(
char
*
data
,
lnt
length
)
{}
v
irtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
v
oid
AcceptWaveformByShort
(
kaldi
::
int16
*
data
,
int
length
)
{
}
v
irtual
size_t
Dim
()
const
{
return
data_length_
;
}
virtual
void
SetFinished
()
{
// read chunk data in buffer
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
bool
Read
(
VectorBase
<
BaseFloat
>*
feats
)
;
finished_
=
true
;
void
SetFinished
()
{
finished_
=
true
;
}
}
bool
IsFinished
()
{
return
finished_
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
private:
private:
vector
<
kaldi
::
BaseFloat
>
ring_buffer_
;
std
::
vector
<
kaldi
::
BaseFloat
>
ring_buffer_
;
size_t
start_
;
size_t
start_
;
size_t
data_length_
;
size_t
data_length_
;
bool
finished_
;
bool
finished_
;
mutable
std
::
mutex
mutex
t
_
;
mutable
std
::
mutex
mutex_
;
std
::
condition_variable
ready_read_condition_
;
std
::
condition_variable
ready_read_condition_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_feed_condition_
;
kaldi
::
int32
timeout_
;
kaldi
::
int32
timeout_
;
DISALLOW_COPY_AND_ASSIGN
(
RawAudioSource
);
};
// it is a datasource for testing different frontend module.
class
RawDataSource
:
public
FeatureExtractorInterface
{
public:
explicit
RawDataSource
()
{
finished_
=
false
;
}
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
data_
=
input
;
}
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
if
(
data_
.
Dim
()
==
0
)
{
return
false
;
}
(
*
feat
)
=
data_
;
data_
.
Resize
(
0
);
return
true
;
}
virtual
size_t
Dim
()
const
{
return
data_
.
Dim
();
}
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
private:
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
data_
;
bool
finished_
;
DISALLOW_COPY_AND_ASSIGN
(
RawDataSource
);
};
};
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录