Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
22fe1c9d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
22fe1c9d
编写于
3月 10, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename interface & add comment to Dim()
上级
7c1b4328
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
94 addition
and
91 deletion
+94
-91
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+7
-8
speechx/speechx/frontend/feature_cache.cc
speechx/speechx/frontend/feature_cache.cc
+6
-6
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+7
-4
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+9
-4
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+7
-6
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+4
-6
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+17
-20
speechx/speechx/frontend/normalizer.h
speechx/speechx/frontend/normalizer.h
+13
-8
speechx/speechx/frontend/raw_audio.cc
speechx/speechx/frontend/raw_audio.cc
+12
-19
speechx/speechx/frontend/raw_audio.h
speechx/speechx/frontend/raw_audio.h
+12
-10
未找到文件。
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
22fe1c9d
...
@@ -25,10 +25,9 @@
...
@@ -25,10 +25,9 @@
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include "kaldi/util/table-types.h"
DEFINE_string
(
wav_rspecifier
,
""
,
"test wav path"
);
DEFINE_string
(
wav_rspecifier
,
""
,
"test wav scp path"
);
DEFINE_string
(
feature_wspecifier
,
""
,
"test wav ark"
);
DEFINE_string
(
feature_wspecifier
,
""
,
"output feats wspecifier"
);
DEFINE_string
(
feature_check_wspecifier
,
""
,
"test wav ark"
);
DEFINE_string
(
cmvn_write_path
,
"./cmvn.ark"
,
"write cmvn"
);
DEFINE_string
(
cmvn_write_path
,
"./cmvn.ark"
,
"test wav ark"
);
std
::
vector
<
float
>
mean_
{
std
::
vector
<
float
>
mean_
{
...
@@ -165,10 +164,10 @@ int main(int argc, char* argv[]) {
...
@@ -165,10 +164,10 @@ int main(int argc, char* argv[]) {
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> cmvn
// window -->linear_spectrogram --> cmvn
int32
num_done
=
0
,
num_err
=
0
;
int32
num_done
=
0
,
num_err
=
0
;
//
std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
//std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
// ppspeech::RawDataSourc
e());
//ppspeech::RawDataCach
e());
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
new
ppspeech
::
RawAudio
Sourc
e
());
new
ppspeech
::
RawAudio
Cach
e
());
ppspeech
::
LinearSpectrogramOptions
opt
;
ppspeech
::
LinearSpectrogramOptions
opt
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
...
@@ -211,7 +210,7 @@ int main(int argc, char* argv[]) {
...
@@ -211,7 +210,7 @@ int main(int argc, char* argv[]) {
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
}
}
kaldi
::
Vector
<
BaseFloat
>
features
;
kaldi
::
Vector
<
BaseFloat
>
features
;
feature_cache
.
Accept
Waveform
(
wav_chunk
);
feature_cache
.
Accept
(
wav_chunk
);
if
(
cur_chunk_size
<
chunk_sample_size
)
{
if
(
cur_chunk_size
<
chunk_sample_size
)
{
feature_cache
.
SetFinished
();
feature_cache
.
SetFinished
();
}
}
...
...
speechx/speechx/frontend/feature_cache.cc
浏览文件 @
22fe1c9d
...
@@ -29,9 +29,9 @@ FeatureCache::FeatureCache(
...
@@ -29,9 +29,9 @@ FeatureCache::FeatureCache(
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
}
}
void
FeatureCache
::
Accept
Waveform
(
void
FeatureCache
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
s
)
{
base_extractor_
->
Accept
Waveform
(
input
);
base_extractor_
->
Accept
(
inputs
);
// feed current data
// feed current data
bool
result
=
false
;
bool
result
=
false
;
do
{
do
{
...
@@ -40,7 +40,7 @@ void FeatureCache::AcceptWaveform(
...
@@ -40,7 +40,7 @@ void FeatureCache::AcceptWaveform(
}
}
// pop feature chunk
// pop feature chunk
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_feats
)
{
kaldi
::
Timer
timer
;
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
...
@@ -53,8 +53,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feat) {
...
@@ -53,8 +53,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* feat) {
usleep
(
1000
);
// sleep 1 ms
usleep
(
1000
);
// sleep 1 ms
}
}
if
(
cache_
.
empty
())
return
false
;
if
(
cache_
.
empty
())
return
false
;
feat
->
Resize
(
cache_
.
front
().
Dim
());
output_feats
->
Resize
(
cache_
.
front
().
Dim
());
feat
->
CopyFromVec
(
cache_
.
front
());
output_feats
->
CopyFromVec
(
cache_
.
front
());
cache_
.
pop
();
cache_
.
pop
();
ready_feed_condition_
.
notify_one
();
ready_feed_condition_
.
notify_one
();
return
true
;
return
true
;
...
...
speechx/speechx/frontend/feature_cache.h
浏览文件 @
22fe1c9d
...
@@ -24,12 +24,15 @@ class FeatureCache : public FeatureExtractorInterface {
...
@@ -24,12 +24,15 @@ class FeatureCache : public FeatureExtractorInterface {
explicit
FeatureCache
(
explicit
FeatureCache
(
int32
max_size
=
kint16max
,
int32
max_size
=
kint16max
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
virtual
void
AcceptWaveform
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
// output_feats dim = num_frames * feature_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_feats
);
// the feature cache only caches features that come from the base extractor
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
void
SetFinished
()
{
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
base_extractor_
->
SetFinished
();
// read the last chunk data
Compute
();
Compute
();
}
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
...
@@ -44,7 +47,7 @@ class FeatureCache : public FeatureExtractorInterface {
...
@@ -44,7 +47,7 @@ class FeatureCache : public FeatureExtractorInterface {
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_read_condition_
;
std
::
condition_variable
ready_read_condition_
;
//
DISALLOW_COPY_AND_ASSGIN(FeatureCache);
//DISALLOW_COPY_AND_ASSGIN(FeatureCache);
};
};
}
// namespace ppspeech
}
// namespace ppspeech
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
22fe1c9d
...
@@ -21,13 +21,18 @@ namespace ppspeech {
...
@@ -21,13 +21,18 @@ namespace ppspeech {
class
FeatureExtractorInterface
{
class
FeatureExtractorInterface
{
public:
public:
virtual
void
AcceptWaveform
(
// accept input data
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Accept
(
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
// get the processed result
// the length of output = feature_row * feature_dim,
// the Matrix is squashed into Vector
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outputs
)
=
0
;
// the Dim is the feature dim
virtual
size_t
Dim
()
const
=
0
;
virtual
size_t
Dim
()
const
=
0
;
virtual
void
SetFinished
()
=
0
;
virtual
void
SetFinished
()
=
0
;
virtual
bool
IsFinished
()
const
=
0
;
virtual
bool
IsFinished
()
const
=
0
;
// virtual void Reset();
// virtual void Reset();
};
};
}
// namespace ppspeech
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
22fe1c9d
...
@@ -66,11 +66,11 @@ LinearSpectrogram::LinearSpectrogram(
...
@@ -66,11 +66,11 @@ LinearSpectrogram::LinearSpectrogram(
dim_
=
fft_points_
/
2
+
1
;
// the dimension is Fs/2 Hz
dim_
=
fft_points_
/
2
+
1
;
// the dimension is Fs/2 Hz
}
}
void
LinearSpectrogram
::
Accept
Waveform
(
const
VectorBase
<
BaseFloat
>&
input
)
{
void
LinearSpectrogram
::
Accept
(
const
VectorBase
<
BaseFloat
>&
inputs
)
{
base_extractor_
->
Accept
Waveform
(
input
);
base_extractor_
->
Accept
(
inputs
);
}
}
bool
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
bool
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
output_feats
)
{
Vector
<
BaseFloat
>
input_feats
(
chunk_sample_size_
);
Vector
<
BaseFloat
>
input_feats
(
chunk_sample_size_
);
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
...
@@ -83,9 +83,10 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feat) {
...
@@ -83,9 +83,10 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feat) {
if
(
result
.
size
()
!=
0
)
{
if
(
result
.
size
()
!=
0
)
{
feat_size
=
result
.
size
()
*
result
[
0
].
size
();
feat_size
=
result
.
size
()
*
result
[
0
].
size
();
}
}
feat
->
Resize
(
feat_size
);
output_feats
->
Resize
(
feat_size
);
// todo refactor (SmileGoat)
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
(
*
feat
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
(
*
output_feats
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
}
}
return
true
;
return
true
;
}
}
...
@@ -117,7 +118,7 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
...
@@ -117,7 +118,7 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
return
true
;
return
true
;
}
}
// Compute spectrogram feat
, only for test, remove later
// Compute spectrogram feat
// todo: refactor later (SmileGoat)
// todo: refactor later (SmileGoat)
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
,
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
,
vector
<
vector
<
float
>>&
feat
)
{
vector
<
vector
<
float
>>&
feat
)
{
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
22fe1c9d
...
@@ -38,9 +38,10 @@ class LinearSpectrogram : public FeatureExtractorInterface {
...
@@ -38,9 +38,10 @@ class LinearSpectrogram : public FeatureExtractorInterface {
explicit
LinearSpectrogram
(
explicit
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_feats
);
// the dim_ is the dim of single frame feature
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
...
@@ -49,8 +50,6 @@ class LinearSpectrogram : public FeatureExtractorInterface {
...
@@ -49,8 +50,6 @@ class LinearSpectrogram : public FeatureExtractorInterface {
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
void
Compute
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feature
);
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
...
@@ -60,7 +59,6 @@ class LinearSpectrogram : public FeatureExtractorInterface {
...
@@ -60,7 +59,6 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window_
;
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
LinearSpectrogramOptions
opts_
;
LinearSpectrogramOptions
opts_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
waveform_
;
// remove later, todo(SmileGoat)
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
int
chunk_sample_size_
;
int
chunk_sample_size_
;
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
22fe1c9d
...
@@ -31,23 +31,20 @@ DecibelNormalizer::DecibelNormalizer(
...
@@ -31,23 +31,20 @@ DecibelNormalizer::DecibelNormalizer(
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
opts_
=
opts
;
opts_
=
opts
;
dim_
=
0
;
dim_
=
1
;
}
}
void
DecibelNormalizer
::
AcceptWaveform
(
void
DecibelNormalizer
::
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
)
{
const
kaldi
::
VectorBase
<
BaseFloat
>&
inputs_wave
)
{
// dim_ = input.Dim();
base_extractor_
->
Accept
(
inputs_wave
);
// waveform_.Resize(input.Dim());
// waveform_.CopyFromVec(input);
base_extractor_
->
AcceptWaveform
(
input
);
}
}
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
outputs_wave
)
{
// if (waveform_.Dim() == 0) return;
if
(
base_extractor_
->
Read
(
outputs_wave
)
==
false
||
if
(
base_extractor_
->
Read
(
feat
)
==
false
||
feat
->
Dim
()
==
0
)
{
outputs_wave
->
Dim
()
==
0
)
{
return
false
;
return
false
;
}
}
Compute
(
feat
);
Compute
(
outputs_wave
);
return
true
;
return
true
;
}
}
...
@@ -70,7 +67,7 @@ void CopyStdVector2Vector(const vector<BaseFloat>& input,
...
@@ -70,7 +67,7 @@ void CopyStdVector2Vector(const vector<BaseFloat>& input,
}
}
}
}
bool
DecibelNormalizer
::
Compute
(
VectorBase
<
BaseFloat
>*
feat
)
const
{
bool
DecibelNormalizer
::
Compute
(
VectorBase
<
BaseFloat
>*
feat
s
)
const
{
// calculate db rms
// calculate db rms
BaseFloat
rms_db
=
0.0
;
BaseFloat
rms_db
=
0.0
;
BaseFloat
mean_square
=
0.0
;
BaseFloat
mean_square
=
0.0
;
...
@@ -78,9 +75,9 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
...
@@ -78,9 +75,9 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
BaseFloat
wave_float_normlization
=
1.0
f
/
(
std
::
pow
(
2
,
16
-
1
));
BaseFloat
wave_float_normlization
=
1.0
f
/
(
std
::
pow
(
2
,
16
-
1
));
vector
<
BaseFloat
>
samples
;
vector
<
BaseFloat
>
samples
;
samples
.
resize
(
feat
->
Dim
());
samples
.
resize
(
feat
s
->
Dim
());
for
(
size_t
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
samples
[
i
]
=
(
*
feat
)(
i
);
samples
[
i
]
=
(
*
feat
s
)(
i
);
}
}
// square
// square
...
@@ -110,7 +107,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
...
@@ -110,7 +107,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feat) const {
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
}
CopyStdVector2Vector
(
samples
,
feat
);
CopyStdVector2Vector
(
samples
,
feat
s
);
return
true
;
return
true
;
}
}
...
@@ -124,16 +121,16 @@ CMVN::CMVN(std::string cmvn_file,
...
@@ -124,16 +121,16 @@ CMVN::CMVN(std::string cmvn_file,
dim_
=
stats_
.
NumCols
()
-
1
;
dim_
=
stats_
.
NumCols
()
-
1
;
}
}
void
CMVN
::
Accept
Waveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
void
CMVN
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
feats
)
{
base_extractor_
->
Accept
Waveform
(
input
);
base_extractor_
->
Accept
(
feats
);
return
;
return
;
}
}
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
feat
)
{
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
outputs
)
{
if
(
base_extractor_
->
Read
(
feat
)
==
false
)
{
if
(
base_extractor_
->
Read
(
outputs
)
==
false
)
{
return
false
;
return
false
;
}
}
Compute
(
feat
);
Compute
(
outputs
);
return
true
;
return
true
;
}
}
...
...
speechx/speechx/frontend/normalizer.h
浏览文件 @
22fe1c9d
...
@@ -45,15 +45,16 @@ class DecibelNormalizer : public FeatureExtractorInterface {
...
@@ -45,15 +45,16 @@ class DecibelNormalizer : public FeatureExtractorInterface {
explicit
DecibelNormalizer
(
explicit
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs_wave
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outputs_wave
);
// normalize audio, the dim is 1.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
s
)
const
;
DecibelNormalizerOptions
opts_
;
DecibelNormalizerOptions
opts_
;
size_t
dim_
;
size_t
dim_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
...
@@ -65,15 +66,19 @@ class CMVN : public FeatureExtractorInterface {
...
@@ -65,15 +66,19 @@ class CMVN : public FeatureExtractorInterface {
public:
public:
explicit
CMVN
(
std
::
string
cmvn_file
,
explicit
CMVN
(
std
::
string
cmvn_file
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWaveform
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
feats
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
// the length of outputs = feature_row * feature_dim,
// the Matrix is squashed into Vector
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outputs
);
// the dim_ is the feature dim.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
private:
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
const
;
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
s
)
const
;
void
ApplyCMVN
(
kaldi
::
MatrixBase
<
BaseFloat
>*
feats
);
void
ApplyCMVN
(
kaldi
::
MatrixBase
<
BaseFloat
>*
feats
);
kaldi
::
Matrix
<
double
>
stats_
;
kaldi
::
Matrix
<
double
>
stats_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
...
...
speechx/speechx/frontend/raw_audio.cc
浏览文件 @
22fe1c9d
...
@@ -21,33 +21,25 @@ using kaldi::BaseFloat;
...
@@ -21,33 +21,25 @@ using kaldi::BaseFloat;
using
kaldi
::
VectorBase
;
using
kaldi
::
VectorBase
;
using
kaldi
::
Vector
;
using
kaldi
::
Vector
;
RawAudio
Source
::
RawAudioSourc
e
(
int
buffer_size
)
RawAudio
Cache
::
RawAudioCach
e
(
int
buffer_size
)
:
finished_
(
false
),
data_length_
(
0
),
start_
(
0
),
timeout_
(
1
)
{
:
finished_
(
false
),
data_length_
(
0
),
start_
(
0
),
timeout_
(
1
)
{
ring_buffer_
.
resize
(
buffer_size
);
ring_buffer_
.
resize
(
buffer_size
);
}
}
void
RawAudio
Source
::
AcceptWaveform
(
const
VectorBase
<
BaseFloat
>&
data
)
{
void
RawAudio
Cache
::
Accept
(
const
VectorBase
<
BaseFloat
>&
input_audio
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
data_length_
+
data
.
Dim
()
>
ring_buffer_
.
size
())
{
while
(
data_length_
+
input_audio
.
Dim
()
>
ring_buffer_
.
size
())
{
ready_feed_condition_
.
wait
(
lock
);
ready_feed_condition_
.
wait
(
lock
);
}
}
for
(
size_t
idx
=
0
;
idx
<
data
.
Dim
();
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
input_audio
.
Dim
();
++
idx
)
{
ring_buffer_
[
idx
%
ring_buffer_
.
size
()]
=
data
(
idx
);
int32
buffer_idx
=
(
idx
+
start_
)
%
ring_buffer_
.
size
();
ring_buffer_
[
buffer_idx
]
=
input_audio
(
idx
);
}
}
data_length_
+=
data
.
Dim
();
data_length_
+=
input_audio
.
Dim
();
}
}
// bool RawAudioSource::AcceptWaveform(BaseFloat* data, int length) {
bool
RawAudioCache
::
Read
(
Vector
<
BaseFloat
>*
output_audio
)
{
// std::unique_lock<std::mutex> lock(mutex_);
size_t
chunk_size
=
output_audio
->
Dim
();
// for (size_t idx = 0; idx < length; ++idx) {
// ring_buffer_[idx % ring_buffer_.size()] = data[idx];
//}
// data_length_ += length;
// finish_condition_.notify_one();
//}
bool
RawAudioSource
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
size_t
chunk_size
=
feat
->
Dim
();
kaldi
::
Timer
timer
;
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
chunk_size
>
data_length_
)
{
while
(
chunk_size
>
data_length_
)
{
...
@@ -69,11 +61,12 @@ bool RawAudioSource::Read(Vector<BaseFloat>* feat) {
...
@@ -69,11 +61,12 @@ bool RawAudioSource::Read(Vector<BaseFloat>* feat) {
// read last chunk data
// read last chunk data
if
(
chunk_size
>
data_length_
)
{
if
(
chunk_size
>
data_length_
)
{
chunk_size
=
data_length_
;
chunk_size
=
data_length_
;
feat
->
Resize
(
chunk_size
);
output_audio
->
Resize
(
chunk_size
);
}
}
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
feat
->
Data
()[
idx
]
=
ring_buffer_
[
idx
];
int
buff_idx
=
(
start_
+
idx
)
%
ring_buffer_
.
size
();
output_audio
->
Data
()[
idx
]
=
ring_buffer_
[
buff_idx
];
}
}
data_length_
-=
chunk_size
;
data_length_
-=
chunk_size
;
start_
=
(
start_
+
chunk_size
)
%
ring_buffer_
.
size
();
start_
=
(
start_
+
chunk_size
)
%
ring_buffer_
.
size
();
...
...
speechx/speechx/frontend/raw_audio.h
浏览文件 @
22fe1c9d
...
@@ -20,12 +20,13 @@
...
@@ -20,12 +20,13 @@
namespace
ppspeech
{
namespace
ppspeech
{
class
RawAudio
Sourc
e
:
public
FeatureExtractorInterface
{
class
RawAudio
Cach
e
:
public
FeatureExtractorInterface
{
public:
public:
explicit
RawAudioSource
(
int
buffer_size
=
kint16max
);
explicit
RawAudioCache
(
int
buffer_size
=
kint16max
);
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
data
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input_audio
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_audio
);
virtual
size_t
Dim
()
const
{
return
data_length_
;
}
// the audio dim is 1
virtual
size_t
Dim
()
const
{
return
1
;
}
virtual
void
SetFinished
()
{
virtual
void
SetFinished
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
finished_
=
true
;
finished_
=
true
;
...
@@ -41,14 +42,14 @@ class RawAudioSource : public FeatureExtractorInterface {
...
@@ -41,14 +42,14 @@ class RawAudioSource : public FeatureExtractorInterface {
std
::
condition_variable
ready_feed_condition_
;
std
::
condition_variable
ready_feed_condition_
;
kaldi
::
int32
timeout_
;
kaldi
::
int32
timeout_
;
DISALLOW_COPY_AND_ASSIGN
(
RawAudio
Sourc
e
);
DISALLOW_COPY_AND_ASSIGN
(
RawAudio
Cach
e
);
};
};
// it is a datasource for testing different frontend module.
// it is a datasource for testing different frontend module.
class
RawData
Source
:
public
FeatureExtractorInterface
{
class
RawData
Cache
:
public
FeatureExtractorInterface
{
public:
public:
explicit
RawData
Sourc
e
()
{
finished_
=
false
;
}
explicit
RawData
Cach
e
()
{
finished_
=
false
;
}
virtual
void
Accept
Waveform
(
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
data_
=
input
;
data_
=
input
;
}
}
...
@@ -60,6 +61,7 @@ class RawDataSource : public FeatureExtractorInterface {
...
@@ -60,6 +61,7 @@ class RawDataSource : public FeatureExtractorInterface {
data_
.
Resize
(
0
);
data_
.
Resize
(
0
);
return
true
;
return
true
;
}
}
//the dim is data_ length
virtual
size_t
Dim
()
const
{
return
data_
.
Dim
();
}
virtual
size_t
Dim
()
const
{
return
data_
.
Dim
();
}
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
...
@@ -68,7 +70,7 @@ class RawDataSource : public FeatureExtractorInterface {
...
@@ -68,7 +70,7 @@ class RawDataSource : public FeatureExtractorInterface {
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
data_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
data_
;
bool
finished_
;
bool
finished_
;
DISALLOW_COPY_AND_ASSIGN
(
RawData
Sourc
e
);
DISALLOW_COPY_AND_ASSIGN
(
RawData
Cach
e
);
};
};
}
// namespace ppspeech
}
// namespace ppspeech
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录