Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
027feae9
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
027feae9
编写于
3月 10, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
rename arg of Accept & Read
上级
22fe1c9d
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
63 addition
and
61 deletion
+63
-61
speechx/speechx/frontend/feature_cache.cc
speechx/speechx/frontend/feature_cache.cc
+3
-3
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+2
-2
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+2
-1
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+15
-15
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+3
-3
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+15
-15
speechx/speechx/frontend/normalizer.h
speechx/speechx/frontend/normalizer.h
+6
-6
speechx/speechx/frontend/raw_audio.cc
speechx/speechx/frontend/raw_audio.cc
+9
-9
speechx/speechx/frontend/raw_audio.h
speechx/speechx/frontend/raw_audio.h
+8
-7
未找到文件。
speechx/speechx/frontend/feature_cache.cc
浏览文件 @
027feae9
...
...
@@ -40,7 +40,7 @@ void FeatureCache::Accept(
}
// pop feature chunk
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_
feats
)
{
bool
FeatureCache
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
cache_
.
empty
()
&&
base_extractor_
->
IsFinished
()
==
false
)
{
...
...
@@ -53,8 +53,8 @@ bool FeatureCache::Read(kaldi::Vector<kaldi::BaseFloat>* output_feats) {
usleep
(
1000
);
// sleep 1 ms
}
if
(
cache_
.
empty
())
return
false
;
output_
feats
->
Resize
(
cache_
.
front
().
Dim
());
output_
feats
->
CopyFromVec
(
cache_
.
front
());
feats
->
Resize
(
cache_
.
front
().
Dim
());
feats
->
CopyFromVec
(
cache_
.
front
());
cache_
.
pop
();
ready_feed_condition_
.
notify_one
();
return
true
;
...
...
speechx/speechx/frontend/feature_cache.h
浏览文件 @
027feae9
...
...
@@ -26,8 +26,8 @@ class FeatureCache : public FeatureExtractorInterface {
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
=
NULL
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
//
output_
feats dim = num_frames * feature_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_
feats
);
// feats dim = num_frames * feature_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// feature cache only cache feature which from base extractor
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
void
SetFinished
()
{
...
...
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
027feae9
...
...
@@ -21,7 +21,8 @@ namespace ppspeech {
class
FeatureExtractorInterface
{
public:
// accept input data
// accept input data, accept feature or raw waves which decided
// by the base_extractor
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
// get the processed result
...
...
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
027feae9
...
...
@@ -70,7 +70,7 @@ void LinearSpectrogram::Accept(const VectorBase<BaseFloat>& inputs) {
base_extractor_
->
Accept
(
inputs
);
}
bool
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
output_
feats
)
{
bool
LinearSpectrogram
::
Read
(
Vector
<
BaseFloat
>*
feats
)
{
Vector
<
BaseFloat
>
input_feats
(
chunk_sample_size_
);
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
...
...
@@ -83,10 +83,10 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* output_feats) {
if
(
result
.
size
()
!=
0
)
{
feat_size
=
result
.
size
()
*
result
[
0
].
size
();
}
output_
feats
->
Resize
(
feat_size
);
feats
->
Resize
(
feat_size
);
// todo refactor (SimleGoat)
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
(
*
output_
feats
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
(
*
feats
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
}
return
true
;
}
...
...
@@ -120,9 +120,9 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
// Compute spectrogram feat
// todo: refactor later (SmileGoat)
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
,
vector
<
vector
<
float
>>&
feat
)
{
int
num_samples
=
wave
.
size
();
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
s
,
vector
<
vector
<
float
>>&
feat
s
)
{
int
num_samples
=
wave
s
.
size
();
const
int
&
frame_length
=
opts_
.
frame_opts
.
WindowSize
();
const
int
&
sample_rate
=
opts_
.
frame_opts
.
samp_freq
;
const
int
&
frame_shift
=
opts_
.
frame_opts
.
WindowShift
();
...
...
@@ -134,34 +134,34 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
}
int
num_frames
=
1
+
((
num_samples
-
frame_length
)
/
frame_shift
);
feat
.
resize
(
num_frames
);
feat
s
.
resize
(
num_frames
);
vector
<
float
>
fft_real
((
fft_points_
/
2
+
1
),
0
);
vector
<
float
>
fft_img
((
fft_points_
/
2
+
1
),
0
);
vector
<
float
>
v
(
frame_length
,
0
);
vector
<
float
>
power
((
fft_points
/
2
+
1
));
for
(
int
i
=
0
;
i
<
num_frames
;
++
i
)
{
vector
<
float
>
data
(
wave
.
data
()
+
i
*
frame_shift
,
wave
.
data
()
+
i
*
frame_shift
+
frame_length
);
vector
<
float
>
data
(
wave
s
.
data
()
+
i
*
frame_shift
,
wave
s
.
data
()
+
i
*
frame_shift
+
frame_length
);
Hanning
(
&
data
);
fft_img
.
clear
();
fft_real
.
clear
();
v
.
assign
(
data
.
begin
(),
data
.
end
());
NumpyFft
(
&
v
,
&
fft_real
,
&
fft_img
);
feat
[
i
].
resize
(
fft_points
/
2
+
1
);
// the last dimension is Fs/2 Hz
feat
s
[
i
].
resize
(
fft_points
/
2
+
1
);
// the last dimension is Fs/2 Hz
for
(
int
j
=
0
;
j
<
(
fft_points
/
2
+
1
);
++
j
)
{
power
[
j
]
=
fft_real
[
j
]
*
fft_real
[
j
]
+
fft_img
[
j
]
*
fft_img
[
j
];
feat
[
i
][
j
]
=
power
[
j
];
feat
s
[
i
][
j
]
=
power
[
j
];
if
(
j
==
0
||
j
==
feat
[
0
].
size
()
-
1
)
{
feat
[
i
][
j
]
/=
scale
;
if
(
j
==
0
||
j
==
feat
s
[
0
].
size
()
-
1
)
{
feat
s
[
i
][
j
]
/=
scale
;
}
else
{
feat
[
i
][
j
]
*=
(
2.0
/
scale
);
feat
s
[
i
][
j
]
*=
(
2.0
/
scale
);
}
// log added eps=1e-14
feat
[
i
][
j
]
=
std
::
log
(
feat
[
i
][
j
]
+
1e-14
);
feat
s
[
i
][
j
]
=
std
::
log
(
feats
[
i
][
j
]
+
1e-14
);
}
}
return
true
;
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
027feae9
...
...
@@ -40,7 +40,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_
feats
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// the dim_ is the dim of single frame feature
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
...
...
@@ -48,8 +48,8 @@ class LinearSpectrogram : public FeatureExtractorInterface {
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
s
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
s
);
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
027feae9
...
...
@@ -35,16 +35,16 @@ DecibelNormalizer::DecibelNormalizer(
}
void
DecibelNormalizer
::
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
inputs_wave
)
{
base_extractor_
->
Accept
(
inputs_wave
);
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
)
{
base_extractor_
->
Accept
(
waves
);
}
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
outputs_wave
)
{
if
(
base_extractor_
->
Read
(
outputs_wave
)
==
false
||
outputs_wave
->
Dim
()
==
0
)
{
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
waves
)
{
if
(
base_extractor_
->
Read
(
waves
)
==
false
||
waves
->
Dim
()
==
0
)
{
return
false
;
}
Compute
(
outputs_wave
);
Compute
(
waves
);
return
true
;
}
...
...
@@ -67,7 +67,7 @@ void CopyStdVector2Vector(const vector<BaseFloat>& input,
}
}
bool
DecibelNormalizer
::
Compute
(
VectorBase
<
BaseFloat
>*
feat
s
)
const
{
bool
DecibelNormalizer
::
Compute
(
VectorBase
<
BaseFloat
>*
wave
s
)
const
{
// calculate db rms
BaseFloat
rms_db
=
0.0
;
BaseFloat
mean_square
=
0.0
;
...
...
@@ -75,9 +75,9 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
BaseFloat
wave_float_normlization
=
1.0
f
/
(
std
::
pow
(
2
,
16
-
1
));
vector
<
BaseFloat
>
samples
;
samples
.
resize
(
feat
s
->
Dim
());
samples
.
resize
(
wave
s
->
Dim
());
for
(
size_t
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
samples
[
i
]
=
(
*
feat
s
)(
i
);
samples
[
i
]
=
(
*
wave
s
)(
i
);
}
// square
...
...
@@ -107,7 +107,7 @@ bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* feats) const {
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
CopyStdVector2Vector
(
samples
,
feat
s
);
CopyStdVector2Vector
(
samples
,
wave
s
);
return
true
;
}
...
...
@@ -121,16 +121,16 @@ CMVN::CMVN(std::string cmvn_file,
dim_
=
stats_
.
NumCols
()
-
1
;
}
void
CMVN
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
fea
ts
)
{
base_extractor_
->
Accept
(
fea
ts
);
void
CMVN
::
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inpu
ts
)
{
base_extractor_
->
Accept
(
inpu
ts
);
return
;
}
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
outpu
ts
)
{
if
(
base_extractor_
->
Read
(
outpu
ts
)
==
false
)
{
bool
CMVN
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
fea
ts
)
{
if
(
base_extractor_
->
Read
(
fea
ts
)
==
false
)
{
return
false
;
}
Compute
(
outpu
ts
);
Compute
(
fea
ts
);
return
true
;
}
...
...
speechx/speechx/frontend/normalizer.h
浏览文件 @
027feae9
...
...
@@ -46,15 +46,15 @@ class DecibelNormalizer : public FeatureExtractorInterface {
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs_wave
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outputs_wave
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
// noramlize audio, the dim is 1.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
private:
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
s
)
const
;
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
wave
s
)
const
;
DecibelNormalizerOptions
opts_
;
size_t
dim_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
...
...
@@ -67,11 +67,11 @@ class CMVN : public FeatureExtractorInterface {
explicit
CMVN
(
std
::
string
cmvn_file
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
fea
ts
);
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inpu
ts
);
// the length of
outpu
ts = feature_row * feature_dim,
// the length of
fea
ts = feature_row * feature_dim,
// the Matrix is squashed into Vector
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
outpu
ts
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
fea
ts
);
// the dim_ is the feautre dim.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
...
...
speechx/speechx/frontend/raw_audio.cc
浏览文件 @
027feae9
...
...
@@ -26,20 +26,20 @@ RawAudioCache::RawAudioCache(int buffer_size)
ring_buffer_
.
resize
(
buffer_size
);
}
void
RawAudioCache
::
Accept
(
const
VectorBase
<
BaseFloat
>&
input_audio
)
{
void
RawAudioCache
::
Accept
(
const
VectorBase
<
BaseFloat
>&
waves
)
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
data_length_
+
input_audio
.
Dim
()
>
ring_buffer_
.
size
())
{
while
(
data_length_
+
waves
.
Dim
()
>
ring_buffer_
.
size
())
{
ready_feed_condition_
.
wait
(
lock
);
}
for
(
size_t
idx
=
0
;
idx
<
input_audio
.
Dim
();
++
idx
)
{
for
(
size_t
idx
=
0
;
idx
<
waves
.
Dim
();
++
idx
)
{
int32
buffer_idx
=
(
idx
+
start_
)
%
ring_buffer_
.
size
();
ring_buffer_
[
buffer_idx
]
=
input_audio
(
idx
);
ring_buffer_
[
buffer_idx
]
=
waves
(
idx
);
}
data_length_
+=
input_audio
.
Dim
();
data_length_
+=
waves
.
Dim
();
}
bool
RawAudioCache
::
Read
(
Vector
<
BaseFloat
>*
output_audio
)
{
size_t
chunk_size
=
output_audio
->
Dim
();
bool
RawAudioCache
::
Read
(
Vector
<
BaseFloat
>*
waves
)
{
size_t
chunk_size
=
waves
->
Dim
();
kaldi
::
Timer
timer
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
while
(
chunk_size
>
data_length_
)
{
...
...
@@ -61,12 +61,12 @@ bool RawAudioCache::Read(Vector<BaseFloat>* output_audio) {
// read last chunk data
if
(
chunk_size
>
data_length_
)
{
chunk_size
=
data_length_
;
output_audio
->
Resize
(
chunk_size
);
waves
->
Resize
(
chunk_size
);
}
for
(
size_t
idx
=
0
;
idx
<
chunk_size
;
++
idx
)
{
int
buff_idx
=
(
start_
+
idx
)
%
ring_buffer_
.
size
();
output_audio
->
Data
()[
idx
]
=
ring_buffer_
[
buff_idx
];
waves
->
Data
()[
idx
]
=
ring_buffer_
[
buff_idx
];
}
data_length_
-=
chunk_size
;
start_
=
(
start_
+
chunk_size
)
%
ring_buffer_
.
size
();
...
...
speechx/speechx/frontend/raw_audio.h
浏览文件 @
027feae9
...
...
@@ -23,8 +23,8 @@ namespace ppspeech {
class
RawAudioCache
:
public
FeatureExtractorInterface
{
public:
explicit
RawAudioCache
(
int
buffer_size
=
kint16max
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input_audio
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
output_audio
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
// the audio dim is 1
virtual
size_t
Dim
()
const
{
return
1
;
}
virtual
void
SetFinished
()
{
...
...
@@ -45,19 +45,20 @@ class RawAudioCache : public FeatureExtractorInterface {
DISALLOW_COPY_AND_ASSIGN
(
RawAudioCache
);
};
// it is a datasource for testing different frontend module.
// it is a data source to test different frontend module.
// it Accepts waves or feats.
class
RawDataCache
:
public
FeatureExtractorInterface
{
public:
explicit
RawDataCache
()
{
finished_
=
false
;
}
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
{
data_
=
input
;
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
s
)
{
data_
=
input
s
;
}
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
s
)
{
if
(
data_
.
Dim
()
==
0
)
{
return
false
;
}
(
*
feat
)
=
data_
;
(
*
feat
s
)
=
data_
;
data_
.
Resize
(
0
);
return
true
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录