Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c11a5d97
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c11a5d97
编写于
4月 05, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor linear feature:unify vector & remove redundant function
上级
e366fb6b
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
58 addition
and
105 deletion
+58
-105
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+4
-0
speechx/speechx/frontend/audio/linear_spectrogram.cc
speechx/speechx/frontend/audio/linear_spectrogram.cc
+50
-97
speechx/speechx/frontend/audio/linear_spectrogram.h
speechx/speechx/frontend/audio/linear_spectrogram.h
+4
-8
未找到文件。
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
c11a5d97
...
@@ -181,6 +181,10 @@ int main(int argc, char* argv[]) {
...
@@ -181,6 +181,10 @@ int main(int argc, char* argv[]) {
ppspeech
::
LinearSpectrogramOptions
opt
;
ppspeech
::
LinearSpectrogramOptions
opt
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_shift_ms
=
10
;
opt
.
frame_opts
.
frame_shift_ms
=
10
;
opt
.
frame_opts
.
dither
=
0.0
;
opt
.
frame_opts
.
remove_dc_offset
=
false
;
opt
.
frame_opts
.
window_type
=
"hanning"
;
opt
.
frame_opts
.
preemph_coeff
=
0.0
;
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
...
...
speechx/speechx/frontend/audio/linear_spectrogram.cc
浏览文件 @
c11a5d97
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/linear_spectrogram.h"
#include "kaldi/base/kaldi-math.h"
#include "kaldi/base/kaldi-math.h"
#include "kaldi/feat/feature-common.h"
#include "kaldi/feat/feature-functions.h"
#include "kaldi/matrix/matrix-functions.h"
#include "kaldi/matrix/matrix-functions.h"
namespace
ppspeech
{
namespace
ppspeech
{
...
@@ -21,30 +23,23 @@ namespace ppspeech {
...
@@ -21,30 +23,23 @@ namespace ppspeech {
using
kaldi
::
int32
;
using
kaldi
::
int32
;
using
kaldi
::
BaseFloat
;
using
kaldi
::
BaseFloat
;
using
kaldi
::
Vector
;
using
kaldi
::
Vector
;
using
kaldi
::
SubVector
;
using
kaldi
::
VectorBase
;
using
kaldi
::
VectorBase
;
using
kaldi
::
Matrix
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
using
std
::
vector
;
LinearSpectrogram
::
LinearSpectrogram
(
LinearSpectrogram
::
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
FrontendInterface
>
base_extractor
)
{
std
::
unique_ptr
<
FrontendInterface
>
base_extractor
)
opts_
=
opts
;
:
opts_
(
opts
),
feature_window_funtion_
(
opts
.
frame_opts
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
base_extractor_
=
std
::
move
(
base_extractor
);
int32
window_size
=
opts
.
frame_opts
.
WindowSize
();
int32
window_size
=
opts
.
frame_opts
.
WindowSize
();
int32
window_shift
=
opts
.
frame_opts
.
WindowShift
();
int32
window_shift
=
opts
.
frame_opts
.
WindowShift
();
fft_points_
=
window_size
;
dim_
=
window_size
/
2
+
1
;
chunk_sample_size_
=
chunk_sample_size_
=
static_cast
<
int32
>
(
opts
.
streaming_chunk
*
opts
.
frame_opts
.
samp_freq
);
static_cast
<
int32
>
(
opts
.
streaming_chunk
*
opts
.
frame_opts
.
samp_freq
);
hanning_window_
.
resize
(
window_size
);
hanning_window_energy_
=
kaldi
::
VecVec
(
feature_window_funtion_
.
window
,
feature_window_funtion_
.
window
);
double
a
=
M_2PI
/
(
window_size
-
1
);
hanning_window_energy_
=
0
;
for
(
int
i
=
0
;
i
<
window_size
;
++
i
)
{
hanning_window_
[
i
]
=
0.5
-
0.5
*
cos
(
a
*
i
);
hanning_window_energy_
+=
hanning_window_
[
i
]
*
hanning_window_
[
i
];
}
dim_
=
fft_points_
/
2
+
1
;
// the dimension is Fs/2 Hz
}
}
void
LinearSpectrogram
::
Accept
(
const
VectorBase
<
BaseFloat
>&
inputs
)
{
void
LinearSpectrogram
::
Accept
(
const
VectorBase
<
BaseFloat
>&
inputs
)
{
...
@@ -56,99 +51,57 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
...
@@ -56,99 +51,57 @@ bool LinearSpectrogram::Read(Vector<BaseFloat>* feats) {
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
bool
flag
=
base_extractor_
->
Read
(
&
input_feats
);
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
if
(
flag
==
false
||
input_feats
.
Dim
()
==
0
)
return
false
;
vector
<
BaseFloat
>
input_feats_vec
(
input_feats
.
Dim
());
int32
feat_len
=
input_feats
.
Dim
();
std
::
memcpy
(
input_feats_vec
.
data
(),
int32
left_len
=
reminded_wav_
.
Dim
();
input_feats
.
Data
(),
Vector
<
BaseFloat
>
waves
(
feat_len
+
left_len
);
input_feats
.
Dim
()
*
sizeof
(
BaseFloat
));
waves
.
Range
(
0
,
left_len
).
CopyFromVec
(
reminded_wav_
);
vector
<
vector
<
BaseFloat
>>
result
;
waves
.
Range
(
left_len
,
feat_len
).
CopyFromVec
(
input_feats
);
Compute
(
input_feats_vec
,
result
);
Compute
(
waves
,
feats
);
int32
feat_size
=
0
;
int32
frame_shift
=
opts_
.
frame_opts
.
WindowShift
();
if
(
result
.
size
()
!=
0
)
{
int32
num_frames
=
kaldi
::
NumFrames
(
waves
.
Dim
(),
opts_
.
frame_opts
);
feat_size
=
result
.
size
()
*
result
[
0
].
size
();
int32
left_samples
=
waves
.
Dim
()
-
frame_shift
*
num_frames
;
}
reminded_wav_
.
Resize
(
left_samples
);
feats
->
Resize
(
feat_size
);
reminded_wav_
.
CopyFromVec
(
// todo refactor (SimleGoat)
waves
.
Range
(
frame_shift
*
num_frames
,
left_samples
));
for
(
size_t
idx
=
0
;
idx
<
feat_size
;
++
idx
)
{
(
*
feats
)(
idx
)
=
result
[
idx
/
dim_
][
idx
%
dim_
];
}
return
true
;
}
void
LinearSpectrogram
::
Hanning
(
vector
<
float
>*
data
)
const
{
CHECK_GE
(
data
->
size
(),
hanning_window_
.
size
());
for
(
size_t
i
=
0
;
i
<
hanning_window_
.
size
();
++
i
)
{
data
->
at
(
i
)
*=
hanning_window_
[
i
];
}
}
bool
LinearSpectrogram
::
NumpyFft
(
vector
<
BaseFloat
>*
v
,
vector
<
BaseFloat
>*
real
,
vector
<
BaseFloat
>*
img
)
const
{
Vector
<
BaseFloat
>
v_tmp
;
v_tmp
.
Resize
(
v
->
size
());
std
::
memcpy
(
v_tmp
.
Data
(),
v
->
data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
RealFft
(
&
v_tmp
,
true
);
v
->
resize
(
v_tmp
.
Dim
());
std
::
memcpy
(
v
->
data
(),
v_tmp
.
Data
(),
sizeof
(
BaseFloat
)
*
(
v
->
size
()));
real
->
push_back
(
v
->
at
(
0
));
img
->
push_back
(
0
);
for
(
int
i
=
1
;
i
<
v
->
size
()
/
2
;
i
++
)
{
real
->
push_back
(
v
->
at
(
2
*
i
));
img
->
push_back
(
v
->
at
(
2
*
i
+
1
));
}
real
->
push_back
(
v
->
at
(
1
));
img
->
push_back
(
0
);
return
true
;
return
true
;
}
}
// Compute spectrogram feat
// Compute spectrogram feat
// todo: refactor later (SmileGoat)
bool
LinearSpectrogram
::
Compute
(
const
Vector
<
BaseFloat
>&
waves
,
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
waves
,
Vector
<
BaseFloat
>*
feats
)
{
vector
<
vector
<
float
>>&
feats
)
{
int32
num_samples
=
waves
.
Dim
();
int
num_samples
=
waves
.
size
();
int32
frame_length
=
opts_
.
frame_opts
.
WindowSize
();
const
int
&
frame_length
=
opts_
.
frame_opts
.
WindowSize
();
int32
sample_rate
=
opts_
.
frame_opts
.
samp_freq
;
const
int
&
sample_rate
=
opts_
.
frame_opts
.
samp_freq
;
BaseFloat
scale
=
2.0
/
(
hanning_window_energy_
*
sample_rate
);
const
int
&
frame_shift
=
opts_
.
frame_opts
.
WindowShift
();
const
int
&
fft_points
=
fft_points_
;
const
float
scale
=
hanning_window_energy_
*
sample_rate
;
if
(
num_samples
<
frame_length
)
{
if
(
num_samples
<
frame_length
)
{
return
true
;
return
true
;
}
}
int
num_frames
=
1
+
((
num_samples
-
frame_length
)
/
frame_shift
);
int32
num_frames
=
kaldi
::
NumFrames
(
num_samples
,
opts_
.
frame_opts
);
feats
.
resize
(
num_frames
);
feats
->
Resize
(
num_frames
*
dim_
);
vector
<
float
>
fft_real
((
fft_points_
/
2
+
1
),
0
);
Vector
<
BaseFloat
>
window
;
vector
<
float
>
fft_img
((
fft_points_
/
2
+
1
),
0
);
vector
<
float
>
v
(
frame_length
,
0
);
for
(
int
frame_idx
=
0
;
frame_idx
<
num_frames
;
++
frame_idx
)
{
vector
<
float
>
power
((
fft_points
/
2
+
1
));
kaldi
::
ExtractWindow
(
0
,
waves
,
for
(
int
i
=
0
;
i
<
num_frames
;
++
i
)
{
frame_idx
,
vector
<
float
>
data
(
waves
.
data
()
+
i
*
frame_shift
,
opts_
.
frame_opts
,
waves
.
data
()
+
i
*
frame_shift
+
frame_length
);
feature_window_funtion_
,
Hanning
(
&
data
);
&
window
,
fft_img
.
clear
();
NULL
);
fft_real
.
clear
();
v
.
assign
(
data
.
begin
(),
data
.
end
());
SubVector
<
BaseFloat
>
output_row
(
feats
->
Data
()
+
frame_idx
*
dim_
,
dim_
);
NumpyFft
(
&
v
,
&
fft_real
,
&
fft_img
);
window
.
Resize
(
frame_length
,
kaldi
::
kCopyData
);
RealFft
(
&
window
,
true
);
feats
[
i
].
resize
(
fft_points
/
2
+
1
);
// the last dimension is Fs/2 Hz
kaldi
::
ComputePowerSpectrum
(
&
window
);
for
(
int
j
=
0
;
j
<
(
fft_points
/
2
+
1
);
++
j
)
{
SubVector
<
BaseFloat
>
power_spectrum
(
window
,
0
,
dim_
);
power
[
j
]
=
fft_real
[
j
]
*
fft_real
[
j
]
+
fft_img
[
j
]
*
fft_img
[
j
];
power_spectrum
.
Scale
(
scale
);
feats
[
i
][
j
]
=
power
[
j
];
power_spectrum
(
0
)
=
power_spectrum
(
0
)
/
2
;
power_spectrum
(
dim_
-
1
)
=
power_spectrum
(
dim_
-
1
)
/
2
;
if
(
j
==
0
||
j
==
feats
[
0
].
size
()
-
1
)
{
power_spectrum
.
Add
(
1e-14
);
feats
[
i
][
j
]
/=
scale
;
power_spectrum
.
ApplyLog
();
}
else
{
output_row
.
CopyFromVec
(
power_spectrum
);
feats
[
i
][
j
]
*=
(
2.0
/
scale
);
}
// log added eps=1e-14
feats
[
i
][
j
]
=
std
::
log
(
feats
[
i
][
j
]
+
1e-14
);
}
}
}
return
true
;
return
true
;
}
}
...
...
speechx/speechx/frontend/audio/linear_spectrogram.h
浏览文件 @
c11a5d97
...
@@ -49,19 +49,15 @@ class LinearSpectrogram : public FrontendInterface {
...
@@ -49,19 +49,15 @@ class LinearSpectrogram : public FrontendInterface {
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
private:
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>*
data
)
const
;
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
waves
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feats
);
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
kaldi
::
int32
fft_points_
;
size_t
dim_
;
size_t
dim_
;
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window
_
;
kaldi
::
FeatureWindowFunction
feature_window_funtion
_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
LinearSpectrogramOptions
opts_
;
LinearSpectrogramOptions
opts_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
std
::
unique_ptr
<
FrontendInterface
>
base_extractor_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
reminded_wav_
;
int
chunk_sample_size_
;
int
chunk_sample_size_
;
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录