Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
f03d48f7
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f03d48f7
编写于
2月 10, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
test linear spectrogram feature
上级
a01fa866
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
62 additions
and
14 deletions
+62
-14
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+2
-0
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+28
-8
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+7
-3
speechx/speechx/frontend/linear_spectrogram_main.cc
speechx/speechx/frontend/linear_spectrogram_main.cc
+8
-1
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+1
-2
speechx/speechx/nnet/nnet_interface.h
speechx/speechx/nnet/nnet_interface.h
+16
-0
未找到文件。
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
f03d48f7
...
...
@@ -23,6 +23,8 @@ class FeatureExtractorInterface {
public:
virtual
void
AcceptWaveform
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
virtual
void
Compute
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBae
<
kaldi
::
BaseFloat
>*
feature
)
=
0
;
virtual
size_t
Dim
()
const
=
0
;
};
...
...
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
f03d48f7
...
...
@@ -22,7 +22,10 @@ using kaldi::Vector;
using
kaldi
::
Matrix
;
using
std
::
vector
;
LinearSpectrogram
::
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
)
{
LinearSpectrogram
::
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
int32
window_size
=
opts
.
frame_opts
.
WindowSize
();
int32
window_shift
=
opts
.
frame_opts
.
WindowShift
();
fft_points_
=
window_size
;
...
...
@@ -34,6 +37,8 @@ LinearSpectrogram::LinearSpectrogram(const LinearSpectrogramOptions& opts) {
hanning_window_
[
i
]
=
0.5
-
0.5
*
cos
(
a
*
i
);
hanning_window_energy_
+=
hanning_window_
[
i
]
*
hanning_window_
[
i
];
}
dim_
=
fft_points_
/
2
+
1
;
// the dimension is Fs/2 Hz
}
void
LinearSpectrogram
::
AcceptWavefrom
(
const
Vector
<
BaseFloat
>&
input
)
{
...
...
@@ -70,27 +75,42 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
return
true
;
}
// todo refactor later
//todo remove later
void
CopyVector2StdVector
(
const
kaldi
::
Vector
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
}
// todo remove later
bool
LinearSpectrogram
::
ReadFeats
(
Matrix
<
BaseFloat
>*
feats
)
const
{
vector
<
vector
<
BaseFloat
>>
feat
;
if
(
wavefrom_
.
empty
())
{
if
(
wavefrom_
.
Dim
()
==
0
)
{
return
false
;
}
kaldi
::
Vector
<
BaseFloat
>
feats
;
Compute
(
wavefrom_
,
&
feats
);
vector
<
vector
<
BaseFloat
>>
result
;
Compute
(
wavefrom_
,
result
);
vector
<
BaseFloat
>
feats_vec
;
CopyVector2StdVector
(
feats
,
&
feats_vec
);
Compute
(
feats_vec
,
result
);
feats
->
Resize
(
result
.
size
(),
result
[
0
].
size
());
for
(
int
row_idx
=
0
;
row_idx
<
result
.
size
();
++
row_idx
)
{
for
(
int
col_idx
=
0
;
col_idx
<
result
.
size
();
++
col_idx
)
{
feats
(
row_idx
,
col_idx
)
=
result
[
row_idx
][
col_idx
];
}
wavefrom_
.
clear
(
);
wavefrom_
.
Resize
(
0
);
return
true
;
}
// Compute spectrogram feat, return num frames
// only for test, remove later
// todo: compute the feature frame by frame.
void
LinearSpectrogram
::
Compute
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBae
<
kaldi
::
BaseFloat
>*
feature
)
{
base_extractor_
->
Compute
(
input
,
feature
);
}
// Compute spectrogram feat, only for test, remove later
// todo: refactor later (SmileGoat)
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
,
vector
<
vector
<
float
>>&
feat
)
{
vector
<
vector
<
float
>>&
feat
)
{
int
num_samples
=
wave
.
size
();
const
int
&
frame_length
=
opts
.
frame_opts
.
WindowSize
();
const
int
&
sample_rate
=
opts
.
frame_opts
.
samp_freq
;
...
...
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
f03d48f7
...
...
@@ -19,16 +19,19 @@ struct LinearSpectrogramOptions {
class
LinearSpectrogram
:
public
FeatureExtractorInterface
{
public:
explict
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
);
explict
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWavefrom
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
;
virtual
size_t
Dim
()
const
{
return
dim_
;
}
void
ReadFeats
(
kaldi
::
Matrix
<
kaldi
::
BaesFloat
>*
feats
)
const
;
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>&
data
)
const
;
kaldi
::
int32
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
void
Compute
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBae
<
kaldi
::
BaseFloat
>*
feature
);
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
...
...
@@ -38,7 +41,8 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
LinearSpectrogramOptions
opts_
;
std
::
vector
<
kaldi
::
BaseFloat
>
wavefrom_
;
// remove later, todo(SmileGoat)
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wavefrom_
;
// remove later, todo(SmileGoat)
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
};
...
...
speechx/speechx/frontend/linear_spectrogram_main.cc
浏览文件 @
f03d48f7
// todo refactor, replace with gtest
#include "frontend/linear_spectrogram.h"
#include "frontend/normalizer.h"
#include "kaldi/util/table-types.h"
#include "base/log.h"
#include "base/flags.h"
...
...
@@ -15,9 +17,14 @@ int main(int argc, char* argv[]) {
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
FLAGS_wav_rspecifier
);
kaldi
::
BaseFloatMatrixWriter
feat_writer
(
FLAGS_feature_wspecifier
);
// test feature linear_spectrogram: wave --> decibel_normalizer --> hanning window --> linear_spectrogram --> cmvn
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
LinearSpectrogramOptions
opt
;
ppspeech
::
LinearSpectrogram
linear_spectrogram
(
opt
);
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
base_feature_extractor
=
new
DecibelNormalizer
(
db_norm_opt
);
ppspeech
::
LinearSpectrogram
linear_spectrogram
(
opt
,
base_featrue_extractor
);
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
std
::
string
utt
=
wav_reader
.
Key
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
f03d48f7
...
...
@@ -2,8 +2,7 @@
#include "frontend/normalizer.h"
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>&
pre_extractor
)
{
const
DecibelNormalizerOptions
&
opts
)
{
}
...
...
speechx/speechx/nnet/nnet_interface.h
0 → 100644
浏览文件 @
f03d48f7
#pragma once
#include ""
namespace
ppspeech
{
class
NnetForwardInterface
{
public:
virtual
~
NnetForwardInterface
()
{}
virtual
void
FeedForward
(
const
kaldi
::
Matrix
<
BaseFloat
>&
features
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
inference
)
const
=
0
;
};
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录