Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c6027751
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
10 个月 前同步成功
通知
200
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
c6027751
编写于
2月 13, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add frontend cmakelist
上级
f03d48f7
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
179 addition
and
88 deletion
+179
-88
docs/source/reference.md
docs/source/reference.md
+4
-0
speechx/CMakeLists.txt
speechx/CMakeLists.txt
+1
-1
speechx/speechx/CMakeLists.txt
speechx/speechx/CMakeLists.txt
+11
-0
speechx/speechx/base/basic_types.h
speechx/speechx/base/basic_types.h
+2
-2
speechx/speechx/base/thread_pool.h
speechx/speechx/base/thread_pool.h
+21
-1
speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc
speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc
+2
-1
speechx/speechx/frontend/CMakeLists.txt
speechx/speechx/frontend/CMakeLists.txt
+8
-0
speechx/speechx/frontend/feature_extractor_interface.h
speechx/speechx/frontend/feature_extractor_interface.h
+3
-5
speechx/speechx/frontend/linear_spectrogram.cc
speechx/speechx/frontend/linear_spectrogram.cc
+47
-34
speechx/speechx/frontend/linear_spectrogram.h
speechx/speechx/frontend/linear_spectrogram.h
+12
-12
speechx/speechx/frontend/normalizer.cc
speechx/speechx/frontend/normalizer.cc
+55
-25
speechx/speechx/frontend/normalizer.h
speechx/speechx/frontend/normalizer.h
+13
-7
未找到文件。
docs/source/reference.md
浏览文件 @
c6027751
...
...
@@ -35,3 +35,7 @@ We borrowed a lot of code from these repos to build `model` and `engine`, thanks
*
[
librosa
](
https://github.com/librosa/librosa/blob/main/LICENSE.md
)
-
ISC License
-
Audio feature
*
[
ThreadPool
](
https://github.com/progschj/ThreadPool/blob/master/COPYING
)
-
zlib License
-
ThreadPool
speechx/CMakeLists.txt
浏览文件 @
c6027751
...
...
@@ -65,7 +65,7 @@ FetchContent_Declare(
URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
)
FetchContent_MakeAvailable
(
glog
)
include_directories
(
${
glog_BINARY_DIR
}
)
include_directories
(
${
glog_BINARY_DIR
}
${
glog_SOURCE_DIR
}
/src
)
# gtest
FetchContent_Declare
(
googletest
...
...
speechx/speechx/CMakeLists.txt
浏览文件 @
c6027751
...
...
@@ -4,11 +4,22 @@ project(speechx LANGUAGES CXX)
link_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
/third_party/openblas
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++14"
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/kaldi
)
add_subdirectory
(
kaldi
)
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/frontend
)
add_subdirectory
(
frontend
)
add_executable
(
mfcc-test codelab/feat_test/feature-mfcc-test.cc
)
target_link_libraries
(
mfcc-test kaldi-mfcc
)
add_executable
(
linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc
)
target_link_libraries
(
linear_spectrogram_main frontend kaildi-util kaldi-feat
)
speechx/speechx/base/basic_types.h
浏览文件 @
c6027751
...
...
@@ -16,7 +16,7 @@
#include "kaldi/base/kaldi-types.h"
#include <limits
.h
>
#include <limits>
typedef
float
BaseFloat
;
typedef
double
double64
;
...
...
@@ -35,7 +35,7 @@ typedef unsigned char uint8;
typedef
unsigned
short
uint16
;
typedef
unsigned
int
uint32
;
if
defined
(
__LP64__
)
&&
!
defined
(
OS_MACOSX
)
&&
!
defined
(
OS_OPENBSD
)
#
if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
typedef
unsigned
long
uint64
;
#else
typedef
unsigned
long
long
uint64
;
...
...
speechx/speechx/base/thread_pool.h
浏览文件 @
c6027751
// Copyright (c) 2012 Jakob Progsch, Václav Zeman
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgment in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source
// distribution.
// this code is from https://github.com/progschj/ThreadPool
#ifndef BASE_THREAD_POOL_H
...
...
@@ -97,4 +117,4 @@ inline ThreadPool::~ThreadPool()
worker
.
join
();
}
#endif
\ No newline at end of file
#endif
speechx/speechx/
frontend
/linear_spectrogram_main.cc
→
speechx/speechx/
codelab/feat_test
/linear_spectrogram_main.cc
浏览文件 @
c6027751
...
...
@@ -2,6 +2,7 @@
#include "frontend/linear_spectrogram.h"
#include "frontend/normalizer.h"
#include "frontend/feature_extractor_interface.h"
#include "kaldi/util/table-types.h"
#include "base/log.h"
#include "base/flags.h"
...
...
@@ -22,7 +23,7 @@ int main(int argc, char* argv[]) {
ppspeech
::
LinearSpectrogramOptions
opt
;
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
base_feature_extractor
=
new
DecibelNormalizer
(
db_norm_opt
);
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
);
ppspeech
::
LinearSpectrogram
linear_spectrogram
(
opt
,
base_featrue_extractor
);
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
...
...
speechx/speechx/frontend/CMakeLists.txt
浏览文件 @
c6027751
project
(
frontend
)
add_library
(
frontend
normalizer.cc
linear_spectrogram.cc
)
target_link_libraries
(
frontend kaldi-matrix
)
\ No newline at end of file
speechx/speechx/frontend/feature_extractor_interface.h
浏览文件 @
c6027751
...
...
@@ -15,16 +15,14 @@
#pragma once
#include "base/basic_types.h"
#incl
du
e "kaldi/matrix/kaldi-vector.h"
#incl
ud
e "kaldi/matrix/kaldi-vector.h"
namespace
ppspeech
{
class
FeatureExtractorInterface
{
public:
virtual
void
AcceptWaveform
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
virtual
void
Compute
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBae
<
kaldi
::
BaseFloat
>*
feature
)
=
0
;
virtual
void
AcceptWaveform
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Read
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
virtual
size_t
Dim
()
const
=
0
;
};
...
...
speechx/speechx/frontend/linear_spectrogram.cc
浏览文件 @
c6027751
...
...
@@ -16,15 +16,36 @@
#include "kaldi/base/kaldi-math.h"
#include "kaldi/matrix/matrix-functions.h"
namespace
ppspeech
{
using
kaldi
::
int32
;
using
kaldi
::
BaseFloat
;
using
kaldi
::
Vector
;
using
kaldi
::
Matrix
;
using
std
::
vector
;
//todo remove later
void
CopyVector2StdVector
(
const
kaldi
::
Vector
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
if
(
input
.
Dim
()
==
0
)
return
;
output
->
resize
(
input
.
Dim
());
for
(
size_t
idx
=
0
;
idx
<
input
.
Dim
();
++
idx
)
{
(
*
output
)[
idx
]
=
input
(
idx
);
}
}
void
CopyStdVector2Vector
(
const
vector
<
BaseFloat
>&
input
,
Vector
<
BaseFloat
>*
output
)
{
if
(
input
.
empty
())
return
;
output
->
Resize
(
input
.
size
());
for
(
size_t
idx
=
0
;
idx
<
input
.
size
();
++
idx
)
{
(
*
output
)(
idx
)
=
input
[
idx
];
}
}
LinearSpectrogram
::
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
int32
window_size
=
opts
.
frame_opts
.
WindowSize
();
int32
window_shift
=
opts
.
frame_opts
.
WindowShift
();
...
...
@@ -41,11 +62,8 @@ LinearSpectrogram::LinearSpectrogram(
dim_
=
fft_points_
/
2
+
1
;
// the dimension is Fs/2 Hz
}
void
LinearSpectrogram
::
AcceptWavefrom
(
const
Vector
<
BaseFloat
>&
input
)
{
wavefrom_
.
resize
(
input
.
Dim
());
for
(
size_t
idx
=
0
;
idx
<
input
.
Dim
();
++
idx
)
{
waveform_
[
idx
]
=
input
(
idx
);
}
void
LinearSpectrogram
::
AcceptWavefrom
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
input
)
{
base_extractor_
->
AcceptWaveform
(
input
);
}
void
LinearSpectrogram
::
Hanning
(
vector
<
float
>*
data
)
const
{
...
...
@@ -58,11 +76,11 @@ void LinearSpectrogram::Hanning(vector<float>* data) const {
bool
LinearSpectrogram
::
NumpyFft
(
vector
<
BaseFloat
>*
v
,
vector
<
BaseFloat
>*
real
,
vector
<
BaseFloat
>*
img
)
{
if
(
RealFft
(
v
,
true
))
{
LOG
(
ERROR
)
<<
"compute the fft occurs error"
;
return
false
;
}
vector
<
BaseFloat
>*
img
)
const
{
Vector
<
BaseFloat
>
v_tmp
;
CopyStdVector2Vector
(
*
v
,
&
v_tmp
)
;
RealFft
(
&
v_tmp
,
true
)
;
CopyVector2StdVector
(
v_tmp
,
v
);
real
->
push_back
(
v
->
at
(
0
));
img
->
push_back
(
0
);
for
(
int
i
=
1
;
i
<
v
->
size
()
/
2
;
i
++
)
{
...
...
@@ -75,36 +93,28 @@ bool LinearSpectrogram::NumpyFft(vector<BaseFloat>* v,
return
true
;
}
//todo remove later
void
CopyVector2StdVector
(
const
kaldi
::
Vector
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
}
// todo remove later
bool
LinearSpectrogram
::
ReadFeats
(
Matrix
<
BaseFloat
>*
feats
)
const
{
if
(
wavefrom_
.
Dim
()
==
0
)
{
return
false
;
}
kaldi
::
Vector
<
BaseFloat
>
feats
;
Compute
(
wavefrom_
,
&
feats
);
void
LinearSpectrogram
::
ReadFeats
(
Matrix
<
BaseFloat
>*
feats
)
{
Vector
<
BaseFloat
>
tmp
;
Compute
(
tmp
,
&
waveform_
);
vector
<
vector
<
BaseFloat
>>
result
;
vector
<
BaseFloat
>
feats_vec
;
CopyVector2StdVector
(
feats
,
&
feats_vec
);
CopyVector2StdVector
(
waveform_
,
&
feats_vec
);
Compute
(
feats_vec
,
result
);
feats
->
Resize
(
result
.
size
(),
result
[
0
].
size
());
for
(
int
row_idx
=
0
;
row_idx
<
result
.
size
();
++
row_idx
)
{
for
(
int
col_idx
=
0
;
col_idx
<
result
.
size
();
++
col_idx
)
{
feats
(
row_idx
,
col_idx
)
=
result
[
row_idx
][
col_idx
];
(
*
feats
)(
row_idx
,
col_idx
)
=
result
[
row_idx
][
col_idx
];
}
}
wavefrom_
.
Resize
(
0
);
return
true
;
waveform_
.
Resize
(
0
);
}
// only for test, remove later
// todo: compute the feature frame by frame.
void
LinearSpectrogram
::
Compute
(
const
kaldi
::
Vector
Base
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
Bae
<
kaldi
::
BaseFloat
>*
feature
)
{
base_extractor_
->
Compute
(
input
,
feature
);
void
LinearSpectrogram
::
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feature
)
{
base_extractor_
->
Read
(
feature
);
}
// Compute spectrogram feat, only for test, remove later
...
...
@@ -112,9 +122,9 @@ void LinearSpectrogram::Compute(const kaldi::VectorBase<kaldi::BaseFloat>& input
bool
LinearSpectrogram
::
Compute
(
const
vector
<
float
>&
wave
,
vector
<
vector
<
float
>>&
feat
)
{
int
num_samples
=
wave
.
size
();
const
int
&
frame_length
=
opts
.
frame_opts
.
WindowSize
();
const
int
&
sample_rate
=
opts
.
frame_opts
.
samp_freq
;
const
int
&
frame_shift
=
opts
.
frame_opts
.
WindowShift
();
const
int
&
frame_length
=
opts
_
.
frame_opts
.
WindowSize
();
const
int
&
sample_rate
=
opts
_
.
frame_opts
.
samp_freq
;
const
int
&
frame_shift
=
opts
_
.
frame_opts
.
WindowShift
();
const
int
&
fft_points
=
fft_points_
;
const
float
scale
=
hanning_window_energy_
*
frame_shift
;
...
...
@@ -132,11 +142,11 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
for
(
int
i
=
0
;
i
<
num_frames
;
++
i
)
{
vector
<
float
>
data
(
wave
.
data
()
+
i
*
frame_shift
,
wave
.
data
()
+
i
*
frame_shift
+
frame_length
);
Hanning
(
data
);
Hanning
(
&
data
);
fft_img
.
clear
();
fft_real
.
clear
();
v
.
assign
(
data
.
begin
(),
data
.
end
());
if
(
NumpyFft
(
&
v
,
fft_real
,
fft_img
))
{
if
(
NumpyFft
(
&
v
,
&
fft_real
,
&
fft_img
))
{
LOG
(
ERROR
)
<<
i
<<
" fft compute occurs error, please checkout the input data"
;
return
false
;
}
...
...
@@ -155,5 +165,8 @@ bool LinearSpectrogram::Compute(const vector<float>& wave,
// log added eps=1e-14
feat
[
i
][
j
]
=
std
::
log
(
feat
[
i
][
j
]
+
1e-14
);
}
}
return
true
;
}
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/linear_spectrogram.h
浏览文件 @
c6027751
...
...
@@ -8,7 +8,7 @@
namespace
ppspeech
{
struct
LinearSpectrogramOptions
{
kaldi
::
FrameExtrationOptions
frame_opts
;
kaldi
::
FrameExtra
c
tionOptions
frame_opts
;
LinearSpectrogramOptions
()
:
frame_opts
()
{}
...
...
@@ -19,19 +19,19 @@ struct LinearSpectrogramOptions {
class
LinearSpectrogram
:
public
FeatureExtractorInterface
{
public:
explict
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWavefrom
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
explic
i
t
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
AcceptWavefrom
(
const
kaldi
::
Vector
Base
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
Read
(
kaldi
::
Vector
Base
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
dim_
;
}
void
ReadFeats
(
kaldi
::
Matrix
<
kaldi
::
Ba
esFloat
>*
feats
)
const
;
void
ReadFeats
(
kaldi
::
Matrix
<
kaldi
::
Ba
seFloat
>*
feats
)
;
private:
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>
&
data
)
const
;
kaldi
::
int32
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
void
Compute
(
const
kaldi
::
Vector
Base
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
VectorBae
<
kaldi
::
BaseFloat
>*
feature
);
void
Hanning
(
std
::
vector
<
kaldi
::
BaseFloat
>
*
data
)
const
;
bool
Compute
(
const
std
::
vector
<
kaldi
::
BaseFloat
>&
wave
,
std
::
vector
<
std
::
vector
<
kaldi
::
BaseFloat
>>&
feat
);
void
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feature
);
bool
NumpyFft
(
std
::
vector
<
kaldi
::
BaseFloat
>*
v
,
std
::
vector
<
kaldi
::
BaseFloat
>*
real
,
std
::
vector
<
kaldi
::
BaseFloat
>*
img
)
const
;
...
...
@@ -41,7 +41,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
LinearSpectrogramOptions
opts_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wavef
ro
m_
;
// remove later, todo(SmileGoat)
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
wavef
or
m_
;
// remove later, todo(SmileGoat)
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
};
...
...
speechx/speechx/frontend/normalizer.cc
浏览文件 @
c6027751
#include "frontend/normalizer.h"
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
)
{
namespace
ppspeech
{
using
kaldi
::
Vector
;
using
kaldi
::
BaseFloat
;
using
std
::
vector
;
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
)
{
opts_
=
opts
;
}
void
DecibelNormalizer
::
AcceptWavefrom
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
{
void
DecibelNormalizer
::
AcceptWavefrom
(
const
Vector
<
BaseFloat
>&
input
)
{
waveform_
=
input
;
}
void
DecibelNormalizer
::
Read
(
Vector
<
BaseFloat
>*
feat
)
{
if
(
waveform_
.
Dim
()
==
0
)
return
;
Compute
(
waveform_
,
feat
);
}
void
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
//todo remove later
void
CopyVector2StdVector
(
const
kaldi
::
Vector
<
BaseFloat
>&
input
,
vector
<
BaseFloat
>*
output
)
{
if
(
input
.
Dim
()
==
0
)
return
;
output
->
resize
(
input
.
Dim
());
for
(
size_t
idx
=
0
;
idx
<
input
.
Dim
();
++
idx
)
{
(
*
output
)[
idx
]
=
input
(
idx
);
}
}
void
CopyStdVector2Vector
(
const
vector
<
BaseFloat
>&
input
,
Vector
<
BaseFloat
>*
output
)
{
if
(
input
.
empty
())
return
;
output
->
Resize
(
input
.
size
());
for
(
size_t
idx
=
0
;
idx
<
input
.
size
();
++
idx
)
{
(
*
output
)(
idx
)
=
input
[
idx
];
}
}
bool
DecibelNormalizer
::
Compute
(
const
Vector
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
bool
DecibelNormalizer
::
Compute
(
const
Vector
<
BaseFloat
>&
input
,
Vector
<
BaseFloat
>*
feat
)
const
{
// calculate db rms
float
rms_db
=
0.0
;
float
mean_square
=
0.0
;
float
gain
=
0.0
;
vector
<
BaseFloat
>
smaples
;
samples
.
resize
(
input
.
Size
());
BaseFloat
rms_db
=
0.0
;
BaseFloat
mean_square
=
0.0
;
BaseFloat
gain
=
0.0
;
BaseFloat
wave_float_normlization
=
1.0
f
/
(
std
::
pow
(
2
,
16
-
1
));
vector
<
BaseFloat
>
samples
;
samples
.
resize
(
input
.
Dim
());
for
(
int32
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
samples
[
i
]
=
input
(
i
);
}
// square
for
(
auto
&
d
:
samples
)
{
if
(
_opts
.
convert_int_float
)
{
d
=
d
*
WAVE_FLOAT_NORMALIZATION
;
if
(
opts_
.
convert_int_float
)
{
d
=
d
*
wave_float_normlization
;
}
mean_square
+=
d
*
d
;
}
...
...
@@ -37,12 +64,12 @@ bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
// mean
mean_square
/=
samples
.
size
();
rms_db
=
10
*
std
::
log10
(
mean_square
);
gain
=
opts
.
target_db
-
rms_db
;
gain
=
opts
_
.
target_db
-
rms_db
;
if
(
gain
>
opts
.
max_gain_db
)
{
LOG
(
ERROR
)
<<
"Unable to normalize segment to "
<<
opts
.
target_db
<<
"dB,"
<<
"because the the probable gain have exceeds opts.max_gain_db"
<<
opts
.
max_gain_db
<<
"dB."
;
if
(
gain
>
opts
_
.
max_gain_db
)
{
LOG
(
ERROR
)
<<
"Unable to normalize segment to "
<<
opts
_
.
target_db
<<
"dB,"
<<
"because the the probable gain have exceeds opts
_
.max_gain_db"
<<
opts
_
.
max_gain_db
<<
"dB."
;
return
false
;
}
...
...
@@ -51,27 +78,28 @@ bool DecibelNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
// python item *= 10.0 ** (gain / 20.0)
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
CopyStdVector2Vector
(
samples
,
feat
);
return
true
;
}
/*
PPNormalizer::PPNormalizer(
const PPNormalizerOptions& opts,
const std::unique_ptr<FeatureExtractorInterface>& pre_extractor) {
}
void
PPNormalizer
::
AcceptWavefrom
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
{
void PPNormalizer::AcceptWavefrom(const
Vector<
BaseFloat>& input) {
}
void
PPNormalizer
::
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
{
void PPNormalizer::Read(
Vector<
BaseFloat>* feat) {
}
bool
PPNormalizer
::
Compute
(
const
Vector
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>*
feat
)
{
bool PPNormalizer::Compute(const Vector<BaseFloat>& input,
Vector<
BaseFloat>>* feat) {
if ((input.Dim() % mean_.Dim()) == 0) {
LOG(ERROR) << "CMVN dimension is wrong!";
return false;
...
...
@@ -93,4 +121,6 @@ bool PPNormalizer::Compute(const Vector<kaldi::BaseFloat>& input,
}
return true;
}
}*/
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/normalizer.h
浏览文件 @
c6027751
#pragma once
#include "base/common.h"
#include "frontend/feature_extractor_interface.h"
#include "kaldi/util/options-itf.h"
namespace
ppspeech
{
...
...
@@ -9,6 +11,7 @@ namespace ppspeech {
struct
DecibelNormalizerOptions
{
float
target_db
;
float
max_gain_db
;
bool
convert_int_float
;
DecibelNormalizerOptions
()
:
target_db
(
-
20
),
max_gain_db
(
300.0
),
...
...
@@ -23,16 +26,19 @@ struct DecibelNormalizerOptions {
class
DecibelNormalizer
:
public
FeatureExtractorInterface
{
public:
explict
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
const
std
::
unique_ptr
<
FeatureExtractorInterface
>&
pre_extractor
);
virtual
void
AcceptWavefrom
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
;
explicit
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
);
virtual
void
AcceptWavefrom
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
input
);
virtual
void
Read
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feat
);
virtual
size_t
Dim
()
const
{
return
0
;
}
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
>*
feat
)
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
*
feat
)
const
;
private:
DecibelNormalizerOptions
opts_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
waveform_
;
};
/*
struct NormalizerOptions {
std::string mean_std_path;
NormalizerOptions() :
...
...
@@ -61,5 +67,5 @@ class PPNormalizer : public FeatureExtractorInterface {
kaldi::Vector<float> variance_;
NormalizerOptions _opts;
};
*/
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录