Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
2e94e0f6
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2e94e0f6
编写于
4月 02, 2022
作者:
H
Hui Zhang
提交者:
GitHub
4月 02, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1640 from zh794390558/frontend
[speechx] Frontend refactor
上级
36df70cb
f83ec411
变更
26
显示空白变更内容
内联
并排
Showing
26 changed file
with
210 addition
and
178 deletion
+210
-178
speechx/examples/decoder/offline_decoder_main.cc
speechx/examples/decoder/offline_decoder_main.cc
+1
-1
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
...hx/examples/decoder/offline_decoder_sliding_chunk_main.cc
+1
-1
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+12
-14
speechx/speechx/frontend/CMakeLists.txt
speechx/speechx/frontend/CMakeLists.txt
+1
-9
speechx/speechx/frontend/audio/CMakeLists.txt
speechx/speechx/frontend/audio/CMakeLists.txt
+11
-0
speechx/speechx/frontend/audio/audio_cache.cc
speechx/speechx/frontend/audio/audio_cache.cc
+1
-1
speechx/speechx/frontend/audio/audio_cache.h
speechx/speechx/frontend/audio/audio_cache.h
+2
-2
speechx/speechx/frontend/audio/cmvn.cc
speechx/speechx/frontend/audio/cmvn.cc
+3
-68
speechx/speechx/frontend/audio/cmvn.h
speechx/speechx/frontend/audio/cmvn.h
+48
-0
speechx/speechx/frontend/audio/data_cache.h
speechx/speechx/frontend/audio/data_cache.h
+2
-2
speechx/speechx/frontend/audio/db_norm.cc
speechx/speechx/frontend/audio/db_norm.cc
+95
-0
speechx/speechx/frontend/audio/db_norm.h
speechx/speechx/frontend/audio/db_norm.h
+4
-28
speechx/speechx/frontend/audio/fbank.h
speechx/speechx/frontend/audio/fbank.h
+2
-2
speechx/speechx/frontend/audio/feature_cache.cc
speechx/speechx/frontend/audio/feature_cache.cc
+3
-3
speechx/speechx/frontend/audio/feature_cache.h
speechx/speechx/frontend/audio/feature_cache.h
+4
-4
speechx/speechx/frontend/audio/frontend_itf.h
speechx/speechx/frontend/audio/frontend_itf.h
+1
-1
speechx/speechx/frontend/audio/linear_spectrogram.cc
speechx/speechx/frontend/audio/linear_spectrogram.cc
+2
-2
speechx/speechx/frontend/audio/linear_spectrogram.h
speechx/speechx/frontend/audio/linear_spectrogram.h
+4
-4
speechx/speechx/frontend/audio/mfcc.h
speechx/speechx/frontend/audio/mfcc.h
+0
-0
speechx/speechx/frontend/audio/normalizer.h
speechx/speechx/frontend/audio/normalizer.h
+4
-1
speechx/speechx/frontend/feature_extractor_controller.h
speechx/speechx/frontend/feature_extractor_controller.h
+0
-13
speechx/speechx/frontend/feature_extractor_controller_impl.h
speechx/speechx/frontend/feature_extractor_controller_impl.h
+0
-13
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+1
-1
speechx/speechx/nnet/decodable.h
speechx/speechx/nnet/decodable.h
+4
-5
speechx/speechx/nnet/nnet_itf.h
speechx/speechx/nnet/nnet_itf.h
+0
-0
speechx/speechx/nnet/paddle_nnet.h
speechx/speechx/nnet/paddle_nnet.h
+4
-3
未找到文件。
speechx/examples/decoder/offline_decoder_main.cc
浏览文件 @
2e94e0f6
...
...
@@ -17,7 +17,7 @@
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "frontend/data_cache.h"
#include "frontend/
audio/
data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
...
...
speechx/examples/decoder/offline_decoder_sliding_chunk_main.cc
浏览文件 @
2e94e0f6
...
...
@@ -17,7 +17,7 @@
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "frontend/data_cache.h"
#include "frontend/
audio/
data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
...
...
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
2e94e0f6
...
...
@@ -14,19 +14,18 @@
// todo refactor, replace with gtest
#include "frontend/linear_spectrogram.h"
#include "base/flags.h"
#include "base/log.h"
#include "frontend/audio_cache.h"
#include "frontend/data_cache.h"
#include "frontend/feature_cache.h"
#include "frontend/feature_extractor_interface.h"
#include "frontend/normalizer.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include <glog/logging.h>
#include "frontend/audio/audio_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/normalizer.h"
DEFINE_string
(
wav_rspecifier
,
""
,
"test wav scp path"
);
DEFINE_string
(
feature_wspecifier
,
""
,
"output feats wspecifier"
);
...
...
@@ -170,13 +169,13 @@ int main(int argc, char* argv[]) {
// feature pipeline: wave cache --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> global cmvn -> feat cache
// std::unique_ptr<ppspeech::F
eatureExtractor
Interface> data_source(new
// std::unique_ptr<ppspeech::F
rontend
Interface> data_source(new
// ppspeech::DataCache());
std
::
unique_ptr
<
ppspeech
::
F
eatureExtractor
Interface
>
data_source
(
std
::
unique_ptr
<
ppspeech
::
F
rontend
Interface
>
data_source
(
new
ppspeech
::
AudioCache
());
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
F
eatureExtractor
Interface
>
db_norm
(
std
::
unique_ptr
<
ppspeech
::
F
rontend
Interface
>
db_norm
(
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
,
std
::
move
(
data_source
)));
ppspeech
::
LinearSpectrogramOptions
opt
;
...
...
@@ -185,12 +184,11 @@ int main(int argc, char* argv[]) {
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
std
::
unique_ptr
<
ppspeech
::
F
eatureExtractor
Interface
>
linear_spectrogram
(
std
::
unique_ptr
<
ppspeech
::
F
rontend
Interface
>
linear_spectrogram
(
new
ppspeech
::
LinearSpectrogram
(
opt
,
std
::
move
(
db_norm
)));
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
FLAGS_cmvn_write_path
,
std
::
move
(
linear_spectrogram
)));
std
::
unique_ptr
<
ppspeech
::
FrontendInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
FLAGS_cmvn_write_path
,
std
::
move
(
linear_spectrogram
)));
ppspeech
::
FeatureCache
feature_cache
(
kint16max
,
std
::
move
(
cmvn
));
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
...
...
speechx/speechx/frontend/CMakeLists.txt
浏览文件 @
2e94e0f6
project
(
frontend
)
add_library
(
frontend STATIC
normalizer.cc
linear_spectrogram.cc
audio_cache.cc
feature_cache.cc
)
target_link_libraries
(
frontend PUBLIC kaldi-matrix
)
\ No newline at end of file
add_subdirectory
(
audio
)
\ No newline at end of file
speechx/speechx/frontend/audio/CMakeLists.txt
浏览文件 @
2e94e0f6
# Build script for the audio frontend sources in this directory.
project(frontend)

# All listed translation units are compiled into the static library `frontend`.
add_library(frontend STATIC
    cmvn.cc
    db_norm.cc
    linear_spectrogram.cc
    audio_cache.cc
    feature_cache.cc
)

# kaldi-matrix is linked PUBLIC, so it is also propagated to every target
# that links against `frontend`.
target_link_libraries(frontend PUBLIC kaldi-matrix)
\ No newline at end of file
speechx/speechx/frontend/audio_cache.cc
→
speechx/speechx/frontend/audio
/audio
_cache.cc
浏览文件 @
2e94e0f6
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/audio_cache.h"
#include "frontend/audio
/audio
_cache.h"
#include "kaldi/base/timer.h"
namespace
ppspeech
{
...
...
speechx/speechx/frontend/audio_cache.h
→
speechx/speechx/frontend/audio
/audio
_cache.h
浏览文件 @
2e94e0f6
...
...
@@ -16,12 +16,12 @@
#pragma once
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
namespace
ppspeech
{
// waves cache
class
AudioCache
:
public
F
eatureExtractor
Interface
{
class
AudioCache
:
public
F
rontend
Interface
{
public:
explicit
AudioCache
(
int
buffer_size
=
kint16max
);
...
...
speechx/speechx/frontend/
normalizer
.cc
→
speechx/speechx/frontend/
audio/cmvn
.cc
浏览文件 @
2e94e0f6
...
...
@@ -13,7 +13,7 @@
// limitations under the License.
#include "frontend/
normalizer
.h"
#include "frontend/
audio/cmvn
.h"
#include "kaldi/feat/cmvn.h"
#include "kaldi/util/kaldi-io.h"
...
...
@@ -26,73 +26,8 @@ using std::vector;
using
kaldi
::
SubVector
;
using
std
::
unique_ptr
;
DecibelNormalizer
::
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
opts_
=
opts
;
dim_
=
1
;
}
void
DecibelNormalizer
::
Accept
(
const
kaldi
::
VectorBase
<
BaseFloat
>&
waves
)
{
base_extractor_
->
Accept
(
waves
);
}
bool
DecibelNormalizer
::
Read
(
kaldi
::
Vector
<
BaseFloat
>*
waves
)
{
if
(
base_extractor_
->
Read
(
waves
)
==
false
||
waves
->
Dim
()
==
0
)
{
return
false
;
}
Compute
(
waves
);
return
true
;
}
bool
DecibelNormalizer
::
Compute
(
VectorBase
<
BaseFloat
>*
waves
)
const
{
// calculate db rms
BaseFloat
rms_db
=
0.0
;
BaseFloat
mean_square
=
0.0
;
BaseFloat
gain
=
0.0
;
BaseFloat
wave_float_normlization
=
1.0
f
/
(
std
::
pow
(
2
,
16
-
1
));
vector
<
BaseFloat
>
samples
;
samples
.
resize
(
waves
->
Dim
());
for
(
size_t
i
=
0
;
i
<
samples
.
size
();
++
i
)
{
samples
[
i
]
=
(
*
waves
)(
i
);
}
// square
for
(
auto
&
d
:
samples
)
{
if
(
opts_
.
convert_int_float
)
{
d
=
d
*
wave_float_normlization
;
}
mean_square
+=
d
*
d
;
}
// mean
mean_square
/=
samples
.
size
();
rms_db
=
10
*
std
::
log10
(
mean_square
);
gain
=
opts_
.
target_db
-
rms_db
;
if
(
gain
>
opts_
.
max_gain_db
)
{
LOG
(
ERROR
)
<<
"Unable to normalize segment to "
<<
opts_
.
target_db
<<
"dB,"
<<
"because the the probable gain have exceeds opts_.max_gain_db"
<<
opts_
.
max_gain_db
<<
"dB."
;
return
false
;
}
// Note that this is an in-place transformation.
for
(
auto
&
item
:
samples
)
{
// python item *= 10.0 ** (gain / 20.0)
item
*=
std
::
pow
(
10.0
,
gain
/
20.0
);
}
std
::
memcpy
(
waves
->
Data
(),
samples
.
data
(),
sizeof
(
BaseFloat
)
*
samples
.
size
());
return
true
;
}
CMVN
::
CMVN
(
std
::
string
cmvn_file
,
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
)
CMVN
::
CMVN
(
std
::
string
cmvn_file
,
unique_ptr
<
FrontendInterface
>
base_extractor
)
:
var_norm_
(
true
)
{
base_extractor_
=
std
::
move
(
base_extractor
);
bool
binary
;
...
...
speechx/speechx/frontend/audio/cmvn.h
0 → 100644
浏览文件 @
2e94e0f6
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "frontend/audio/frontend_itf.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/options-itf.h"
namespace
ppspeech
{
// Frontend module that sits on top of another FrontendInterface
// (`base_extractor_`). SetFinished(), IsFinished() and Reset() are delegated
// to that upstream module.
// NOTE(review): presumably applies the statistics held in `stats_` to the
// upstream features (the implementation lives in cmvn.cc) -- confirm there.
class CMVN : public FrontendInterface {
  public:
    // `cmvn_file`: path handed to the constructor (its use is in cmvn.cc).
    // `base_extractor`: upstream module; ownership is taken by this object.
    explicit CMVN(std::string cmvn_file,
                  std::unique_ptr<FrontendInterface> base_extractor);

    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);

    // the length of feats = feature_row * feature_dim,
    // the Matrix is squashed into Vector
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* feats);

    // the dim_ is the feature dim.
    virtual size_t Dim() const { return dim_; }

    // The three calls below are forwarded unchanged to the upstream module.
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
    virtual void Reset() { base_extractor_->Reset(); }

  private:
    void Compute(kaldi::VectorBase<kaldi::BaseFloat>* feats) const;
    void ApplyCMVN(kaldi::MatrixBase<BaseFloat>* feats);

    kaldi::Matrix<double> stats_;
    std::unique_ptr<FrontendInterface> base_extractor_;  // owned upstream
    size_t dim_;
    bool var_norm_;
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/data_cache.h
→
speechx/speechx/frontend/
audio/
data_cache.h
浏览文件 @
2e94e0f6
...
...
@@ -17,13 +17,13 @@
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
namespace
ppspeech
{
// A data source for testing different frontend module.
// It accepts waves or feats.
class
DataCache
:
public
F
eatureExtractor
Interface
{
class
DataCache
:
public
F
rontend
Interface
{
public:
explicit
DataCache
()
{
finished_
=
false
;
}
...
...
speechx/speechx/frontend/audio/db_norm.cc
0 → 100644
浏览文件 @
2e94e0f6
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/audio/db_norm.h"
#include "kaldi/feat/cmvn.h"
#include "kaldi/util/kaldi-io.h"
namespace
ppspeech
{
using
kaldi
::
Vector
;
using
kaldi
::
VectorBase
;
using
kaldi
::
BaseFloat
;
using
std
::
vector
;
using
kaldi
::
SubVector
;
using
std
::
unique_ptr
;
// Creates a decibel normalizer stacked on `base_extractor`.
//   opts:           copied into opts_.
//   base_extractor: upstream module; ownership moves into base_extractor_.
// dim_ is always set to 1.
DecibelNormalizer::DecibelNormalizer(
    const DecibelNormalizerOptions& opts,
    std::unique_ptr<FrontendInterface> base_extractor) {
    dim_ = 1;
    opts_ = opts;
    base_extractor_ = std::move(base_extractor);
}
// Hands the incoming samples, unmodified, to the wrapped upstream module.
void DecibelNormalizer::Accept(const kaldi::VectorBase<BaseFloat>& waves) {
    FrontendInterface& upstream = *base_extractor_;
    upstream.Accept(waves);
}
// Pulls the next chunk from the upstream module into `waves` and runs
// Compute() on it.
// Returns false when the upstream read fails or delivers an empty vector,
// true otherwise.
// NOTE(review): the result of Compute() is discarded, so true is returned
// even when Compute() reports failure -- confirm this is intended.
bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
    const bool got_data = base_extractor_->Read(waves);
    if (!got_data) {
        return false;
    }
    if (waves->Dim() == 0) {
        return false;
    }
    Compute(waves);
    return true;
}
// Scales `waves` in place so that its RMS level equals opts_.target_db.
//
// When opts_.convert_int_float is set, every sample is first multiplied by
// 1 / 2^15; the level is measured on, and the gain applied to, that scaled
// value.
//
// Returns false and leaves `waves` untouched when the required gain is larger
// than opts_.max_gain_db. Returns true otherwise, including for an empty
// vector, which is left as is.
bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
    const size_t num_samples = waves->Dim();
    if (num_samples == 0) {
        // Nothing to measure or scale; also avoids the 0 / 0 below.
        return true;
    }

    // 1 / 2^(16 - 1); a power of two, so the multiplication is exact.
    const BaseFloat wave_float_normalization = 1.0f / 32768.0f;
    const bool convert = opts_.convert_int_float;

    // First pass is read-only so that `waves` stays unchanged if we bail out.
    BaseFloat mean_square = 0.0;
    for (size_t i = 0; i < num_samples; ++i) {
        BaseFloat d = (*waves)(i);
        if (convert) {
            d = d * wave_float_normalization;
        }
        mean_square += d * d;
    }
    mean_square /= num_samples;

    // calculate db rms
    const BaseFloat rms_db = 10 * std::log10(mean_square);
    const BaseFloat gain = opts_.target_db - rms_db;

    if (gain > opts_.max_gain_db) {
        LOG(ERROR)
            << "Unable to normalize segment to " << opts_.target_db << "dB,"
            << "because the the probable gain have exceeds opts_.max_gain_db"
            << opts_.max_gain_db << "dB.";
        return false;
    }

    // python: item *= 10.0 ** (gain / 20.0)
    // The factor does not depend on the sample, so compute it once.
    const auto scale = std::pow(10.0, gain / 20.0);

    // Second pass: write the normalized samples straight back into `waves`.
    for (size_t i = 0; i < num_samples; ++i) {
        BaseFloat d = (*waves)(i);
        if (convert) {
            d = d * wave_float_normalization;
        }
        d *= scale;
        (*waves)(i) = d;
    }
    return true;
}
}
// namespace ppspeech
speechx/speechx/frontend/
normalizer
.h
→
speechx/speechx/frontend/
audio/db_norm
.h
浏览文件 @
2e94e0f6
...
...
@@ -16,7 +16,7 @@
#pragma once
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/options-itf.h"
...
...
@@ -40,11 +40,11 @@ struct DecibelNormalizerOptions {
}
};
class
DecibelNormalizer
:
public
F
eatureExtractor
Interface
{
class
DecibelNormalizer
:
public
F
rontend
Interface
{
public:
explicit
DecibelNormalizer
(
const
DecibelNormalizerOptions
&
opts
,
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor
);
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
waves
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
waves
);
// normalize audio, the dim is 1.
...
...
@@ -57,33 +57,9 @@ class DecibelNormalizer : public FeatureExtractorInterface {
bool
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
waves
)
const
;
DecibelNormalizerOptions
opts_
;
size_t
dim_
;
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor_
;
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
waveform_
;
};
class
CMVN
:
public
FeatureExtractorInterface
{
public:
explicit
CMVN
(
std
::
string
cmvn_file
,
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
// the length of feats = feature_row * feature_dim,
// the Matrix is squashed into Vector
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// the dim_ is the feautre dim.
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
base_extractor_
->
SetFinished
();
}
virtual
bool
IsFinished
()
const
{
return
base_extractor_
->
IsFinished
();
}
virtual
void
Reset
()
{
base_extractor_
->
Reset
();
}
private:
void
Compute
(
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>*
feats
)
const
;
void
ApplyCMVN
(
kaldi
::
MatrixBase
<
BaseFloat
>*
feats
);
kaldi
::
Matrix
<
double
>
stats_
;
std
::
unique_ptr
<
FeatureExtractorInterface
>
base_extractor_
;
size_t
dim_
;
bool
var_norm_
;
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/frontend/fbank.h
→
speechx/speechx/frontend/
audio/
fbank.h
浏览文件 @
2e94e0f6
...
...
@@ -20,10 +20,10 @@
namespace
ppspeech
{
class
FbankExtractor
:
F
eatureExtractor
Interface
{
class
FbankExtractor
:
F
rontend
Interface
{
public:
explicit
FbankExtractor
(
const
FbankOptions
&
opts
,
share_ptr
<
F
eatureExtractor
Interface
>
pre_extractor
);
share_ptr
<
F
rontend
Interface
>
pre_extractor
);
virtual
void
AcceptWaveform
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
input
)
=
0
;
virtual
void
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feat
)
=
0
;
...
...
speechx/speechx/frontend/feature_cache.cc
→
speechx/speechx/frontend/
audio/
feature_cache.cc
浏览文件 @
2e94e0f6
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/feature_cache.h"
#include "frontend/
audio/
feature_cache.h"
namespace
ppspeech
{
...
...
@@ -23,8 +23,8 @@ using std::vector;
using
kaldi
::
SubVector
;
using
std
::
unique_ptr
;
FeatureCache
::
FeatureCache
(
int
max_size
,
unique_ptr
<
FeatureExtractor
Interface
>
base_extractor
)
{
FeatureCache
::
FeatureCache
(
int
max_size
,
unique_ptr
<
Frontend
Interface
>
base_extractor
)
{
max_size_
=
max_size
;
base_extractor_
=
std
::
move
(
base_extractor
);
}
...
...
speechx/speechx/frontend/feature_cache.h
→
speechx/speechx/frontend/
audio/
feature_cache.h
浏览文件 @
2e94e0f6
...
...
@@ -15,15 +15,15 @@
#pragma once
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
namespace
ppspeech
{
class
FeatureCache
:
public
F
eatureExtractor
Interface
{
class
FeatureCache
:
public
F
rontend
Interface
{
public:
explicit
FeatureCache
(
int32
max_size
=
kint16max
,
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor
=
NULL
);
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor
=
NULL
);
// Feed feats or waves
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
...
...
@@ -53,7 +53,7 @@ class FeatureCache : public FeatureExtractorInterface {
bool
Compute
();
size_t
max_size_
;
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor_
;
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor_
;
std
::
mutex
mutex_
;
std
::
queue
<
kaldi
::
Vector
<
BaseFloat
>>
cache_
;
...
...
speechx/speechx/frontend/
feature_extractor_interface
.h
→
speechx/speechx/frontend/
audio/frontend_itf
.h
浏览文件 @
2e94e0f6
...
...
@@ -19,7 +19,7 @@
namespace
ppspeech
{
class
F
eatureExtractor
Interface
{
class
F
rontend
Interface
{
public:
// Feed inputs: features(2D saved in 1D) or waveforms(1D).
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
)
=
0
;
...
...
speechx/speechx/frontend/linear_spectrogram.cc
→
speechx/speechx/frontend/
audio/
linear_spectrogram.cc
浏览文件 @
2e94e0f6
...
...
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "frontend/linear_spectrogram.h"
#include "frontend/
audio/
linear_spectrogram.h"
#include "kaldi/base/kaldi-math.h"
#include "kaldi/matrix/matrix-functions.h"
...
...
@@ -27,7 +27,7 @@ using std::vector;
LinearSpectrogram
::
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor
)
{
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor
)
{
opts_
=
opts
;
base_extractor_
=
std
::
move
(
base_extractor
);
int32
window_size
=
opts
.
frame_opts
.
WindowSize
();
...
...
speechx/speechx/frontend/linear_spectrogram.h
→
speechx/speechx/frontend/
audio/
linear_spectrogram.h
浏览文件 @
2e94e0f6
...
...
@@ -16,7 +16,7 @@
#pragma once
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
#include "kaldi/feat/feature-window.h"
namespace
ppspeech
{
...
...
@@ -35,11 +35,11 @@ struct LinearSpectrogramOptions {
}
};
class
LinearSpectrogram
:
public
F
eatureExtractor
Interface
{
class
LinearSpectrogram
:
public
F
rontend
Interface
{
public:
explicit
LinearSpectrogram
(
const
LinearSpectrogramOptions
&
opts
,
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor
);
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor
);
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// the dim_ is the dim of single frame feature
...
...
@@ -61,7 +61,7 @@ class LinearSpectrogram : public FeatureExtractorInterface {
std
::
vector
<
kaldi
::
BaseFloat
>
hanning_window_
;
kaldi
::
BaseFloat
hanning_window_energy_
;
LinearSpectrogramOptions
opts_
;
std
::
unique_ptr
<
F
eatureExtractor
Interface
>
base_extractor_
;
std
::
unique_ptr
<
F
rontend
Interface
>
base_extractor_
;
int
chunk_sample_size_
;
DISALLOW_COPY_AND_ASSIGN
(
LinearSpectrogram
);
};
...
...
speechx/speechx/frontend/mfcc.h
→
speechx/speechx/frontend/
audio/
mfcc.h
浏览文件 @
2e94e0f6
文件已移动
speechx/speechx/frontend/
window
.h
→
speechx/speechx/frontend/
audio/normalizer
.h
浏览文件 @
2e94e0f6
...
...
@@ -12,4 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// extract the window of kaldi feat.
#pragma once
#include "frontend/audio/cmvn.h"
#include "frontend/audio/db_norm.h"
\ No newline at end of file
speechx/speechx/frontend/feature_extractor_controller.h
已删除
100644 → 0
浏览文件 @
36df70cb
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
speechx/speechx/frontend/feature_extractor_controller_impl.h
已删除
100644 → 0
浏览文件 @
36df70cb
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
speechx/speechx/nnet/decodable.cc
浏览文件 @
2e94e0f6
...
...
@@ -22,7 +22,7 @@ using std::vector;
using
kaldi
::
Vector
;
Decodable
::
Decodable
(
const
std
::
shared_ptr
<
NnetInterface
>&
nnet
,
const
std
::
shared_ptr
<
F
eatureExtractor
Interface
>&
frontend
)
const
std
::
shared_ptr
<
F
rontend
Interface
>&
frontend
)
:
frontend_
(
frontend
),
nnet_
(
nnet
),
frame_offset_
(
0
),
frames_ready_
(
0
)
{}
void
Decodable
::
Acceptlikelihood
(
const
Matrix
<
BaseFloat
>&
likelihood
)
{
...
...
speechx/speechx/nnet/decodable.h
浏览文件 @
2e94e0f6
...
...
@@ -13,7 +13,7 @@
// limitations under the License.
#include "base/common.h"
#include "frontend/
feature_extractor_interface
.h"
#include "frontend/
audio/frontend_itf
.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "nnet/decodable-itf.h"
#include "nnet/nnet_interface.h"
...
...
@@ -24,9 +24,8 @@ struct DecodableOpts;
class
Decodable
:
public
kaldi
::
DecodableInterface
{
public:
explicit
Decodable
(
const
std
::
shared_ptr
<
NnetInterface
>&
nnet
,
const
std
::
shared_ptr
<
FeatureExtractorInterface
>&
frontend
);
explicit
Decodable
(
const
std
::
shared_ptr
<
NnetInterface
>&
nnet
,
const
std
::
shared_ptr
<
FrontendInterface
>&
frontend
);
// void Init(DecodableOpts config);
virtual
kaldi
::
BaseFloat
LogLikelihood
(
int32
frame
,
int32
index
);
virtual
bool
IsLastFrame
(
int32
frame
)
const
;
...
...
@@ -41,7 +40,7 @@ class Decodable : public kaldi::DecodableInterface {
private:
bool
AdvanceChunk
();
std
::
shared_ptr
<
F
eatureExtractor
Interface
>
frontend_
;
std
::
shared_ptr
<
F
rontend
Interface
>
frontend_
;
std
::
shared_ptr
<
NnetInterface
>
nnet_
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
nnet_cache_
;
// std::vector<std::vector<kaldi::BaseFloat>> nnet_cache_;
...
...
speechx/speechx/nnet/nnet_i
nterface
.h
→
speechx/speechx/nnet/nnet_i
tf
.h
浏览文件 @
2e94e0f6
文件已移动
speechx/speechx/nnet/paddle_nnet.h
浏览文件 @
2e94e0f6
...
...
@@ -15,13 +15,14 @@
#pragma once
#include "base/common.h"
#include "nnet/nnet_interface.h"
#include "paddle_inference_api.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/options-itf.h"
#include "base/common.h"
#include "nnet/nnet_itf.h"
#include "paddle_inference_api.h"
#include <numeric>
namespace
ppspeech
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录