Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
37c6106e
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
37c6106e
编写于
5月 06, 2022
作者:
H
Hui Zhang
提交者:
GitHub
5月 06, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1848 from zh794390558/spx
[speechx] rm to_float32 flags, default feature fbank
上级
a58831dd
8522b829
变更
16
显示空白变更内容
内联
并排
Showing
16 changed file
with
84 addition
and
43 deletion
+84
-43
demos/streaming_asr_server/README.md
demos/streaming_asr_server/README.md
+1
-1
demos/streaming_asr_server/README_cn.md
demos/streaming_asr_server/README_cn.md
+1
-1
paddlespeech/cli/vector/infer.py
paddlespeech/cli/vector/infer.py
+3
-1
paddlespeech/server/README_cn.md
paddlespeech/server/README_cn.md
+1
-1
paddlespeech/server/engine/vector/__init__.py
paddlespeech/server/engine/vector/__init__.py
+13
-0
paddlespeech/server/engine/vector/python/__init__.py
paddlespeech/server/engine/vector/python/__init__.py
+13
-0
paddlespeech/server/engine/vector/python/vector_engine.py
paddlespeech/server/engine/vector/python/vector_engine.py
+1
-1
speechx/examples/ds2_ol/aishell/run.sh
speechx/examples/ds2_ol/aishell/run.sh
+0
-1
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
+3
-1
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
+1
-0
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
.../examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
+2
-1
speechx/examples/ds2_ol/websocket/websocket_server.sh
speechx/examples/ds2_ol/websocket/websocket_server.sh
+0
-1
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+29
-21
speechx/speechx/frontend/audio/fbank.cc
speechx/speechx/frontend/audio/fbank.cc
+7
-4
speechx/speechx/frontend/audio/feature_pipeline.cc
speechx/speechx/frontend/audio/feature_pipeline.cc
+5
-5
speechx/speechx/frontend/audio/feature_pipeline.h
speechx/speechx/frontend/audio/feature_pipeline.h
+4
-4
未找到文件。
demos/streaming_asr_server/README.md
浏览文件 @
37c6106e
demos/streaming_asr_server/README_cn.md
浏览文件 @
37c6106e
paddlespeech/cli/vector/infer.py
浏览文件 @
37c6106e
...
@@ -437,7 +437,9 @@ class VectorExecutor(BaseExecutor):
...
@@ -437,7 +437,9 @@ class VectorExecutor(BaseExecutor):
if
self
.
sample_rate
!=
16000
and
self
.
sample_rate
!=
8000
:
if
self
.
sample_rate
!=
16000
and
self
.
sample_rate
!=
8000
:
logger
.
error
(
logger
.
error
(
"invalid sample rate, please input --sr 8000 or --sr 16000"
)
"invalid sample rate, please input --sr 8000 or --sr 16000"
)
logger
.
error
(
f
"The model sample rate:
{
self
.
sample_rate
}
, the external sample rate is:
{
sample_rate
}
"
)
logger
.
error
(
f
"The model sample rate:
{
self
.
sample_rate
}
, the external sample rate is:
{
sample_rate
}
"
)
return
False
return
False
if
isinstance
(
audio_file
,
(
str
,
os
.
PathLike
)):
if
isinstance
(
audio_file
,
(
str
,
os
.
PathLike
)):
...
...
paddlespeech/server/README_cn.md
浏览文件 @
37c6106e
paddlespeech/server/engine/vector/__init__.py
浏览文件 @
37c6106e
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/server/engine/vector/python/__init__.py
浏览文件 @
37c6106e
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/server/engine/vector/python/vector_engine.py
浏览文件 @
37c6106e
...
@@ -16,9 +16,9 @@ from collections import OrderedDict
...
@@ -16,9 +16,9 @@ from collections import OrderedDict
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.compliance.librosa
import
melspectrogram
from
paddleaudio.compliance.librosa
import
melspectrogram
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.vector.infer
import
VectorExecutor
from
paddlespeech.cli.vector.infer
import
VectorExecutor
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.engine.base_engine
import
BaseEngine
...
...
speechx/examples/ds2_ol/aishell/run.sh
浏览文件 @
37c6106e
...
@@ -155,7 +155,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
...
@@ -155,7 +155,6 @@ if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
--wav_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/
${
aishell_wav_scp
}
\
--wav_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/
${
aishell_wav_scp
}
\
--cmvn_file
=
$cmvn
\
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--to_float32
=
true
\
--streaming_chunk
=
30
\
--streaming_chunk
=
30
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$wfst
/words.txt
\
--word_symbol_table
=
$wfst
/words.txt
\
...
...
speechx/examples/ds2_ol/decoder/recognizer_test_main.cc
浏览文件 @
37c6106e
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
DEFINE_string
(
wav_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
wav_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
DEFINE_int32
(
sample_rate
,
16000
,
"sample rate"
);
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
...
@@ -30,7 +31,8 @@ int main(int argc, char* argv[]) {
...
@@ -30,7 +31,8 @@ int main(int argc, char* argv[]) {
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
FLAGS_wav_rspecifier
);
FLAGS_wav_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
int
sample_rate
=
16000
;
int
sample_rate
=
FLAGS_sample_rate
;
float
streaming_chunk
=
FLAGS_streaming_chunk
;
float
streaming_chunk
=
FLAGS_streaming_chunk
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
LOG
(
INFO
)
<<
"sr: "
<<
sample_rate
;
LOG
(
INFO
)
<<
"sr: "
<<
sample_rate
;
...
...
speechx/examples/ds2_ol/feat/compute_fbank_main.cc
浏览文件 @
37c6106e
...
@@ -69,6 +69,7 @@ int main(int argc, char* argv[]) {
...
@@ -69,6 +69,7 @@ int main(int argc, char* argv[]) {
feat_cache_opts
.
frame_chunk_stride
=
1
;
feat_cache_opts
.
frame_chunk_stride
=
1
;
feat_cache_opts
.
frame_chunk_size
=
1
;
feat_cache_opts
.
frame_chunk_size
=
1
;
ppspeech
::
FeatureCache
feature_cache
(
feat_cache_opts
,
std
::
move
(
cmvn
));
ppspeech
::
FeatureCache
feature_cache
(
feat_cache_opts
,
std
::
move
(
cmvn
));
LOG
(
INFO
)
<<
"fbank: "
<<
true
;
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
int
sample_rate
=
16000
;
int
sample_rate
=
16000
;
...
...
speechx/examples/ds2_ol/feat/linear-spectrogram-wo-db-norm-ol.cc
浏览文件 @
37c6106e
...
@@ -56,6 +56,7 @@ int main(int argc, char* argv[]) {
...
@@ -56,6 +56,7 @@ int main(int argc, char* argv[]) {
opt
.
frame_opts
.
remove_dc_offset
=
false
;
opt
.
frame_opts
.
remove_dc_offset
=
false
;
opt
.
frame_opts
.
window_type
=
"hanning"
;
opt
.
frame_opts
.
window_type
=
"hanning"
;
opt
.
frame_opts
.
preemph_coeff
=
0.0
;
opt
.
frame_opts
.
preemph_coeff
=
0.0
;
LOG
(
INFO
)
<<
"linear feature: "
<<
true
;
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
...
@@ -77,7 +78,7 @@ int main(int argc, char* argv[]) {
...
@@ -77,7 +78,7 @@ int main(int argc, char* argv[]) {
int
sample_rate
=
16000
;
int
sample_rate
=
16000
;
float
streaming_chunk
=
FLAGS_streaming_chunk
;
float
streaming_chunk
=
FLAGS_streaming_chunk
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
LOG
(
INFO
)
<<
"s
r
: "
<<
sample_rate
;
LOG
(
INFO
)
<<
"s
ample rate
: "
<<
sample_rate
;
LOG
(
INFO
)
<<
"chunk size (s): "
<<
streaming_chunk
;
LOG
(
INFO
)
<<
"chunk size (s): "
<<
streaming_chunk
;
LOG
(
INFO
)
<<
"chunk size (sample): "
<<
chunk_sample_size
;
LOG
(
INFO
)
<<
"chunk size (sample): "
<<
chunk_sample_size
;
...
...
speechx/examples/ds2_ol/websocket/websocket_server.sh
浏览文件 @
37c6106e
...
@@ -63,7 +63,6 @@ websocket_server_main \
...
@@ -63,7 +63,6 @@ websocket_server_main \
--cmvn_file
=
$cmvn
\
--cmvn_file
=
$cmvn
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--streaming_chunk
=
0.1
\
--streaming_chunk
=
0.1
\
--to_float32
=
true
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
--word_symbol_table
=
$wfst
/words.txt
\
--word_symbol_table
=
$wfst
/words.txt
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
--model_output_names
=
softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
\
...
...
speechx/speechx/decoder/param.h
浏览文件 @
37c6106e
...
@@ -19,23 +19,24 @@
...
@@ -19,23 +19,24 @@
#include "decoder/ctc_tlg_decoder.h"
#include "decoder/ctc_tlg_decoder.h"
#include "frontend/audio/feature_pipeline.h"
#include "frontend/audio/feature_pipeline.h"
// feature
DEFINE_bool
(
use_fbank
,
false
,
"False for fbank; or linear feature"
);
// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
// feature, or fbank");
DEFINE_int32
(
num_bins
,
161
,
"num bins of mel"
);
DEFINE_string
(
cmvn_file
,
""
,
"read cmvn"
);
DEFINE_string
(
cmvn_file
,
""
,
"read cmvn"
);
DEFINE_double
(
streaming_chunk
,
0.1
,
"streaming feature chunk size"
);
DEFINE_double
(
streaming_chunk
,
0.1
,
"streaming feature chunk size"
);
DEFINE_bool
(
to_float32
,
true
,
"audio convert to pcm32"
);
// feature sliding window
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_int32
(
max_active
,
7500
,
"max active"
);
DEFINE_double
(
beam
,
15.0
,
"decoder beam"
);
DEFINE_double
(
lattice_beam
,
7.5
,
"decoder beam"
);
DEFINE_int32
(
receptive_field_length
,
DEFINE_int32
(
receptive_field_length
,
7
,
7
,
"receptive field of two CNN(kernel=5) downsampling module."
);
"receptive field of two CNN(kernel=5) downsampling module."
);
DEFINE_int32
(
downsampling_rate
,
DEFINE_int32
(
downsampling_rate
,
4
,
4
,
"two CNN(kernel=5) module downsampling rate."
);
"two CNN(kernel=5) module downsampling rate."
);
// nnet
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
DEFINE_string
(
model_input_names
,
model_input_names
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
"audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box"
,
...
@@ -47,8 +48,14 @@ DEFINE_string(model_cache_names,
...
@@ -47,8 +48,14 @@ DEFINE_string(model_cache_names,
"chunk_state_h_box,chunk_state_c_box"
,
"chunk_state_h_box,chunk_state_c_box"
,
"model cache names"
);
"model cache names"
);
DEFINE_string
(
model_cache_shapes
,
"5-1-1024,5-1-1024"
,
"model cache shapes"
);
DEFINE_string
(
model_cache_shapes
,
"5-1-1024,5-1-1024"
,
"model cache shapes"
);
DEFINE_bool
(
use_fbank
,
false
,
"use fbank or linear feature"
);
DEFINE_int32
(
num_bins
,
161
,
"num bins of mel"
);
// decoder
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_int32
(
max_active
,
7500
,
"max active"
);
DEFINE_double
(
beam
,
15.0
,
"decoder beam"
);
DEFINE_double
(
lattice_beam
,
7.5
,
"decoder beam"
);
namespace
ppspeech
{
namespace
ppspeech
{
// todo refactor later
// todo refactor later
...
@@ -56,17 +63,18 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
...
@@ -56,17 +63,18 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
FeaturePipelineOptions
opts
;
FeaturePipelineOptions
opts
;
opts
.
cmvn_file
=
FLAGS_cmvn_file
;
opts
.
cmvn_file
=
FLAGS_cmvn_file
;
opts
.
linear_spectrogram_opts
.
streaming_chunk
=
FLAGS_streaming_chunk
;
opts
.
linear_spectrogram_opts
.
streaming_chunk
=
FLAGS_streaming_chunk
;
opts
.
to_float32
=
FLAGS_to_float32
;
kaldi
::
FrameExtractionOptions
frame_opts
;
kaldi
::
FrameExtractionOptions
frame_opts
;
frame_opts
.
dither
=
0.0
;
frame_opts
.
dither
=
0.0
;
frame_opts
.
frame_shift_ms
=
10
;
frame_opts
.
frame_shift_ms
=
10
;
opts
.
use_fbank
=
FLAGS_use_fbank
;
opts
.
use_fbank
=
FLAGS_use_fbank
;
if
(
opts
.
use_fbank
)
{
if
(
opts
.
use_fbank
)
{
opts
.
to_float32
=
false
;
frame_opts
.
window_type
=
"povey"
;
frame_opts
.
window_type
=
"povey"
;
frame_opts
.
frame_length_ms
=
25
;
frame_opts
.
frame_length_ms
=
25
;
opts
.
fbank_opts
.
fbank_opts
.
mel_opts
.
num_bins
=
FLAGS_num_bins
;
opts
.
fbank_opts
.
fbank_opts
.
mel_opts
.
num_bins
=
FLAGS_num_bins
;
opts
.
fbank_opts
.
fbank_opts
.
frame_opts
=
frame_opts
;
opts
.
fbank_opts
.
fbank_opts
.
frame_opts
=
frame_opts
;
}
else
{
}
else
{
opts
.
to_float32
=
true
;
frame_opts
.
remove_dc_offset
=
false
;
frame_opts
.
remove_dc_offset
=
false
;
frame_opts
.
frame_length_ms
=
20
;
frame_opts
.
frame_length_ms
=
20
;
frame_opts
.
window_type
=
"hanning"
;
frame_opts
.
window_type
=
"hanning"
;
...
...
speechx/speechx/frontend/audio/fbank.cc
浏览文件 @
37c6106e
...
@@ -102,13 +102,16 @@ bool Fbank::Compute(const Vector<BaseFloat>& waves, Vector<BaseFloat>* feats) {
...
@@ -102,13 +102,16 @@ bool Fbank::Compute(const Vector<BaseFloat>& waves, Vector<BaseFloat>* feats) {
// note: this online feature-extraction code does not support VTLN.
// note: this online feature-extraction code does not support VTLN.
RealFft
(
&
window
,
true
);
RealFft
(
&
window
,
true
);
kaldi
::
ComputePowerSpectrum
(
&
window
);
kaldi
::
ComputePowerSpectrum
(
&
window
);
const
kaldi
::
MelBanks
&
mel_bank
=
*
(
computer_
.
GetMelBanks
(
1.0
));
const
kaldi
::
MelBanks
&
mel_bank
=
*
(
computer_
.
GetMelBanks
(
1.0
));
SubVector
<
BaseFloat
>
power_spectrum
(
window
,
0
,
window
.
Dim
()
/
2
+
1
);
SubVector
<
BaseFloat
>
power_spectrum
(
window
,
0
,
window
.
Dim
()
/
2
+
1
);
if
(
!
opts_
.
fbank_opts
.
use_power
)
{
if
(
!
opts_
.
fbank_opts
.
use_power
)
{
power_spectrum
.
ApplyPow
(
0.5
);
power_spectrum
.
ApplyPow
(
0.5
);
}
}
int32
mel_offset
=
((
opts_
.
fbank_opts
.
use_energy
&&
!
opts_
.
fbank_opts
.
htk_compat
)
?
1
:
0
);
int32
mel_offset
=
SubVector
<
BaseFloat
>
mel_energies
(
this_feature
,
mel_offset
,
opts_
.
fbank_opts
.
mel_opts
.
num_bins
);
((
opts_
.
fbank_opts
.
use_energy
&&
!
opts_
.
fbank_opts
.
htk_compat
)
?
1
:
0
);
SubVector
<
BaseFloat
>
mel_energies
(
this_feature
,
mel_offset
,
opts_
.
fbank_opts
.
mel_opts
.
num_bins
);
mel_bank
.
Compute
(
power_spectrum
,
&
mel_energies
);
mel_bank
.
Compute
(
power_spectrum
,
&
mel_energies
);
mel_energies
.
ApplyFloor
(
1e-07
);
mel_energies
.
ApplyFloor
(
1e-07
);
mel_energies
.
ApplyLog
();
mel_energies
.
ApplyLog
();
...
...
speechx/speechx/frontend/audio/feature_pipeline.cc
浏览文件 @
37c6106e
...
@@ -25,11 +25,11 @@ FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts) {
...
@@ -25,11 +25,11 @@ FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts) {
unique_ptr
<
FrontendInterface
>
base_feature
;
unique_ptr
<
FrontendInterface
>
base_feature
;
if
(
opts
.
use_fbank
)
{
if
(
opts
.
use_fbank
)
{
base_feature
.
reset
(
new
ppspeech
::
Fbank
(
opts
.
fbank_opts
,
base_feature
.
reset
(
std
::
move
(
data_source
)));
new
ppspeech
::
Fbank
(
opts
.
fbank_opts
,
std
::
move
(
data_source
)));
}
else
{
}
else
{
base_feature
.
reset
(
new
ppspeech
::
LinearSpectrogram
(
opts
.
linear_spectrogram_opts
,
base_feature
.
reset
(
new
ppspeech
::
LinearSpectrogram
(
std
::
move
(
data_source
)));
opts
.
linear_spectrogram_opts
,
std
::
move
(
data_source
)));
}
}
unique_ptr
<
FrontendInterface
>
cmvn
(
unique_ptr
<
FrontendInterface
>
cmvn
(
...
...
speechx/speechx/frontend/audio/feature_pipeline.h
浏览文件 @
37c6106e
...
@@ -18,25 +18,25 @@
...
@@ -18,25 +18,25 @@
#include "frontend/audio/audio_cache.h"
#include "frontend/audio/audio_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/fbank.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/fbank.h"
#include "frontend/audio/normalizer.h"
#include "frontend/audio/normalizer.h"
namespace
ppspeech
{
namespace
ppspeech
{
struct
FeaturePipelineOptions
{
struct
FeaturePipelineOptions
{
std
::
string
cmvn_file
;
std
::
string
cmvn_file
;
bool
to_float32
;
bool
to_float32
;
// true, only for linear feature
bool
use_fbank
;
bool
use_fbank
;
LinearSpectrogramOptions
linear_spectrogram_opts
;
LinearSpectrogramOptions
linear_spectrogram_opts
;
FbankOptions
fbank_opts
;
FbankOptions
fbank_opts
;
FeatureCacheOptions
feature_cache_opts
;
FeatureCacheOptions
feature_cache_opts
;
FeaturePipelineOptions
()
FeaturePipelineOptions
()
:
cmvn_file
(
""
),
:
cmvn_file
(
""
),
to_float32
(
false
),
to_float32
(
false
),
// true, only for linear feature
use_fbank
(
fals
e
),
use_fbank
(
tru
e
),
linear_spectrogram_opts
(),
linear_spectrogram_opts
(),
fbank_opts
(),
fbank_opts
(),
feature_cache_opts
()
{}
feature_cache_opts
()
{}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录