Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
94e5e37b
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
94e5e37b
编写于
3月 31, 2022
作者:
Y
YangZhou
提交者:
GitHub
3月 31, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1631 from zh794390558/spx
[speechx] openfst patch and glog
上级
602b0b0d
cb66b742
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
70 addition
and
31 deletion
+70
-31
demos/audio_searching/src/encode.py
demos/audio_searching/src/encode.py
+0
-4
speechx/cmake/external/openfst.cmake
speechx/cmake/external/openfst.cmake
+1
-1
speechx/examples/CMakeLists.txt
speechx/examples/CMakeLists.txt
+2
-0
speechx/examples/README.md
speechx/examples/README.md
+2
-1
speechx/examples/decoder/offline_decoder_main.cc
speechx/examples/decoder/offline_decoder_main.cc
+30
-10
speechx/examples/decoder/run.sh
speechx/examples/decoder/run.sh
+5
-2
speechx/examples/feat/feature-mfcc-test.cc
speechx/examples/feat/feature-mfcc-test.cc
+0
-1
speechx/examples/feat/linear_spectrogram_main.cc
speechx/examples/feat/linear_spectrogram_main.cc
+25
-9
speechx/examples/feat/run.sh
speechx/examples/feat/run.sh
+1
-0
speechx/speechx/frontend/feature_cache.h
speechx/speechx/frontend/feature_cache.h
+2
-2
speechx/speechx/frontend/raw_audio.h
speechx/speechx/frontend/raw_audio.h
+2
-1
未找到文件。
demos/audio_searching/src/encode.py
浏览文件 @
94e5e37b
...
...
@@ -11,11 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
librosa
import
numpy
as
np
from
config
import
DEFAULT_TABLE
from
logs
import
LOGGER
from
paddlespeech.cli
import
VectorExecutor
...
...
speechx/cmake/external/openfst.cmake
浏览文件 @
94e5e37b
...
...
@@ -13,7 +13,7 @@ ExternalProject_Add(openfst
"CPPFLAGS=-I
${
gflags_BINARY_DIR
}
/include -I
${
glog_SOURCE_DIR
}
/src -I
${
glog_BINARY_DIR
}
"
"LDFLAGS=-L
${
gflags_BINARY_DIR
}
-L
${
glog_BINARY_DIR
}
"
"LIBS=-lgflags_nothreads -lglog -lpthread"
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
CMAKE_CURREN
T_SOURCE_DIR
}
/patch/openfst
${
openfst_SOURCE_DIR
}
COMMAND
${
CMAKE_COMMAND
}
-E copy_directory
${
PROJEC
T_SOURCE_DIR
}
/patch/openfst
${
openfst_SOURCE_DIR
}
BUILD_COMMAND make -j 4
)
link_directories
(
${
openfst_PREFIX_DIR
}
/lib
)
...
...
speechx/examples/CMakeLists.txt
浏览文件 @
94e5e37b
...
...
@@ -3,3 +3,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
add_subdirectory
(
feat
)
add_subdirectory
(
nnet
)
add_subdirectory
(
decoder
)
add_subdirectory
(
glog
)
\ No newline at end of file
speechx/examples/README.md
浏览文件 @
94e5e37b
# Examples
*
decoder - online decoder to work as offlin
e
*
glog - glog usag
e
*
feat - mfcc, linear
*
nnet - ds2 nn
*
decoder - online decoder to work as offline
## How to run
...
...
speechx/examples/decoder/offline_decoder_main.cc
浏览文件 @
94e5e37b
...
...
@@ -22,11 +22,12 @@
#include "nnet/decodable.h"
#include "nnet/paddle_nnet.h"
DEFINE_string
(
feature_respecifier
,
""
,
"
test feature
rspecifier"
);
DEFINE_string
(
feature_respecifier
,
""
,
"
feature matrix
rspecifier"
);
DEFINE_string
(
model_path
,
"avg_1.jit.pdmodel"
,
"paddle nnet model"
);
DEFINE_string
(
param_path
,
"avg_1.jit.pdiparams"
,
"paddle nnet model param"
);
DEFINE_string
(
dict_file
,
"vocab.txt"
,
"vocabulary of lm"
);
DEFINE_string
(
lm_path
,
"lm.klm"
,
"language model"
);
DEFINE_int32
(
chunk_size
,
35
,
"feat chunk size"
);
using
kaldi
::
BaseFloat
;
...
...
@@ -43,14 +44,16 @@ int main(int argc, char* argv[]) {
std
::
string
model_params
=
FLAGS_param_path
;
std
::
string
dict_file
=
FLAGS_dict_file
;
std
::
string
lm_path
=
FLAGS_lm_path
;
int32
chunk_size
=
FLAGS_chunk_size
;
LOG
(
INFO
)
<<
"model path: "
<<
model_graph
;
LOG
(
INFO
)
<<
"model param: "
<<
model_params
;
LOG
(
INFO
)
<<
"dict path: "
<<
dict_file
;
LOG
(
INFO
)
<<
"lm path: "
<<
lm_path
;
LOG
(
INFO
)
<<
"chunk size (frame): "
<<
chunk_size
;
int32
num_done
=
0
,
num_err
=
0
;
ppspeech
::
CTCBeamSearchOptions
opts
;
opts
.
dict_file
=
dict_file
;
opts
.
lm_path
=
lm_path
;
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
// frontend + nnet is decodable
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
model_graph
;
model_opts
.
params_path
=
model_params
;
...
...
@@ -60,33 +63,50 @@ int main(int argc, char* argv[]) {
new
ppspeech
::
RawDataCache
());
std
::
shared_ptr
<
ppspeech
::
Decodable
>
decodable
(
new
ppspeech
::
Decodable
(
nnet
,
raw_data
));
LOG
(
INFO
)
<<
"Init decodeable."
;
int32
chunk_size
=
35
;
decoder
.
InitDecoder
();
// init decoder
ppspeech
::
CTCBeamSearchOptions
opts
;
opts
.
dict_file
=
dict_file
;
opts
.
lm_path
=
lm_path
;
ppspeech
::
CTCBeamSearch
decoder
(
opts
);
LOG
(
INFO
)
<<
"Init decoder."
;
decoder
.
InitDecoder
();
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
const
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
LOG
(
INFO
)
<<
"utt: "
<<
utt
;
// feat dim
raw_data
->
SetDim
(
feature
.
NumCols
());
LOG
(
INFO
)
<<
"dim: "
<<
raw_data
->
Dim
();
int32
row_idx
=
0
;
int32
num_chunks
=
feature
.
NumRows
()
/
chunk_size
;
LOG
(
INFO
)
<<
"n chunks: "
<<
num_chunks
;
for
(
int
chunk_idx
=
0
;
chunk_idx
<
num_chunks
;
++
chunk_idx
)
{
// feat chunk
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feature_chunk
(
chunk_size
*
feature
.
NumCols
());
for
(
int
row_id
=
0
;
row_id
<
chunk_size
;
++
row_id
)
{
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
tmp
(
feature
,
row_idx
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
feat_one_row
(
feature
,
row_idx
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
f_chunk_tmp
(
feature_chunk
.
Data
()
+
row_id
*
feature
.
NumCols
(),
feature
.
NumCols
());
f_chunk_tmp
.
CopyFromVec
(
tmp
);
f_chunk_tmp
.
CopyFromVec
(
feat_one_row
);
row_idx
++
;
}
// feed to raw cache
raw_data
->
Accept
(
feature_chunk
);
if
(
chunk_idx
==
num_chunks
-
1
)
{
raw_data
->
SetFinished
();
}
// decode step
decoder
.
AdvanceDecode
(
decodable
);
}
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
...
...
speechx/examples/decoder/run.sh
浏览文件 @
94e5e37b
...
...
@@ -25,7 +25,10 @@ model_dir=../paddle_asr_model
feat_wspecifier
=
./feats.ark
cmvn
=
./cmvn.ark
# 3. run feat
export
GLOG_logtostderr
=
1
# 3. gen linear feat
linear_spectrogram_main
\
--wav_rspecifier
=
scp:
$model_dir
/wav.scp
\
--feature_wspecifier
=
ark,t:
$feat_wspecifier
\
...
...
@@ -37,4 +40,4 @@ offline_decoder_main \
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--param_path
=
$model_dir
/avg_1.jit.pdparams
\
--dict_file
=
$model_dir
/vocab.txt
\
--lm_path
=
$model_dir
/avg_1.jit.klm
\ No newline at end of file
--lm_path
=
$model_dir
/avg_1.jit.klm
speechx/examples/feat/feature-mfcc-test.cc
浏览文件 @
94e5e37b
...
...
@@ -41,7 +41,6 @@
using
namespace
kaldi
;
static
void
UnitTestReadWave
()
{
std
::
cout
<<
"=== UnitTestReadWave() ===
\n
"
;
...
...
speechx/examples/feat/linear_spectrogram_main.cc
浏览文件 @
94e5e37b
...
...
@@ -25,6 +25,8 @@
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include <glog/logging.h>
DEFINE_string
(
wav_rspecifier
,
""
,
"test wav scp path"
);
DEFINE_string
(
feature_wspecifier
,
""
,
"output feats wspecifier"
);
DEFINE_string
(
cmvn_write_path
,
"./cmvn.ark"
,
"write cmvn"
);
...
...
@@ -149,7 +151,7 @@ void WriteMatrix() {
cmvn_stats
(
1
,
idx
)
=
variance_
[
idx
];
}
cmvn_stats
(
0
,
mean_
.
size
())
=
count_
;
kaldi
::
WriteKaldiObject
(
cmvn_stats
,
FLAGS_cmvn_write_path
,
tru
e
);
kaldi
::
WriteKaldiObject
(
cmvn_stats
,
FLAGS_cmvn_write_path
,
fals
e
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
@@ -161,43 +163,56 @@ int main(int argc, char* argv[]) {
kaldi
::
BaseFloatMatrixWriter
feat_writer
(
FLAGS_feature_wspecifier
);
WriteMatrix
();
// test feature linear_spectorgram: wave --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> cmvn
int32
num_done
=
0
,
num_err
=
0
;
// feature pipeline: wave cache --> decibel_normalizer --> hanning
// window -->linear_spectrogram --> global cmvn -> feat cache
// std::unique_ptr<ppspeech::FeatureExtractorInterface> data_source(new
// ppspeech::RawDataCache());
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
data_source
(
new
ppspeech
::
RawAudioCache
());
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
db_norm
(
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
,
std
::
move
(
data_source
)));
ppspeech
::
LinearSpectrogramOptions
opt
;
opt
.
frame_opts
.
frame_length_ms
=
20
;
opt
.
frame_opts
.
frame_shift_ms
=
10
;
ppspeech
::
DecibelNormalizerOptions
db_norm_opt
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
base_feature_extractor
(
new
ppspeech
::
DecibelNormalizer
(
db_norm_opt
,
std
::
move
(
data_source
)));
LOG
(
INFO
)
<<
"frame length (ms): "
<<
opt
.
frame_opts
.
frame_length_ms
;
LOG
(
INFO
)
<<
"frame shift (ms): "
<<
opt
.
frame_opts
.
frame_shift_ms
;
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
linear_spectrogram
(
new
ppspeech
::
LinearSpectrogram
(
opt
,
std
::
move
(
base_feature_extractor
)));
new
ppspeech
::
LinearSpectrogram
(
opt
,
std
::
move
(
db_norm
)));
std
::
unique_ptr
<
ppspeech
::
FeatureExtractorInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
FLAGS_cmvn_write_path
,
std
::
move
(
linear_spectrogram
)));
ppspeech
::
FeatureCache
feature_cache
(
kint16max
,
std
::
move
(
cmvn
));
LOG
(
INFO
)
<<
"feat dim: "
<<
feature_cache
.
Dim
();
float
streaming_chunk
=
0.36
;
int
sample_rate
=
16000
;
float
streaming_chunk
=
0.36
;
int
chunk_sample_size
=
streaming_chunk
*
sample_rate
;
LOG
(
INFO
)
<<
"sr: "
<<
sample_rate
;
LOG
(
INFO
)
<<
"chunk size (s): "
<<
streaming_chunk
;
LOG
(
INFO
)
<<
"chunk size (sample): "
<<
chunk_sample_size
;
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
std
::
string
utt
=
wav_reader
.
Key
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
LOG
(
INFO
)
<<
"process utt: "
<<
utt
;
int32
this_channel
=
0
;
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
waveform
(
wave_data
.
Data
(),
this_channel
);
int
tot_samples
=
waveform
.
Dim
();
LOG
(
INFO
)
<<
"wav len (sample): "
<<
tot_samples
;
int
sample_offset
=
0
;
std
::
vector
<
kaldi
::
Vector
<
BaseFloat
>>
feats
;
int
feature_rows
=
0
;
...
...
@@ -209,6 +224,7 @@ int main(int argc, char* argv[]) {
for
(
int
i
=
0
;
i
<
cur_chunk_size
;
++
i
)
{
wav_chunk
(
i
)
=
waveform
(
sample_offset
+
i
);
}
kaldi
::
Vector
<
BaseFloat
>
features
;
feature_cache
.
Accept
(
wav_chunk
);
if
(
cur_chunk_size
<
chunk_sample_size
)
{
...
...
speechx/examples/feat/run.sh
浏览文件 @
94e5e37b
...
...
@@ -25,6 +25,7 @@ feat_wspecifier=./feats.ark
cmvn
=
./cmvn.ark
# 3. run feat
export
GLOG_logtostderr
=
1
linear_spectrogram_main
\
--wav_rspecifier
=
scp:
$model_dir
/wav.scp
\
--feature_wspecifier
=
ark,t:
$feat_wspecifier
\
...
...
speechx/speechx/frontend/feature_cache.h
浏览文件 @
94e5e37b
...
...
@@ -28,10 +28,10 @@ class FeatureCache : public FeatureExtractorInterface {
// Feed feats or waves
virtual
void
Accept
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
inputs
);
// feats
dim = num_frames * feature
_dim
// feats
size = num_frames * feat
_dim
virtual
bool
Read
(
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
// feat
ure cache only cache feature which from base extractor
// feat
dim
virtual
size_t
Dim
()
const
{
return
base_extractor_
->
Dim
();
}
virtual
void
SetFinished
()
{
...
...
speechx/speechx/frontend/raw_audio.h
浏览文件 @
94e5e37b
...
...
@@ -68,9 +68,10 @@ class RawDataCache : public FeatureExtractorInterface {
data_
.
Resize
(
0
);
return
true
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
virtual
void
SetFinished
()
{
finished_
=
true
;
}
virtual
bool
IsFinished
()
const
{
return
finished_
;
}
virtual
size_t
Dim
()
const
{
return
dim_
;
}
void
SetDim
(
int32
dim
)
{
dim_
=
dim
;
}
virtual
void
Reset
()
{
finished_
=
true
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录