Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
17ea30e7
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
17ea30e7
编写于
10月 17, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
u2 recog test main ok
上级
86eb7189
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
59 addition
and
24 deletion
+59
-24
speechx/examples/codelab/u2/local/recognizer.sh
speechx/examples/codelab/u2/local/recognizer.sh
+22
-0
speechx/speechx/decoder/param.h
speechx/speechx/decoder/param.h
+5
-3
speechx/speechx/decoder/u2_recognizer.cc
speechx/speechx/decoder/u2_recognizer.cc
+4
-0
speechx/speechx/decoder/u2_recognizer.h
speechx/speechx/decoder/u2_recognizer.h
+3
-2
speechx/speechx/decoder/u2_recognizer_main.cc
speechx/speechx/decoder/u2_recognizer_main.cc
+8
-5
speechx/speechx/frontend/audio/cmvn.cc
speechx/speechx/frontend/audio/cmvn.cc
+16
-14
speechx/speechx/frontend/audio/feature_pipeline.cc
speechx/speechx/frontend/audio/feature_pipeline.cc
+1
-0
未找到文件。
speechx/examples/codelab/u2/local/recognizer.sh
0 → 100755
浏览文件 @
17ea30e7
#!/bin/bash
set
-e
.
path.sh
data
=
data
exp
=
exp
mkdir
-p
$exp
ckpt_dir
=
./data/model
model_dir
=
$ckpt_dir
/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/
u2_recognizer_main
\
--use_fbank
=
true
\
--num_bins
=
80
\
--cmvn_file
=
$exp
/cmvn.ark
\
--model_path
=
$model_dir
/export.jit
\
--nnet_decoder_chunk
=
16
\
--receptive_field_length
=
7
\
--downsampling_rate
=
4
\
--vocab_path
=
$model_dir
/unit.txt
\
--wav_rspecifier
=
scp:
$data
/wav.scp
\
--result_wspecifier
=
ark,t:
$exp
/result.ark
speechx/speechx/decoder/param.h
浏览文件 @
17ea30e7
...
...
@@ -52,11 +52,12 @@ DEFINE_string(model_cache_names,
"chunk_state_h_box,chunk_state_c_box"
,
"model cache names"
);
DEFINE_string
(
model_cache_shapes
,
"5-1-1024,5-1-1024"
,
"model cache shapes"
);
DEFINE_string
(
vocab_path
,
""
,
"nnet vocab path."
);
// decoder
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_double
(
acoustic_scale
,
1.0
,
"acoustic scale"
);
DEFINE_string
(
graph_path
,
"TLG"
,
"decoder graph"
);
DEFINE_string
(
word_symbol_table
,
"words.txt"
,
"word symbol table"
);
DEFINE_int32
(
max_active
,
7500
,
"max active"
);
DEFINE_double
(
beam
,
15.0
,
"decoder beam"
);
DEFINE_double
(
lattice_beam
,
7.5
,
"decoder beam"
);
...
...
@@ -72,13 +73,14 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
frame_opts
.
dither
=
0.0
;
frame_opts
.
frame_shift_ms
=
10
;
opts
.
use_fbank
=
FLAGS_use_fbank
;
LOG
(
INFO
)
<<
"feature type: "
<<
opts
.
use_fbank
?
"fbank"
:
"linear"
;
LOG
(
INFO
)
<<
"feature type: "
<<
(
opts
.
use_fbank
?
"fbank"
:
"linear"
)
;
if
(
opts
.
use_fbank
)
{
opts
.
to_float32
=
false
;
frame_opts
.
window_type
=
"povey"
;
frame_opts
.
frame_length_ms
=
25
;
opts
.
fbank_opts
.
mel_opts
.
num_bins
=
FLAGS_num_bins
;
opts
.
fbank_opts
.
frame_opts
=
frame_opts
;
LOG
(
INFO
)
<<
"num bins: "
<<
opts
.
fbank_opts
.
mel_opts
.
num_bins
;
}
else
{
opts
.
to_float32
=
true
;
frame_opts
.
remove_dc_offset
=
false
;
...
...
speechx/speechx/decoder/u2_recognizer.cc
浏览文件 @
17ea30e7
...
...
@@ -33,12 +33,15 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource): opts_(resource
BaseFloat
am_scale
=
resource
.
acoustic_scale
;
decodable_
.
reset
(
new
Decodable
(
nnet
,
feature_pipeline_
,
am_scale
));
CHECK
(
resource
.
vocab_path
!=
""
);
decoder_
.
reset
(
new
CTCPrefixBeamSearch
(
resource
.
vocab_path
,
resource
.
decoder_opts
.
ctc_prefix_search_opts
));
unit_table_
=
decoder_
->
VocabTable
();
symbol_table_
=
unit_table_
;
input_finished_
=
false
;
Reset
();
}
void
U2Recognizer
::
Reset
()
{
...
...
@@ -69,6 +72,7 @@ void U2Recognizer::Accept(const VectorBase<BaseFloat>& waves) {
void
U2Recognizer
::
Decode
()
{
decoder_
->
AdvanceDecode
(
decodable_
);
UpdateResult
(
false
);
}
void
U2Recognizer
::
Rescoring
()
{
...
...
speechx/speechx/decoder/u2_recognizer.h
浏览文件 @
17ea30e7
...
...
@@ -92,12 +92,13 @@ struct DecodeOptions {
struct
U2RecognizerResource
{
kaldi
::
BaseFloat
acoustic_scale
{
1.0
};
std
::
string
vocab_path
{};
FeaturePipelineOptions
feature_pipeline_opts
{};
ModelOptions
model_opts
{};
DecodeOptions
decoder_opts
{};
// CTCBeamSearchOptions beam_search_opts;
kaldi
::
BaseFloat
acoustic_scale
{
1.0
};
std
::
string
vocab_path
{};
};
...
...
speechx/speechx/decoder/u2_recognizer_main.cc
浏览文件 @
17ea30e7
...
...
@@ -25,13 +25,16 @@ DEFINE_int32(sample_rate, 16000, "sample rate");
ppspeech
::
U2RecognizerResource
InitOpts
()
{
ppspeech
::
U2RecognizerResource
resource
;
resource
.
vocab_path
=
FLAGS_vocab_path
;
resource
.
acoustic_scale
=
FLAGS_acoustic_scale
;
resource
.
feature_pipeline_opts
=
ppspeech
::
InitFeaturePipelineOptions
();
resource
.
feature_pipeline_opts
=
ppspeech
::
InitFeaturePipelineOptions
();
LOG
(
INFO
)
<<
"feature!"
;
ppspeech
::
ModelOptions
model_opts
;
model_opts
.
model_path
=
FLAGS_model_path
;
resource
.
model_opts
=
model_opts
;
LOG
(
INFO
)
<<
"model!"
;
ppspeech
::
DecodeOptions
decoder_opts
;
decoder_opts
.
chunk_size
=
16
;
...
...
@@ -44,6 +47,7 @@ ppspeech::U2RecognizerResource InitOpts() {
decoder_opts
.
ctc_prefix_search_opts
.
second_beam_size
=
10
;
resource
.
decoder_opts
=
decoder_opts
;
LOG
(
INFO
)
<<
"decoder!"
;
return
resource
;
}
...
...
@@ -57,9 +61,6 @@ int main(int argc, char* argv[]) {
int32
num_done
=
0
,
num_err
=
0
;
double
tot_wav_duration
=
0.0
;
ppspeech
::
U2RecognizerResource
resource
=
InitOpts
();
ppspeech
::
U2Recognizer
recognizer
(
resource
);
kaldi
::
SequentialTableReader
<
kaldi
::
WaveHolder
>
wav_reader
(
FLAGS_wav_rspecifier
);
kaldi
::
TokenWriter
result_writer
(
FLAGS_result_wspecifier
);
...
...
@@ -71,8 +72,10 @@ int main(int argc, char* argv[]) {
LOG
(
INFO
)
<<
"chunk size (s): "
<<
streaming_chunk
;
LOG
(
INFO
)
<<
"chunk size (sample): "
<<
chunk_sample_size
;
kaldi
::
Timer
timer
;
ppspeech
::
U2RecognizerResource
resource
=
InitOpts
();
ppspeech
::
U2Recognizer
recognizer
(
resource
);
kaldi
::
Timer
timer
;
for
(;
!
wav_reader
.
Done
();
wav_reader
.
Next
())
{
std
::
string
utt
=
wav_reader
.
Key
();
const
kaldi
::
WaveData
&
wave_data
=
wav_reader
.
Value
();
...
...
speechx/speechx/frontend/audio/cmvn.cc
浏览文件 @
17ea30e7
...
...
@@ -29,7 +29,9 @@ using std::unique_ptr;
CMVN
::
CMVN
(
std
::
string
cmvn_file
,
unique_ptr
<
FrontendInterface
>
base_extractor
)
:
var_norm_
(
true
)
{
CHECK
(
cmvn_file
!=
""
);
base_extractor_
=
std
::
move
(
base_extractor
);
bool
binary
;
kaldi
::
Input
ki
(
cmvn_file
,
&
binary
);
stats_
.
Read
(
ki
.
Stream
(),
binary
);
...
...
@@ -55,11 +57,11 @@ bool CMVN::Read(kaldi::Vector<BaseFloat>* feats) {
// feats contain num_frames feature.
void
CMVN
::
Compute
(
VectorBase
<
BaseFloat
>*
feats
)
const
{
KALDI_ASSERT
(
feats
!=
NULL
);
int32
dim
=
stats_
.
NumCols
()
-
1
;
if
(
stats_
.
NumRows
()
>
2
||
stats_
.
NumRows
()
<
1
||
feats
->
Dim
()
%
dim
!=
0
)
{
KALDI_ERR
<<
"Dim mismatch: cmvn "
<<
stats_
.
NumRows
()
<<
'
x
'
<<
stats_
.
NumCols
()
<<
", feats "
<<
feats
->
Dim
()
<<
'x'
;
feats
->
Dim
()
%
dim
_
!=
0
)
{
KALDI_ERR
<<
"Dim mismatch: cmvn "
<<
stats_
.
NumRows
()
<<
'
,
'
<<
stats_
.
NumCols
()
-
1
<<
", feats "
<<
feats
->
Dim
()
<<
'x'
;
}
if
(
stats_
.
NumRows
()
==
1
&&
var_norm_
)
{
KALDI_ERR
...
...
@@ -67,7 +69,7 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
<<
"are supplied."
;
}
double
count
=
stats_
(
0
,
dim
);
double
count
=
stats_
(
0
,
dim
_
);
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// computing an offset and representing it as stats_, we use a count of one.
if
(
count
<
1.0
)
...
...
@@ -77,14 +79,14 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
if
(
!
var_norm_
)
{
Vector
<
BaseFloat
>
offset
(
feats
->
Dim
());
SubVector
<
double
>
mean_stats
(
stats_
.
RowData
(
0
),
dim
);
SubVector
<
double
>
mean_stats
(
stats_
.
RowData
(
0
),
dim
_
);
Vector
<
double
>
mean_stats_apply
(
feats
->
Dim
());
// fill the datat of mean_stats in mean_stats_appy whose dim is equal
// with the dim of feature.
// the dim
of feats = dim
* num_frames;
for
(
int32
idx
=
0
;
idx
<
feats
->
Dim
()
/
dim
;
++
idx
)
{
SubVector
<
double
>
stats_tmp
(
mean_stats_apply
.
Data
()
+
dim
*
idx
,
dim
);
// fill the datat of mean_stats in mean_stats_appy whose dim
_
is equal
// with the dim
_
of feature.
// the dim
_ of feats = dim_
* num_frames;
for
(
int32
idx
=
0
;
idx
<
feats
->
Dim
()
/
dim
_
;
++
idx
)
{
SubVector
<
double
>
stats_tmp
(
mean_stats_apply
.
Data
()
+
dim
_
*
idx
,
dim
_
);
stats_tmp
.
CopyFromVec
(
mean_stats
);
}
offset
.
AddVec
(
-
1.0
/
count
,
mean_stats_apply
);
...
...
@@ -94,7 +96,7 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
// norm(0, d) = mean offset;
// norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
kaldi
::
Matrix
<
BaseFloat
>
norm
(
2
,
feats
->
Dim
());
for
(
int32
d
=
0
;
d
<
dim
;
d
++
)
{
for
(
int32
d
=
0
;
d
<
dim
_
;
d
++
)
{
double
mean
,
offset
,
scale
;
mean
=
stats_
(
0
,
d
)
/
count
;
double
var
=
(
stats_
(
1
,
d
)
/
count
)
-
mean
*
mean
,
floor
=
1.0e-20
;
...
...
@@ -111,7 +113,7 @@ void CMVN::Compute(VectorBase<BaseFloat>* feats) const {
for
(
int32
d_skip
=
d
;
d_skip
<
feats
->
Dim
();)
{
norm
(
0
,
d_skip
)
=
offset
;
norm
(
1
,
d_skip
)
=
scale
;
d_skip
=
d_skip
+
dim
;
d_skip
=
d_skip
+
dim
_
;
}
}
// Apply the normalization.
...
...
speechx/speechx/frontend/audio/feature_pipeline.cc
浏览文件 @
17ea30e7
...
...
@@ -32,6 +32,7 @@ FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts) : opts_(opt
opts
.
linear_spectrogram_opts
,
std
::
move
(
data_source
)));
}
CHECK
(
opts
.
cmvn_file
!=
""
);
unique_ptr
<
FrontendInterface
>
cmvn
(
new
ppspeech
::
CMVN
(
opts
.
cmvn_file
,
std
::
move
(
base_feature
)));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录