Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
5cc874e1
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5cc874e1
编写于
10月 11, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
u2 nnet get encoder out and align with py
上级
a75abc18
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
96 addition
and
23 deletion
+96
-23
speechx/examples/codelab/u2nnet/run.sh
speechx/examples/codelab/u2nnet/run.sh
+6
-1
speechx/speechx/nnet/decodable.h
speechx/speechx/nnet/decodable.h
+2
-0
speechx/speechx/nnet/ds2_nnet.h
speechx/speechx/nnet/ds2_nnet.h
+10
-4
speechx/speechx/nnet/nnet_itf.h
speechx/speechx/nnet/nnet_itf.h
+12
-2
speechx/speechx/nnet/u2_nnet.cc
speechx/speechx/nnet/u2_nnet.cc
+26
-0
speechx/speechx/nnet/u2_nnet.h
speechx/speechx/nnet/u2_nnet.h
+2
-3
speechx/speechx/nnet/u2_nnet_main.cc
speechx/speechx/nnet/u2_nnet_main.cc
+38
-13
未找到文件。
speechx/examples/codelab/u2nnet/run.sh
浏览文件 @
5cc874e1
...
...
@@ -40,6 +40,7 @@ cmvn_json2kaldi_main \
--json_file
$model_dir
/mean_std.json
\
--cmvn_write_path
$exp
/cmvn.ark
\
--binary
=
false
echo
"convert json cmvn to kaldi ark."
compute_fbank_main
\
...
...
@@ -47,6 +48,7 @@ compute_fbank_main \
--wav_rspecifier
=
scp:
$data
/wav.scp
\
--cmvn_file
=
$exp
/cmvn.ark
\
--feature_wspecifier
=
ark,t:
$exp
/fbank.ark
echo
"compute fbank feature."
u2_nnet_main
\
...
...
@@ -56,4 +58,7 @@ u2_nnet_main \
--receptive_field_length
=
7
\
--downsampling_rate
=
4
\
--acoustic_scale
=
1.0
\
--nnet_prob_wspecifier
=
ark,t:
$exp
/probs.ark
--nnet_encoder_outs_wspecifier
=
ark,t:
$exp
/encoder_outs.ark
\
--nnet_prob_wspecifier
=
ark,t:
$exp
/logprobs.ark
echo
"u2 nnet decode."
speechx/speechx/nnet/decodable.h
浏览文件 @
5cc874e1
...
...
@@ -55,6 +55,8 @@ class Decodable : public kaldi::DecodableInterface {
int32
TokenId2NnetId
(
int32
token_id
);
// Accessor: returns the shared_ptr stored in nnet_ (a copy of the pointer,
// so the caller shares ownership of the same NnetInterface object).
std
::
shared_ptr
<
NnetInterface
>
Nnet
()
{
return
nnet_
;
}
private:
bool
AdvanceChunk
();
...
...
speechx/speechx/nnet/ds2_nnet.h
浏览文件 @
5cc874e1
...
...
@@ -96,16 +96,22 @@ class PaddleNnet : public NnetInterface {
public:
PaddleNnet
(
const
ModelOptions
&
opts
);
v
irtual
v
oid
FeedForward
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
features
,
void
FeedForward
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
features
,
const
int32
&
feature_dim
,
NnetOut
*
out
)
;
NnetOut
*
out
)
override
;
void
Dim
();
virtual
void
Reset
();
void
Reset
()
override
;
std
::
shared_ptr
<
Tensor
<
kaldi
::
BaseFloat
>>
GetCacheEncoder
(
const
std
::
string
&
name
);
void
InitCacheEncouts
(
const
ModelOptions
&
opts
);
// Override of EncoderOuts with an empty body: encoder_out is never read or
// written here, so the caller's vector is left exactly as it was passed in.
void
EncoderOuts
(
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>*
encoder_out
)
const
override
{}
private:
paddle_infer
::
Predictor
*
GetPredictor
();
int
ReleasePredictor
(
paddle_infer
::
Predictor
*
predictor
);
...
...
speechx/speechx/nnet/nnet_itf.h
浏览文件 @
5cc874e1
...
...
@@ -22,7 +22,7 @@
namespace
ppspeech
{
struct
NnetOut
{
// nnet out
, maybe logprob or prob
// nnet out
. maybe logprob or prob. Most of the time this is logprob.
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
logprobs
;
int32
vocab_dim
;
...
...
@@ -35,11 +35,21 @@ struct NnetOut {
class
NnetInterface
{
public:
virtual
~
NnetInterface
()
{}
// forward feat with nnet.
// nnet does not cache feats; feats are cached by the frontend.
// nnet caches model outputs, i.e. logprobs/encoder_outs.
virtual
void
FeedForward
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
features
,
const
int32
&
feature_dim
,
NnetOut
*
out
)
=
0
;
// reset nnet state, e.g. nnet_logprob_cache_, offset_, encoder_outs_.
virtual
void
Reset
()
=
0
;
virtual
~
NnetInterface
()
{}
// used to get encoder outs, e.g. for a seq2seq model with attention.
virtual
void
EncoderOuts
(
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>*
encoder_out
)
const
=
0
;
};
}
// namespace ppspeech
speechx/speechx/nnet/u2_nnet.cc
浏览文件 @
5cc874e1
...
...
@@ -705,4 +705,30 @@ void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
}
}
// Appends every cached encoder output frame to `*encoder_out`.
//
// encoder_outs_ is walked in order. Each tensor must be rank 3 with a leading
// dimension of 1, i.e. shaped (B=1, T, D). For each of the T rows, one
// kaldi::Vector of dimension D holding a copy of that row is appended to
// `*encoder_out`. Elements already present in `*encoder_out` are kept.
//
// Aborts through CHECK when `encoder_out` is null, when a tensor is not
// rank 3, or when its first dimension is not 1.
void U2Nnet::EncoderOuts(
    std::vector<kaldi::Vector<kaldi::BaseFloat>>* encoder_out) const {
    CHECK(encoder_out != nullptr) << "encoder_out must not be null.";

    // list of (B=1,T,D)
    const int size = encoder_outs_.size();
    VLOG(1) << "encoder_outs_ size: " << size;

    for (int i = 0; i < size; i++) {
        const paddle::Tensor& item = encoder_outs_[i];
        const std::vector<int64_t> shape = item.shape();
        CHECK(shape.size() == 3);

        // Hold the dims as plain values; binding `const int&` to an int64_t
        // element only references a converted temporary.
        const int B = static_cast<int>(shape[0]);
        const int T = static_cast<int>(shape[1]);
        const int D = static_cast<int>(shape[2]);
        CHECK(B == 1) << "Only support batch one.";
        VLOG(1) << "encoder out " << i << " shape: (" << B << "," << T << ","
                << D << ")";

        const float* this_tensor_ptr = item.data<float>();
        for (int j = 0; j < T; j++) {
            // Row j starts j * D floats into the tensor buffer.
            const float* cur = this_tensor_ptr + j * D;

            // Construct the frame directly in the output vector and fill it
            // there, instead of filling a local Vector and copying it again.
            encoder_out->emplace_back(D);
            // NOTE(review): the byte count uses kaldi::BaseFloat while the
            // source buffer is float - assumes BaseFloat is float; confirm.
            std::memcpy(encoder_out->back().Data(),
                        cur,
                        D * sizeof(kaldi::BaseFloat));
        }
    }
}
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/nnet/u2_nnet.h
浏览文件 @
5cc874e1
...
...
@@ -137,9 +137,8 @@ class U2Nnet : public U2NnetBase {
// debug
void
FeedEncoderOuts
(
paddle
::
Tensor
&
encoder_out
);
const
std
::
vector
<
paddle
::
Tensor
>&
EncoderOuts
()
const
{
return
encoder_outs_
;
}
void
EncoderOuts
(
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>*
encoder_out
)
const
;
private:
U2ModelOptions
opts_
;
...
...
speechx/speechx/nnet/u2_nnet_main.cc
浏览文件 @
5cc874e1
...
...
@@ -21,6 +21,7 @@
DEFINE_string
(
feature_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
nnet_prob_wspecifier
,
""
,
"nnet porb wspecifier"
);
DEFINE_string
(
nnet_encoder_outs_wspecifier
,
""
,
"nnet encoder outs wspecifier"
);
DEFINE_string
(
model_path
,
""
,
"paddle nnet model"
);
...
...
@@ -52,9 +53,10 @@ int main(int argc, char* argv[]) {
LOG
(
INFO
)
<<
"input rspecifier: "
<<
FLAGS_feature_rspecifier
;
LOG
(
INFO
)
<<
"output wspecifier: "
<<
FLAGS_nnet_prob_wspecifier
;
LOG
(
INFO
)
<<
"model path: "
<<
FLAGS_model_path
;
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_rspecifier
);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_rspecifier
);
kaldi
::
BaseFloatMatrixWriter
nnet_out_writer
(
FLAGS_nnet_prob_wspecifier
);
kaldi
::
BaseFloatMatrixWriter
nnet_encoder_outs_writer
(
FLAGS_nnet_encoder_outs_wspecifier
);
ppspeech
::
U2ModelOptions
model_opts
;
model_opts
.
model_path
=
FLAGS_model_path
;
...
...
@@ -97,6 +99,7 @@ int main(int argc, char* argv[]) {
int32
frame_idx
=
0
;
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>
prob_vec
;
std
::
vector
<
kaldi
::
Vector
<
kaldi
::
BaseFloat
>>
encoder_out_vec
;
int32
ori_feature_len
=
feature
.
NumRows
();
int32
num_chunks
=
feature
.
NumRows
()
/
chunk_stride
+
1
;
LOG
(
INFO
)
<<
"num_chunks: "
<<
num_chunks
;
...
...
@@ -144,29 +147,51 @@ int main(int argc, char* argv[]) {
prob_vec
.
push_back
(
vec_tmp
);
frame_idx
++
;
}
}
// get encoder out
decodable
->
Nnet
()
->
EncoderOuts
(
&
encoder_out_vec
);
// after processing one utt, reset the decoder state.
decodable
->
Reset
();
if
(
prob_vec
.
size
()
==
0
)
{
if
(
prob_vec
.
size
()
==
0
||
encoder_out_vec
.
size
()
==
0
)
{
// the TokenWriter can not write empty string.
++
num_err
;
LOG
(
WARNING
)
<<
" the nnet prob of "
<<
utt
<<
" is empty"
;
LOG
(
WARNING
)
<<
" the nnet prob
/encoder_out
of "
<<
utt
<<
" is empty"
;
continue
;
}
{
// write nnet output
kaldi
::
MatrixIndexT
nrow
=
prob_vec
.
size
();
kaldi
::
MatrixIndexT
ncol
=
prob_vec
[
0
].
Dim
();
LOG
(
INFO
)
<<
"nnet out shape: "
<<
nrow
<<
", "
<<
ncol
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
result
(
nrow
,
ncol
);
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
nnet_out
(
nrow
,
ncol
);
for
(
int32
row_idx
=
0
;
row_idx
<
nrow
;
++
row_idx
)
{
for
(
int32
col_idx
=
0
;
col_idx
<
ncol
;
++
col_idx
)
{
nnet_out
(
row_idx
,
col_idx
)
=
prob_vec
[
row_idx
](
col_idx
);
}
}
nnet_out_writer
.
Write
(
utt
,
nnet_out
);
}
{
// write nnet encoder outs
kaldi
::
MatrixIndexT
nrow
=
encoder_out_vec
.
size
();
kaldi
::
MatrixIndexT
ncol
=
encoder_out_vec
[
0
].
Dim
();
LOG
(
INFO
)
<<
"nnet encoder outs shape: "
<<
nrow
<<
", "
<<
ncol
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
encoder_outs
(
nrow
,
ncol
);
for
(
int32
row_idx
=
0
;
row_idx
<
nrow
;
++
row_idx
)
{
for
(
int32
col_idx
=
0
;
col_idx
<
ncol
;
++
col_idx
)
{
result
(
row_idx
,
col_idx
)
=
prob_vec
[
row_idx
](
col_idx
);
encoder_outs
(
row_idx
,
col_idx
)
=
encoder_out_vec
[
row_idx
](
col_idx
);
}
}
nnet_encoder_outs_writer
.
Write
(
utt
,
encoder_outs
);
}
nnet_out_writer
.
Write
(
utt
,
result
);
++
num_done
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录