Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e9043828
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e9043828
编写于
2月 18, 2022
作者:
S
SmileGoat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add offline_decoder_main
上级
d14ee800
变更
10
显示空白变更内容
内联
并排
Showing
10 changed files
with
189 additions
and
46 deletions
+189
-46
speechx/speechx/codelab/decoder_test/offline_decoder_main.cc
speechx/speechx/codelab/decoder_test/offline_decoder_main.cc
+58
-0
speechx/speechx/decoder/ctc_beam_search_decoder.cc
speechx/speechx/decoder/ctc_beam_search_decoder.cc
+55
-23
speechx/speechx/decoder/ctc_beam_search_decoder.h
speechx/speechx/decoder/ctc_beam_search_decoder.h
+11
-7
speechx/speechx/nnet/ctc_decodable.h
speechx/speechx/nnet/ctc_decodable.h
+0
-0
speechx/speechx/nnet/decodable.cc
speechx/speechx/nnet/decodable.cc
+38
-0
speechx/speechx/nnet/decodable.h
speechx/speechx/nnet/decodable.h
+21
-11
speechx/speechx/nnet/dnn_decodable.h
speechx/speechx/nnet/dnn_decodable.h
+0
-0
speechx/speechx/nnet/nnet_interface.h
speechx/speechx/nnet/nnet_interface.h
+3
-2
speechx/speechx/nnet/paddle_nnet.cc
speechx/speechx/nnet/paddle_nnet.cc
+2
-2
speechx/speechx/nnet/paddle_nnet.h
speechx/speechx/nnet/paddle_nnet.h
+1
-1
未找到文件。
speechx/speechx/codelab/decoder_test/offline_decoder_main.cc
0 → 100644
浏览文件 @
e9043828
// TODO: refactor, replace with gtest
#include "decoder/ctc_beam_search_decoder.h"
#include "kaldi/util/table-types.h"
#include "base/log.h"
#include "base/flags.h"
// Command-line flag (empty by default) whose value main() passes to the
// kaldi::SequentialBaseFloatMatrixReader that supplies the input features.
DEFINE_string
(
feature_respecifier
,
""
,
"test nnet prob"
);
// Make kaldi::BaseFloat usable unqualified in the rest of this file.
using
kaldi
::
BaseFloat
;
void
SplitFeature
(
kaldi
::
Matrix
<
BaseFloat
>
feature
,
int32
chunk_size
,
std
::
vector
<
kaldi
::
Matrix
<
BaseFloat
>>
feature_chunks
)
{
}
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
false
);
google
::
InitGoogleLogging
(
argv
[
0
]);
kaldi
::
SequentialBaseFloatMatrixReader
feature_reader
(
FLAGS_feature_respecifier
);
// test nnet_output --> decoder result
int32
num_done
=
0
,
num_err
=
0
;
CTCBeamSearchOptions
opts
;
CTCBeamSearch
decoder
(
opts
);
ModelOptions
model_opts
;
std
::
shared_ptr
<
PaddleNnet
>
nnet
(
new
PaddleNnet
(
model_opts
));
Decodable
decodable
();
decodable
.
SetNnet
(
nnet
);
int32
chunk_size
=
0
;
for
(;
!
feature_reader
.
Done
();
feature_reader
.
Next
())
{
string
utt
=
feature_reader
.
Key
();
const
kaldi
::
Matrix
<
BaseFloat
>
feature
=
feature_reader
.
Value
();
vector
<
Matrix
<
BaseFloat
>>
feature_chunks
;
SplitFeature
(
feature
,
chunk_size
,
&
feature_chunks
);
for
(
auto
feature_chunk
:
feature_chunks
)
{
decodable
.
FeedFeatures
(
feature_chunk
);
decoder
.
InitDecoder
();
decoder
.
AdvanceDecode
(
decodable
,
chunk_size
);
}
decodable
.
InputFinished
();
std
::
string
result
;
result
=
decoder
.
GetFinalBestPath
();
KALDI_LOG
<<
" the result of "
<<
utt
<<
" is "
<<
result
;
decodable
.
Reset
();
++
num_done
;
}
KALDI_LOG
<<
"Done "
<<
num_done
<<
" utterances, "
<<
num_err
<<
" with errors."
;
return
(
num_done
!=
0
?
0
:
1
);
}
\ No newline at end of file
speechx/speechx/decoder/ctc_beam_search_decoder.cc
浏览文件 @
e9043828
...
...
@@ -14,22 +14,28 @@ CTCBeamSearch::CTCBeamSearch(std::shared_ptr<CTCBeamSearchOptions> opts) :
init_ext_scorer_
(
nullptr
),
blank_id
(
-
1
),
space_id
(
-
1
),
num_frame_decoded
(
0
),
root
(
nullptr
)
{
LOG
(
INFO
)
<<
"dict path: "
<<
_opts
.
dict_file
;
LOG
(
INFO
)
<<
"dict path: "
<<
opts_
.
dict_file
;
vocabulary_
=
std
::
make_shared
<
vector
<
string
>>
();
if
(
!
basr
::
ReadDictToVector
(
_opts
.
dict_file
,
*
vocabulary_
))
{
if
(
!
basr
::
ReadDictToVector
(
opts_
.
dict_file
,
*
vocabulary_
))
{
LOG
(
INFO
)
<<
"load the dict failed"
;
}
LOG
(
INFO
)
<<
"read the vocabulary success, dict size: "
<<
vocabulary_
->
size
();
LOG
(
INFO
)
<<
"language model path: "
<<
_opts
.
lm_path
;
init_ext_scorer_
=
std
::
make_shared
<
Scorer
>
(
_opts
.
alpha
,
_opts
.
beta
,
_opts
.
lm_path
,
LOG
(
INFO
)
<<
"language model path: "
<<
opts_
.
lm_path
;
init_ext_scorer_
=
std
::
make_shared
<
Scorer
>
(
opts_
.
alpha
,
opts_
.
beta
,
opts_
.
lm_path
,
*
vocabulary_
);
}
void
CTCBeamSearch
::
Reset
()
{
num_frame_decoded_
=
0
;
ResetPrefixes
();
}
void
CTCBeamSearch
::
InitDecoder
()
{
blank_id
=
0
;
...
...
@@ -41,7 +47,7 @@ void CTCBeamSearch::InitDecoder() {
space_id
=
-
2
;
}
clear_p
refixes
();
ResetP
refixes
();
root
=
std
::
make_shared
<
PathTrie
>
();
root
->
score
=
root
->
log_prob_b_prev
=
0.0
;
...
...
@@ -57,6 +63,23 @@ void CTCBeamSearch::InitDecoder() {
}
}
// Accessor for the number of frames this decoder has consumed so far
// (the counter is advanced inside AdvanceDecoding, once per time step).
int32 CTCBeamSearch::NumFrameDecoded() {
  const int32 decoded_frames = num_frame_decoded_;
  return decoded_frames;
}
// todo rename, refactor
//
// Pulls frames one at a time from `decodable` and advances the beam search
// on each, until `max_frames` frames have been consumed or `decodable`
// reports that the next frame is the last one.
//
// decodable  - source of per-frame log-likelihoods.
// max_frames - upper bound on frames consumed by this call; a value <= 0
//              makes the call a no-op.
//
// NOTE(review): the frame index `NumFrameDecoded() + 1` and the "stop
// before the last frame" check are kept from the original; confirm the
// intended indexing convention against Decodable.
void CTCBeamSearch::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable,
    int max_frames) {
  while (max_frames > 0) {
    const int32 frame = NumFrameDecoded() + 1;
    if (decodable->IsLastFrame(frame)) {
      break;
    }

    // AdvanceDecoding takes vector<vector<double>>, so the single frame of
    // BaseFloat scores is widened to double and wrapped in a one-row batch.
    const auto frame_likelihood = decodable->FrameLogLikelihood(frame);
    std::vector<std::vector<double>> likelihood;
    likelihood.emplace_back(frame_likelihood.begin(), frame_likelihood.end());

    AdvanceDecoding(likelihood);
    max_frames--;
  }
}
void
CTCBeamSearch
::
ResetPrefixes
()
{
for
(
size_t
i
=
0
;
i
<
prefixes
.
size
();
i
++
)
{
if
(
prefixes
[
i
]
!=
nullptr
)
{
...
...
@@ -81,19 +104,32 @@ int CTCBeamSearch::DecodeLikelihoods(const vector<vector<float>>&probs,
}
timer
.
Reset
();
vector
<
std
::
pair
<
double
,
string
>>
results
=
AdvanceDecoding
(
double_probs
);
AdvanceDecoding
(
double_probs
);
LOG
(
INFO
)
<<
"ctc decoding elapsed time(s) "
<<
static_cast
<
float
>
(
timer
.
Elapsed
())
/
1000.0
f
;
for
(
const
auto
&
item
:
results
)
{
nbest_words
.
push_back
(
item
.
second
);
}
return
0
;
}
vector
<
std
::
pair
<
double
,
string
>>
CTCBeamSearch
::
AdvanceDecoding
(
const
vector
<
vector
<
double
>>&
probs_seq
)
{
// Returns the current beam as (score, text) pairs, as produced by
// get_beam_search_result() over the live prefixes, the vocabulary and the
// configured beam size.
vector<std::pair<double, string>> CTCBeamSearch::GetNBestPath() {
  const size_t beam_size = opts_.beam_size;
  return get_beam_search_result(prefixes, *vocabulary_, beam_size);
}
// Returns the text of the top-ranked hypothesis in the current beam, or an
// empty string when the beam search yields no hypotheses.
string CTCBeamSearch::GetBestPath() {
  const std::vector<std::pair<double, std::string>> result =
      get_beam_search_result(prefixes, *vocabulary_, opts_.beam_size);
  // Guard against indexing an empty result (e.g. nothing decoded yet).
  if (result.empty()) {
    return std::string();
  }
  // Elements are pairs, not pointers: use `.second`, not `->second`.
  return result[0].second;
}
// Finalises decoding for the current utterance and returns the best text.
//
// LMRescore() runs before CalculateApproxScore(), matching the order the
// pre-refactor AdvanceDecoding used at the end of decoding: the last word of
// each prefix is LM-scored first, and only then are the prefixes sorted and
// their approximate scores computed.
// NOTE(review): confirm this ordering is the intended one.
string CTCBeamSearch::GetFinalBestPath() {
  LMRescore();
  CalculateApproxScore();
  return GetBestPath();
}
void
CTCBeamSearch
::
AdvanceDecoding
(
const
vector
<
vector
<
double
>>&
probs_seq
)
{
size_t
num_time_steps
=
probs_seq
.
size
();
size_t
beam_size
=
_opts
.
beam_size
;
double
cutoff_prob
=
_opts
.
cutoff_prob
;
size_t
cutoff_top_n
=
_opts
.
cutoff_top_n
;
size_t
beam_size
=
opts_
.
beam_size
;
double
cutoff_prob
=
opts_
.
cutoff_prob
;
size_t
cutoff_top_n
=
opts_
.
cutoff_top_n
;
for
(
size_t
time_step
=
0
;
time_step
<
num_time_steps
;
time_step
++
)
{
const
auto
&
prob
=
probs_seq
[
time_step
];
...
...
@@ -137,18 +173,14 @@ vector<std::pair<double, string>> CTCBeamSearch::AdvanceDecoding(const vector<ve
prefixes
[
i
]
->
remove
();
}
}
// if
num_frame_decoded_
++
;
}
// for probs_seq
// score the last word of each prefix that doesn't end with space
LMRescore
();
CalculateApproxScore
();
return
get_beam_search_result
(
prefixes
,
*
vocabulary_
,
beam_size
);
}
int
CTCBeamSearch
::
SearchOneChar
(
const
bool
&
full_beam
,
const
std
::
pair
<
size_t
,
float
>&
log_prob_idx
,
const
float
&
min_cutoff
)
{
size_t
beam_size
=
_opts
.
beam_size
;
size_t
beam_size
=
opts_
.
beam_size
;
const
auto
&
c
=
log_prob_idx
.
first
;
const
auto
&
log_prob_c
=
log_prob_idx
.
second
;
size_t
prefixes_len
=
std
::
min
(
prefixes
.
size
(),
beam_size
);
...
...
@@ -219,7 +251,7 @@ int CTCBeamSearch::SearchOneChar(const bool& full_beam,
}
void
CTCBeamSearch
::
CalculateApproxScore
()
{
size_t
beam_size
=
_opts
.
beam_size
;
size_t
beam_size
=
opts_
.
beam_size
;
size_t
num_prefixes
=
std
::
min
(
prefixes
.
size
(),
beam_size
);
std
::
sort
(
prefixes
.
begin
(),
...
...
@@ -246,7 +278,7 @@ void CTCBeamSearch::CalculateApproxScore() {
}
void
CTCBeamSearch
::
LMRescore
()
{
size_t
beam_size
=
_opts
.
beam_size
;
size_t
beam_size
=
opts_
.
beam_size
;
if
(
init_ext_scorer_
!=
nullptr
&&
!
init_ext_scorer_
->
is_character_based
())
{
for
(
size_t
i
=
0
;
i
<
beam_size
&&
i
<
prefixes
.
size
();
++
i
)
{
auto
prefix
=
prefixes
[
i
];
...
...
speechx/speechx/decoder/ctc_beam_search_decoder.h
浏览文件 @
e9043828
#include "base/basic_types.h"
#include "nnet/decodable-itf.h"
#pragma once
...
...
@@ -44,12 +45,14 @@ public:
~
CTCBeamSearch
()
{
}
bool
InitDecoder
();
void
Decode
(
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>
decodable
);
std
::
string
GetBestPath
();
std
::
vector
<
std
::
pair
<
double
,
std
::
string
>>
GetNBestPath
();
std
::
string
GetFinalBestPath
();
int
NumFrameDecoded
();
int
DecodeLikelihoods
(
const
std
::
vector
<
std
::
vector
<
BaseFloat
>>&
probs
,
std
::
vector
<
std
::
string
>&
nbest_words
);
std
::
vector
<
DecodeResult
>&
GetDecodeResult
()
{
return
decoder_results_
;
}
void
Reset
();
private:
void
ResetPrefixes
();
...
...
@@ -58,17 +61,18 @@ private:
const
BaseFloat
&
min_cutoff
);
void
CalculateApproxScore
();
void
LMRescore
();
std
::
vector
<
std
::
pair
<
double
,
std
::
string
>>
AdvanceDecoding
(
const
std
::
vector
<
std
::
vector
<
double
>>&
probs_seq
);
void
AdvanceDecoding
(
const
std
::
vector
<
std
::
vector
<
double
>>&
probs_seq
);
CTCBeamSearchOptions
opts_
;
std
::
shared_ptr
<
Scorer
>
init_ext_scorer_
;
// todo separate later
std
::
vector
<
DecodeResult
>
decoder_results_
;
//
std::vector<DecodeResult> decoder_results_;
std
::
vector
<
std
::
vector
<
std
::
string
>>
vocabulary_
;
// todo remove later
size_t
blank_id
;
int
space_id
;
std
::
shared_ptr
<
PathTrie
>
root
;
std
::
vector
<
PathTrie
*>
prefixes
;
int
num_frame_decoded_
;
};
}
// namespace basr
\ No newline at end of file
speechx/speechx/nnet/ctc_decodable.h
已删除
100644 → 0
浏览文件 @
d14ee800
speechx/speechx/nnet/decodable.cc
0 → 100644
浏览文件 @
e9043828
#include "nnet/decodable.h"
namespace
ppspeech
{
Decodable
::
Acceptlikelihood
(
const
kaldi
::
Matrix
<
BaseFloat
>&
likelihood
)
{
frames_ready_
+=
likelihood
.
NumRows
();
}
Decodable
::
Init
(
DecodableConfig
config
)
{
}
Decodable
::
IsLastFrame
(
int32
frame
)
const
{
CHECK_LE
(
frame
,
frames_ready_
);
return
finished_
&&
(
frame
==
frames_ready_
-
1
);
}
int32
Decodable
::
NumIndices
()
const
{
return
0
;
}
void
Decodable
::
LogLikelihood
(
int32
frame
,
int32
index
)
{
return
;
}
void
Decodable
::
FeedFeatures
(
const
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>&
features
)
{
// skip frame ???
nnet_
->
FeedForward
(
features
,
&
nnet_cache_
);
frames_ready_
+=
nnet_cache_
.
NumRows
();
return
;
}
void
Decodable
::
Reset
()
{
// frontend_.Reset();
nnet_
->
Reset
();
}
}
// namespace ppspeech
speechx/speechx/nnet/decodable.h
浏览文件 @
e9043828
...
...
@@ -2,17 +2,27 @@
#include "base/common.h"
namespace
ppsepeech
{
struct
DecodeableConfig
;
namespace
ppspeech
{
class
Decodeable
:
public
kaldi
::
DecodableInterface
{
struct
DecodableConfig
;
class
Decodable
:
public
kaldi
::
DecodableInterface
{
public:
virtual
Init
(
Decodeable
config
)
=
0
;
virtual
Acceptlikeihood
()
=
0
;
virtual
void
Init
(
DecodableOpts
config
);
virtual
kaldi
::
BaseFloat
LogLikelihood
(
int32
frame
,
int32
index
);
virtual
bool
IsLastFrame
(
int32
frame
)
const
;
virtual
int32
NumIndices
()
const
;
void
Acceptlikelihood
(
const
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>&
likelihood
);
// remove later
void
FeedFeatures
(
const
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>&
feature
);
// only for test, todo remove later
std
::
vector
<
BaseFloat
>
FrameLogLikelihood
(
int32
frame
);
void
Reset
();
void
InputFinished
()
{
finished_
=
true
;
}
private:
std
::
share_ptr
<
FeatureExtractorInterface
>
frontend_
;
std
::
share_ptr
<
NnetInterface
>
nnet_
;
//Cache nnet_cache_;
}
std
::
shared_ptr
<
FeatureExtractorInterface
>
frontend_
;
std
::
shared_ptr
<
NnetInterface
>
nnet_
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>
nnet_cache_
;
bool
finished_
;
int32
frames_ready_
;
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/nnet/dnn_decodable.h
已删除
100644 → 0
浏览文件 @
d14ee800
speechx/speechx/nnet/nnet_interface.h
浏览文件 @
e9043828
...
...
@@ -10,7 +10,8 @@ class NnetInterface {
public:
virtual
~
NnetInterface
()
{}
virtual
void
FeedForward
(
const
kaldi
::
Matrix
<
BaseFloat
>&
features
,
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>*
inferences
)
const
=
0
;
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>*
inferences
);
virtual
void
Reset
();
};
...
...
speechx/speechx/nnet/paddle_nnet.cc
浏览文件 @
e9043828
...
...
@@ -3,7 +3,7 @@
namespace
ppspeech
{
void
PaddleNnet
::
init_cache_e
ncouts
(
const
ModelOptions
&
opts
)
{
void
PaddleNnet
::
InitCacheE
ncouts
(
const
ModelOptions
&
opts
)
{
std
::
vector
<
std
::
string
>
cache_names
;
cache_names
=
absl
::
StrSplit
(
opts
.
cache_names
,
", "
);
std
::
vector
<
std
::
string
>
cache_shapes
;
...
...
@@ -66,7 +66,7 @@ PaddleNet::PaddleNnet(const ModelOptions& opts) {
}
release_predictor
(
predictor
);
init_cache_e
ncouts
(
opts
);
InitCacheE
ncouts
(
opts
);
}
paddle_infer
::
Predictor
*
PaddleNnet
::
get_predictor
()
{
...
...
speechx/speechx/nnet/paddle_nnet.h
浏览文件 @
e9043828
...
...
@@ -94,7 +94,7 @@ class PaddleNnet : public NnetInterface {
virtual
void
FeedForward
(
const
kaldi
::
Matrix
<
BaseFloat
>&
features
,
kaldi
::
Matrix
<
kaldi
::
BaseFloat
>*
inferences
)
const
;
std
::
shared_ptr
<
Tensor
<
kaldi
::
BaseFloat
>>
GetCacheEncoder
(
const
std
::
string
&
name
);
void
init_cache_e
ncouts
(
const
ModelOptions
&
opts
);
void
InitCacheE
ncouts
(
const
ModelOptions
&
opts
);
private:
std
::
unique_ptr
<
paddle_infer
::
services
::
PredictorPool
>
pool
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录