Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
bc1b6c2e
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
bc1b6c2e
编写于
10月 12, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor ctc opts, extract decoder interface, add ctc beamsearch score
上级
5c8725e8
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
344 addition
and
71 deletion
+344
-71
speechx/examples/ds2_ol/aishell/run.sh
speechx/examples/ds2_ol/aishell/run.sh
+1
-1
speechx/examples/ds2_ol/aishell/run_fbank.sh
speechx/examples/ds2_ol/aishell/run_fbank.sh
+1
-1
speechx/speechx/decoder/CMakeLists.txt
speechx/speechx/decoder/CMakeLists.txt
+1
-1
speechx/speechx/decoder/ctc_beam_search_decoder.cc
speechx/speechx/decoder/ctc_beam_search_decoder.cc
+5
-5
speechx/speechx/decoder/ctc_beam_search_decoder.h
speechx/speechx/decoder/ctc_beam_search_decoder.h
+25
-44
speechx/speechx/decoder/ctc_beam_search_opt.h
speechx/speechx/decoder/ctc_beam_search_opt.h
+78
-0
speechx/speechx/decoder/ctc_prefix_beam_search.cc
speechx/speechx/decoder/ctc_prefix_beam_search.cc
+0
-0
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc
+13
-0
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
+64
-0
speechx/speechx/decoder/ctc_prefix_beam_search_score.h
speechx/speechx/decoder/ctc_prefix_beam_search_score.h
+68
-0
speechx/speechx/decoder/ctc_tlg_decoder.cc
speechx/speechx/decoder/ctc_tlg_decoder.cc
+6
-6
speechx/speechx/decoder/ctc_tlg_decoder.h
speechx/speechx/decoder/ctc_tlg_decoder.h
+20
-9
speechx/speechx/decoder/ctc_tlg_decoder_main.cc
speechx/speechx/decoder/ctc_tlg_decoder_main.cc
+6
-4
speechx/speechx/decoder/decoder_itf.h
speechx/speechx/decoder/decoder_itf.h
+56
-0
未找到文件。
speechx/examples/ds2_ol/aishell/run.sh
浏览文件 @
bc1b6c2e
...
...
@@ -135,7 +135,7 @@ fi
if
[
${
stage
}
-le
4
]
&&
[
${
stop_stage
}
-ge
4
]
;
then
# TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/recog.wfst.log
\
tlg_decoder_main
\
ctc_
tlg_decoder_main
\
--feature_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/feat.scp
\
--model_path
=
$model_dir
/avg_1.jit.pdmodel
\
--param_path
=
$model_dir
/avg_1.jit.pdiparams
\
...
...
speechx/examples/ds2_ol/aishell/run_fbank.sh
浏览文件 @
bc1b6c2e
...
...
@@ -133,7 +133,7 @@ fi
if
[
${
stage
}
-le
4
]
&&
[
${
stop_stage
}
-ge
4
]
;
then
# TLG decoder
utils/run.pl
JOB
=
1:
$nj
$data
/split
${
nj
}
/JOB/recog.fbank.wfst.log
\
tlg_decoder_main
\
ctc_
tlg_decoder_main
\
--feature_rspecifier
=
scp:
$data
/split
${
nj
}
/JOB/fbank_feat.scp
\
--model_path
=
$model_dir
/avg_5.jit.pdmodel
\
--param_path
=
$model_dir
/avg_5.jit.pdiparams
\
...
...
speechx/speechx/decoder/CMakeLists.txt
浏览文件 @
bc1b6c2e
...
...
@@ -15,7 +15,7 @@ set(BINS
ctc_beam_search_decoder_main
nnet_logprob_decoder_main
recognizer_main
tlg_decoder_main
ctc_
tlg_decoder_main
)
foreach
(
bin_name IN LISTS BINS
)
...
...
speechx/speechx/decoder/ctc_beam_search_decoder.cc
浏览文件 @
bc1b6c2e
...
...
@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "decoder/ctc_beam_search_decoder.h"
#include "base/
basic_types
.h"
#include "base/
common
.h"
#include "decoder/ctc_decoders/decoder_utils.h"
#include "decoder/ctc_beam_search_decoder.h"
#include "utils/file_utils.h"
namespace
ppspeech
{
...
...
@@ -26,7 +26,7 @@ using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
CTCBeamSearch
::
CTCBeamSearch
(
const
CTCBeamSearchOptions
&
opts
)
:
opts_
(
opts
),
init_ext_scorer_
(
nullptr
),
blank_id_
(
-
1
),
blank_id_
(
opts
.
blank
),
space_id_
(
-
1
),
num_frame_decoded_
(
0
),
root_
(
nullptr
)
{
...
...
@@ -43,9 +43,9 @@ CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts)
opts_
.
alpha
,
opts_
.
beta
,
opts_
.
lm_path
,
vocabulary_
);
}
blank_id_
=
0
;
auto
it
=
std
::
find
(
vocabulary_
.
begin
(),
vocabulary_
.
end
(),
" "
);
CHECK
(
blank_id_
==
0
);
auto
it
=
std
::
find
(
vocabulary_
.
begin
(),
vocabulary_
.
end
(),
" "
);
space_id_
=
it
-
vocabulary_
.
begin
();
// if no space in vocabulary
if
((
size_t
)
space_id_
>=
vocabulary_
.
size
())
{
...
...
speechx/speechx/decoder/ctc_beam_search_decoder.h
浏览文件 @
bc1b6c2e
...
...
@@ -14,67 +14,48 @@
// used by deepspeech2
#include "base/common.h"
#pragma once
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_decoders/path_trie.h"
#include "decoder/ctc_decoders/scorer.h"
#include "kaldi/decoder/decodable-itf.h"
#include "util/parse-options.h"
#pragma once
#include "decoder/decoder_itf.h"
namespace
ppspeech
{
struct
CTCBeamSearchOptions
{
std
::
string
dict_file
;
std
::
string
lm_path
;
BaseFloat
alpha
;
BaseFloat
beta
;
BaseFloat
cutoff_prob
;
int
beam_size
;
int
cutoff_top_n
;
int
num_proc_bsearch
;
CTCBeamSearchOptions
()
:
dict_file
(
"vocab.txt"
),
lm_path
(
""
),
alpha
(
1.9
f
),
beta
(
5.0
),
beam_size
(
300
),
cutoff_prob
(
0.99
f
),
cutoff_top_n
(
40
),
num_proc_bsearch
(
10
)
{}
void
Register
(
kaldi
::
OptionsItf
*
opts
)
{
opts
->
Register
(
"dict"
,
&
dict_file
,
"dict file "
);
opts
->
Register
(
"lm-path"
,
&
lm_path
,
"language model file"
);
opts
->
Register
(
"alpha"
,
&
alpha
,
"alpha"
);
opts
->
Register
(
"beta"
,
&
beta
,
"beta"
);
opts
->
Register
(
"beam-size"
,
&
beam_size
,
"beam size for beam search method"
);
opts
->
Register
(
"cutoff-prob"
,
&
cutoff_prob
,
"cutoff probs"
);
opts
->
Register
(
"cutoff-top-n"
,
&
cutoff_top_n
,
"cutoff top n"
);
opts
->
Register
(
"num-proc-bsearch"
,
&
num_proc_bsearch
,
"num proc bsearch"
);
}
};
class
CTCBeamSearch
{
class
CTCBeamSearch
:
public
DecoderInterface
{
public:
explicit
CTCBeamSearch
(
const
CTCBeamSearchOptions
&
opts
);
~
CTCBeamSearch
()
{}
void
InitDecoder
();
void
Reset
();
void
AdvanceDecode
(
const
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>&
decodable
);
std
::
string
GetFinalBestPath
();
std
::
string
GetPartialResult
()
{
CHECK
(
false
)
<<
"Not implement."
;
return
{};
}
void
Decode
(
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>
decodable
);
std
::
string
GetBestPath
();
std
::
vector
<
std
::
pair
<
double
,
std
::
string
>>
GetNBestPath
();
std
::
string
GetFinalBestPath
();
int
NumFrameDecoded
();
int
DecodeLikelihoods
(
const
std
::
vector
<
std
::
vector
<
BaseFloat
>>&
probs
,
std
::
vector
<
std
::
string
>&
nbest_words
);
void
AdvanceDecode
(
const
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>&
decodable
);
void
Reset
();
private:
void
ResetPrefixes
();
int32
SearchOneChar
(
const
bool
&
full_beam
,
const
std
::
pair
<
size_t
,
BaseFloat
>&
log_prob_idx
,
const
BaseFloat
&
min_cutoff
);
...
...
@@ -93,4 +74,4 @@ class CTCBeamSearch {
DISALLOW_COPY_AND_ASSIGN
(
CTCBeamSearch
);
};
}
// namespace basr
\ No newline at end of file
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/decoder/ctc_beam_search_opt.h
0 → 100644
浏览文件 @
bc1b6c2e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/common.h"
#include "util/parse-options.h"
#pragma once
namespace
ppspeech
{
// Options consumed by the CTC beam-search decoders.
//
// Defaults are given as in-class initializers, written in declaration order,
// so the initialization order the compiler actually uses is the one you read
// here (the previous constructor listed alpha/beta before beam_size, which
// does not match the declaration order and triggers -Wreorder).
// Register() exposes every field as a command-line option.
struct CTCBeamSearchOptions {
    // common
    int blank = 0;  // blank id

    // ds2
    std::string dict_file = "vocab.txt";  // vocab file path
    std::string lm_path = "";             // ngram language model path
    int beam_size = 300;                  // beam size for beam search method
    BaseFloat alpha = 1.9f;
    BaseFloat beta = 5.0f;
    BaseFloat cutoff_prob = 0.99f;
    int cutoff_top_n = 40;
    int num_proc_bsearch = 10;

    // u2
    int first_beam_size = 10;
    int second_beam_size = 10;

    CTCBeamSearchOptions() = default;

    // Registers every option with `opts` so it can be set from the command
    // line. The help text of each option is prefixed with the name of the
    // config group it belongs to; "blank" is shared and carries no prefix.
    void Register(kaldi::OptionsItf* opts) {
        const std::string ds2_module = "Ds2BeamSearchConfig: ";
        opts->Register("dict", &dict_file, ds2_module + "vocab file path.");
        opts->Register(
            "lm-path", &lm_path, ds2_module + "ngram language model path.");
        opts->Register("alpha", &alpha, ds2_module + "alpha");
        opts->Register("beta", &beta, ds2_module + "beta");
        opts->Register("beam-size",
                       &beam_size,
                       ds2_module + "beam size for beam search method");
        opts->Register("cutoff-prob", &cutoff_prob, ds2_module + "cutoff probs");
        opts->Register(
            "cutoff-top-n", &cutoff_top_n, ds2_module + "cutoff top n");
        opts->Register("num-proc-bsearch",
                       &num_proc_bsearch,
                       ds2_module + "num proc bsearch");

        opts->Register("blank", &blank, "blank id, default is 0.");

        const std::string u2_module = "U2BeamSearchConfig: ";
        opts->Register(
            "first-beam-size", &first_beam_size, u2_module + "first beam size.");
        opts->Register("second-beam-size",
                       &second_beam_size,
                       u2_module + "second beam size.");
    }
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/decoder/ctc_prefix_beam_search.cc
已删除
100644 → 0
浏览文件 @
5c8725e8
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc
0 → 100644
浏览文件 @
bc1b6c2e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
0 → 100644
浏览文件 @
bc1b6c2e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"
#include "decoder/decoder_itf.h"
#include "kaldi/decoder/decodable-itf.h"
namespace
ppspeech
{
// CTC prefix beam-search decoder, configured through CTCBeamSearchOptions.
//
// Implements DecoderInterface. Only declarations live here; the member
// function definitions are expected in the matching .cc file.
class CTCPrefixBeamSearch : public DecoderInterface {
  public:
    explicit CTCPrefixBeamSearch(const CTCBeamSearchOptions& opts);
    ~CTCPrefixBeamSearch() {}

    // DecoderInterface entry point.
    void InitDecoder();

    void Decode(std::shared_ptr<kaldi::DecodableInterface> decodable);

    std::string GetBestPath();

    // Each entry pairs a double with a string.
    // NOTE(review): presumably (score, text) for each n-best hypothesis;
    // confirm against the implementation.
    std::vector<std::pair<double, std::string>> GetNBestPath();

    // DecoderInterface entry point.
    std::string GetFinalBestPath();

    // DecoderInterface entry point. It is pure virtual in the base class, so
    // it has to be declared here as well; without it this class stays abstract
    // and cannot be instantiated.
    std::string GetPartialResult();

    int NumFrameDecoded();

    int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
                          std::vector<std::string>& nbest_words);

    // DecoderInterface entry point.
    void AdvanceDecode(
        const std::shared_ptr<kaldi::DecodableInterface>& decodable);

    // DecoderInterface entry point.
    void Reset();

  private:
    void ResetPrefixes();

    int32 SearchOneChar(const bool& full_beam,
                        const std::pair<size_t, BaseFloat>& log_prob_idx,
                        const BaseFloat& min_cutoff);

    void CalculateApproxScore();

    void LMRescore();

    void AdvanceDecoding(const std::vector<std::vector<BaseFloat>>& probs);

    CTCBeamSearchOptions opts_;
    size_t blank_id_;
    int num_frame_decoded_;

    DISALLOW_COPY_AND_ASSIGN(CTCPrefixBeamSearch);
};
}
// namespace ppspeech
\ No newline at end of file
speechx/speechx/decoder/ctc_prefix_beam_search_score.h
0 → 100644
浏览文件 @
bc1b6c2e
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "utils/math.h"
namespace
ppspeech
{
// Per-prefix bookkeeping: decoding scores, viterbi (timestamp) scores with
// their time vectors, and context-bias state.
// NOTE(review): the type name looks like a misspelling of "PrefixScore"; it is
// kept unchanged here so existing references keep compiling.
struct PrefxiScore {
    // decoding scores, in log scale
    float b = -kFloatMax;   // score of paths ending in blank
    float nb = -kFloatMax;  // score of paths ending in non-blank

    // timestamp scores, in log scale
    float v_b = -kFloatMax;             // viterbi score, blank ending
    float v_nb = -kFloatMax;            // viterbi score, non-blank ending
    float cur_token_prob = -kFloatMax;  // prob of the current token
    std::vector<int> times_b;           // times of the viterbi blank path
    std::vector<int> times_nb;          // times of the viterbi non-blank path

    // context state
    bool has_context = false;
    int context_state = 0;
    float context_score = 0;

    // Decoding score: log-sum-exp of the blank and non-blank ending scores.
    float Score() const { return LogSumExp(b, nb); }

    // Decoding score with the context bias added.
    float TotalScore() const { return context_score + Score(); }

    // Timestamp score: the larger of the two viterbi scores.
    float ViterbiScore() const { return v_b < v_nb ? v_nb : v_b; }

    // Time vector that belongs to the better viterbi path; the non-blank one
    // is returned unless the blank score is strictly larger.
    const std::vector<int>& Times() const {
        if (v_b > v_nb) {
            return times_b;
        }
        return times_nb;
    }
};
// Hash functor for a prefix given as std::vector<int>. The size of the vector
// seeds the hash and every element is then folded in, so the result depends on
// both the elements and their order.
struct PrefixScoreHash {
    // https://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector
    std::size_t operator()(const std::vector<int>& prefix) const {
        std::size_t hash = prefix.size();
        for (auto it = prefix.begin(); it != prefix.end(); ++it) {
            // The element and the constant are added in unsigned int
            // arithmetic first (wrapping at that width) and only then
            // widened to size_t for the shifted terms.
            const unsigned int salted =
                static_cast<unsigned int>(*it) + 0x9e3779b9u;
            hash ^= salted + (hash << 6) + (hash >> 2);
        }
        return hash;
    }
};
// Pair of a prefix (std::vector<int>) and a PrefixScoreHash.
// NOTE(review): the alias name suggests the second member was meant to be the
// score struct rather than the hash functor - confirm against the users of
// this alias before relying on it.
using
PrefixWithScoreType
=
std
::
pair
<
std
::
vector
<
int
>
,
PrefixScoreHash
>
;
}
// namespace ppspeech
speechx/speechx/decoder/ctc_tlg_decoder.cc
浏览文件 @
bc1b6c2e
...
...
@@ -22,24 +22,24 @@ TLGDecoder::TLGDecoder(TLGDecoderOptions opts) {
fst
::
SymbolTable
::
ReadText
(
opts
.
word_symbol_table
));
decoder_
.
reset
(
new
kaldi
::
LatticeFasterOnlineDecoder
(
*
fst_
,
opts
.
opts
));
decoder_
->
InitDecoding
();
frame_decoded_size
_
=
0
;
num_frame_decoded
_
=
0
;
}
void
TLGDecoder
::
InitDecoder
()
{
decoder_
->
InitDecoding
();
frame_decoded_size
_
=
0
;
num_frame_decoded
_
=
0
;
}
void
TLGDecoder
::
AdvanceDecode
(
const
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>&
decodable
)
{
while
(
!
decodable
->
IsLastFrame
(
frame_decoded_size
_
))
{
while
(
!
decodable
->
IsLastFrame
(
num_frame_decoded
_
))
{
AdvanceDecoding
(
decodable
.
get
());
}
}
void
TLGDecoder
::
AdvanceDecoding
(
kaldi
::
DecodableInterface
*
decodable
)
{
decoder_
->
AdvanceDecoding
(
decodable
,
1
);
frame_decoded_size
_
++
;
num_frame_decoded
_
++
;
}
void
TLGDecoder
::
Reset
()
{
...
...
@@ -48,7 +48,7 @@ void TLGDecoder::Reset() {
}
std
::
string
TLGDecoder
::
GetPartialResult
()
{
if
(
frame_decoded_size
_
==
0
)
{
if
(
num_frame_decoded
_
==
0
)
{
// Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
// BestPathEnd if no frames were decoded.")
return
std
::
string
(
""
);
...
...
@@ -68,7 +68,7 @@ std::string TLGDecoder::GetPartialResult() {
}
std
::
string
TLGDecoder
::
GetFinalBestPath
()
{
if
(
frame_decoded_size
_
==
0
)
{
if
(
num_frame_decoded
_
==
0
)
{
// Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
// BestPathEnd if no frames were decoded.")
return
std
::
string
(
""
);
...
...
speechx/speechx/decoder/ctc_tlg_decoder.h
浏览文件 @
bc1b6c2e
...
...
@@ -14,8 +14,9 @@
#pragma once
#include "base/basic_types.h"
#include "kaldi/decoder/decodable-itf.h"
#include "base/common.h"
#include "decoder/decoder_itf.h"
#include "kaldi/decoder/lattice-faster-online-decoder.h"
#include "util/parse-options.h"
...
...
@@ -30,21 +31,31 @@ struct TLGDecoderOptions {
TLGDecoderOptions
()
:
word_symbol_table
(
""
),
fst_path
(
""
)
{}
};
class
TLGDecoder
{
class
TLGDecoder
:
public
DecoderInterface
{
public:
explicit
TLGDecoder
(
TLGDecoderOptions
opts
);
~
TLGDecoder
()
=
default
;
void
InitDecoder
();
void
Reset
();
void
AdvanceDecode
(
const
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>&
decodable
);
std
::
string
GetFinalBestPath
();
std
::
string
GetPartialResult
();
void
Decode
();
std
::
string
GetBestPath
();
std
::
vector
<
std
::
pair
<
double
,
std
::
string
>>
GetNBestPath
();
std
::
string
GetFinalBestPath
();
std
::
string
GetPartialResult
();
int
NumFrameDecoded
();
int
DecodeLikelihoods
(
const
std
::
vector
<
std
::
vector
<
BaseFloat
>>&
probs
,
std
::
vector
<
std
::
string
>&
nbest_words
);
void
AdvanceDecode
(
const
std
::
shared_ptr
<
kaldi
::
DecodableInterface
>&
decodable
);
void
Reset
();
private:
void
AdvanceDecoding
(
kaldi
::
DecodableInterface
*
decodable
);
...
...
@@ -53,7 +64,7 @@ class TLGDecoder {
std
::
shared_ptr
<
fst
::
Fst
<
fst
::
StdArc
>>
fst_
;
std
::
shared_ptr
<
fst
::
SymbolTable
>
word_symbol_table_
;
// the number of frames that have been decoded, starting from 0.
int32
frame_decoded_size
_
;
int32
num_frame_decoded
_
;
};
...
...
speechx/speechx/decoder/tlg_decoder_main.cc
→
speechx/speechx/decoder/
ctc_
tlg_decoder_main.cc
浏览文件 @
bc1b6c2e
...
...
@@ -14,13 +14,15 @@
// todo refactor, replace with gtest
#include "base/flags.h"
#include "base/log.h"
#include "decoder/ctc_tlg_decoder.h"
#include "base/common.h"
#include "frontend/audio/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/ds2_nnet.h"
#include "decoder/ctc_tlg_decoder.h"
#include "kaldi/util/table-types.h"
DEFINE_string
(
feature_rspecifier
,
""
,
"test feature rspecifier"
);
DEFINE_string
(
result_wspecifier
,
""
,
"test result wspecifier"
);
...
...
speechx/speechx/decoder/decoder_itf.h
0 → 100644
浏览文件 @
bc1b6c2e
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/common.h"
#include "kaldi/decoder/decodable-itf.h"
namespace
ppspeech
{
// Abstract base class for decoders. A concrete decoder must implement all
// five pure virtual functions below before it can be instantiated.
class DecoderInterface {
  public:
    virtual ~DecoderInterface() = default;

    virtual void InitDecoder() = 0;

    virtual void Reset() = 0;

    // Takes the decodable by const reference to the shared_ptr, so the call
    // itself does not add an owner.
    virtual void AdvanceDecode(
        const std::shared_ptr<kaldi::DecodableInterface>& decodable) = 0;

    virtual std::string GetFinalBestPath() = 0;

    virtual std::string GetPartialResult() = 0;

    // void Decode();

    // std::string GetBestPath();
    // std::vector<std::pair<double, std::string>> GetNBestPath();

    // int NumFrameDecoded();
    // int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
    //                       std::vector<std::string>& nbest_words);

  private:
    // void AdvanceDecoding(kaldi::DecodableInterface* decodable);

    // current decoding frame number; starts at 0 so it never holds an
    // indeterminate value.
    // NOTE(review): being private, this field is not reachable from derived
    // decoders - confirm whether it was meant to be protected.
    int32 num_frame_decoded_ = 0;
};
}
// namespace ppspeech
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录