Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
12f540cd
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
12f540cd
编写于
10月 03, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ctc decoder with blankid
上级
e411e0bd
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
31 addition
and
23 deletion
+31
-23
deepspeech/decoders/swig/ctc_beam_search_decoder.cpp
deepspeech/decoders/swig/ctc_beam_search_decoder.cpp
+7
-10
deepspeech/decoders/swig/ctc_beam_search_decoder.h
deepspeech/decoders/swig/ctc_beam_search_decoder.h
+4
-2
deepspeech/decoders/swig/ctc_greedy_decoder.cpp
deepspeech/decoders/swig/ctc_greedy_decoder.cpp
+4
-3
deepspeech/decoders/swig/ctc_greedy_decoder.h
deepspeech/decoders/swig/ctc_greedy_decoder.h
+2
-1
deepspeech/decoders/swig/decoder_utils.h
deepspeech/decoders/swig/decoder_utils.h
+2
-0
deepspeech/decoders/swig/scorer.cpp
deepspeech/decoders/swig/scorer.cpp
+1
-1
deepspeech/decoders/swig/setup.py
deepspeech/decoders/swig/setup.py
+2
-0
deepspeech/decoders/swig_wrapper.py
deepspeech/decoders/swig_wrapper.py
+9
-6
未找到文件。
deepspeech/decoders/swig/ctc_beam_search_decoder.cpp
浏览文件 @
12f540cd
...
@@ -35,7 +35,8 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
...
@@ -35,7 +35,8 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
size_t
beam_size
,
size_t
beam_size
,
double
cutoff_prob
,
double
cutoff_prob
,
size_t
cutoff_top_n
,
size_t
cutoff_top_n
,
Scorer
*
ext_scorer
)
{
Scorer
*
ext_scorer
,
size_t
blank_id
)
{
// dimension check
// dimension check
size_t
num_time_steps
=
probs_seq
.
size
();
size_t
num_time_steps
=
probs_seq
.
size
();
for
(
size_t
i
=
0
;
i
<
num_time_steps
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_time_steps
;
++
i
)
{
...
@@ -45,19 +46,13 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
...
@@ -45,19 +46,13 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
"The shape of probs_seq does not match with "
"The shape of probs_seq does not match with "
"the shape of the vocabulary"
);
"the shape of the vocabulary"
);
}
}
// assign blank id
// size_t blank_id = vocabulary.size();
size_t
blank_id
=
0
;
// assign space id
// assign space id
auto
it
=
std
::
find
(
vocabulary
.
begin
(),
vocabulary
.
end
(),
" "
);
auto
it
=
std
::
find
(
vocabulary
.
begin
(),
vocabulary
.
end
(),
kSPACE
);
int
space_id
=
it
-
vocabulary
.
begin
();
int
space_id
=
it
-
vocabulary
.
begin
();
// if no space in vocabulary
// if no space in vocabulary
if
((
size_t
)
space_id
>=
vocabulary
.
size
())
{
if
((
size_t
)
space_id
>=
vocabulary
.
size
())
{
space_id
=
-
2
;
space_id
=
-
2
;
}
}
// init prefixes' root
// init prefixes' root
PathTrie
root
;
PathTrie
root
;
root
.
score
=
root
.
log_prob_b_prev
=
0.0
;
root
.
score
=
root
.
log_prob_b_prev
=
0.0
;
...
@@ -218,7 +213,8 @@ ctc_beam_search_decoder_batch(
...
@@ -218,7 +213,8 @@ ctc_beam_search_decoder_batch(
size_t
num_processes
,
size_t
num_processes
,
double
cutoff_prob
,
double
cutoff_prob
,
size_t
cutoff_top_n
,
size_t
cutoff_top_n
,
Scorer
*
ext_scorer
)
{
Scorer
*
ext_scorer
,
size_t
blank_id
)
{
VALID_CHECK_GT
(
num_processes
,
0
,
"num_processes must be nonnegative!"
);
VALID_CHECK_GT
(
num_processes
,
0
,
"num_processes must be nonnegative!"
);
// thread pool
// thread pool
ThreadPool
pool
(
num_processes
);
ThreadPool
pool
(
num_processes
);
...
@@ -234,7 +230,8 @@ ctc_beam_search_decoder_batch(
...
@@ -234,7 +230,8 @@ ctc_beam_search_decoder_batch(
beam_size
,
beam_size
,
cutoff_prob
,
cutoff_prob
,
cutoff_top_n
,
cutoff_top_n
,
ext_scorer
));
ext_scorer
,
blank_id
));
}
}
// get decoding results
// get decoding results
...
...
deepspeech/decoders/swig/ctc_beam_search_decoder.h
浏览文件 @
12f540cd
...
@@ -43,7 +43,8 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
...
@@ -43,7 +43,8 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
size_t
beam_size
,
size_t
beam_size
,
double
cutoff_prob
=
1
.
0
,
double
cutoff_prob
=
1
.
0
,
size_t
cutoff_top_n
=
40
,
size_t
cutoff_top_n
=
40
,
Scorer
*
ext_scorer
=
nullptr
);
Scorer
*
ext_scorer
=
nullptr
,
size_t
blank_id
=
0
);
/* CTC Beam Search Decoder for batch data
/* CTC Beam Search Decoder for batch data
...
@@ -70,6 +71,7 @@ ctc_beam_search_decoder_batch(
...
@@ -70,6 +71,7 @@ ctc_beam_search_decoder_batch(
size_t
num_processes
,
size_t
num_processes
,
double
cutoff_prob
=
1
.
0
,
double
cutoff_prob
=
1
.
0
,
size_t
cutoff_top_n
=
40
,
size_t
cutoff_top_n
=
40
,
Scorer
*
ext_scorer
=
nullptr
);
Scorer
*
ext_scorer
=
nullptr
,
size_t
blank_id
=
0
);
#endif // CTC_BEAM_SEARCH_DECODER_H_
#endif // CTC_BEAM_SEARCH_DECODER_H_
deepspeech/decoders/swig/ctc_greedy_decoder.cpp
浏览文件 @
12f540cd
...
@@ -17,17 +17,18 @@
...
@@ -17,17 +17,18 @@
std
::
string
ctc_greedy_decoder
(
std
::
string
ctc_greedy_decoder
(
const
std
::
vector
<
std
::
vector
<
double
>>
&
probs_seq
,
const
std
::
vector
<
std
::
vector
<
double
>>
&
probs_seq
,
const
std
::
vector
<
std
::
string
>
&
vocabulary
)
{
const
std
::
vector
<
std
::
string
>
&
vocabulary
,
size_t
blank_id
)
{
// dimension check
// dimension check
size_t
num_time_steps
=
probs_seq
.
size
();
size_t
num_time_steps
=
probs_seq
.
size
();
for
(
size_t
i
=
0
;
i
<
num_time_steps
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
num_time_steps
;
++
i
)
{
VALID_CHECK_EQ
(
probs_seq
[
i
].
size
(),
VALID_CHECK_EQ
(
probs_seq
[
i
].
size
(),
vocabulary
.
size
()
+
1
,
vocabulary
.
size
(),
"The shape of probs_seq does not match with "
"The shape of probs_seq does not match with "
"the shape of the vocabulary"
);
"the shape of the vocabulary"
);
}
}
size_t
blank_id
=
vocabulary
.
size
();
//
size_t blank_id = vocabulary.size();
std
::
vector
<
size_t
>
max_idx_vec
(
num_time_steps
,
0
);
std
::
vector
<
size_t
>
max_idx_vec
(
num_time_steps
,
0
);
std
::
vector
<
size_t
>
idx_vec
;
std
::
vector
<
size_t
>
idx_vec
;
...
...
deepspeech/decoders/swig/ctc_greedy_decoder.h
浏览文件 @
12f540cd
...
@@ -29,6 +29,7 @@
...
@@ -29,6 +29,7 @@
*/
*/
std
::
string
ctc_greedy_decoder
(
std
::
string
ctc_greedy_decoder
(
const
std
::
vector
<
std
::
vector
<
double
>>&
probs_seq
,
const
std
::
vector
<
std
::
vector
<
double
>>&
probs_seq
,
const
std
::
vector
<
std
::
string
>&
vocabulary
);
const
std
::
vector
<
std
::
string
>&
vocabulary
,
size_t
blank_id
);
#endif // CTC_GREEDY_DECODER_H
#endif // CTC_GREEDY_DECODER_H
deepspeech/decoders/swig/decoder_utils.h
浏览文件 @
12f540cd
...
@@ -15,10 +15,12 @@
...
@@ -15,10 +15,12 @@
#ifndef DECODER_UTILS_H_
#ifndef DECODER_UTILS_H_
#define DECODER_UTILS_H_
#define DECODER_UTILS_H_
#include <string>
#include <utility>
#include <utility>
#include "fst/log.h"
#include "fst/log.h"
#include "path_trie.h"
#include "path_trie.h"
const
std
::
string
kSPACE
=
"<space>"
;
const
float
NUM_FLT_INF
=
std
::
numeric_limits
<
float
>::
max
();
const
float
NUM_FLT_INF
=
std
::
numeric_limits
<
float
>::
max
();
const
float
NUM_FLT_MIN
=
std
::
numeric_limits
<
float
>::
min
();
const
float
NUM_FLT_MIN
=
std
::
numeric_limits
<
float
>::
min
();
...
...
deepspeech/decoders/swig/scorer.cpp
浏览文件 @
12f540cd
...
@@ -165,7 +165,7 @@ void Scorer::set_char_map(const std::vector<std::string>& char_list) {
...
@@ -165,7 +165,7 @@ void Scorer::set_char_map(const std::vector<std::string>& char_list) {
// Set the char map for the FST for spelling correction
// Set the char map for the FST for spelling correction
for
(
size_t
i
=
0
;
i
<
char_list_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
char_list_
.
size
();
i
++
)
{
if
(
char_list_
[
i
]
==
" "
)
{
if
(
char_list_
[
i
]
==
kSPACE
)
{
SPACE_ID_
=
i
;
SPACE_ID_
=
i
;
}
}
// The initial state of FST is state 0, hence the index of chars in
// The initial state of FST is state 0, hence the index of chars in
...
...
deepspeech/decoders/swig/setup.py
浏览文件 @
12f540cd
...
@@ -83,10 +83,12 @@ FILES = glob.glob('kenlm/util/*.cc') \
...
@@ -83,10 +83,12 @@ FILES = glob.glob('kenlm/util/*.cc') \
FILES
+=
glob
.
glob
(
'openfst-1.6.3/src/lib/*.cc'
)
FILES
+=
glob
.
glob
(
'openfst-1.6.3/src/lib/*.cc'
)
# yapf: disable
FILES
=
[
FILES
=
[
fn
for
fn
in
FILES
if
not
(
fn
.
endswith
(
'main.cc'
)
or
fn
.
endswith
(
'test.cc'
)
fn
for
fn
in
FILES
if
not
(
fn
.
endswith
(
'main.cc'
)
or
fn
.
endswith
(
'test.cc'
)
or
fn
.
endswith
(
'unittest.cc'
))
or
fn
.
endswith
(
'unittest.cc'
))
]
]
# yapf: enable
LIBS
=
[
'stdc++'
]
LIBS
=
[
'stdc++'
]
if
platform
.
system
()
!=
'Darwin'
:
if
platform
.
system
()
!=
'Darwin'
:
...
...
deepspeech/decoders/swig_wrapper.py
浏览文件 @
12f540cd
...
@@ -32,7 +32,7 @@ class Scorer(swig_decoders.Scorer):
...
@@ -32,7 +32,7 @@ class Scorer(swig_decoders.Scorer):
swig_decoders
.
Scorer
.
__init__
(
self
,
alpha
,
beta
,
model_path
,
vocabulary
)
swig_decoders
.
Scorer
.
__init__
(
self
,
alpha
,
beta
,
model_path
,
vocabulary
)
def
ctc_greedy_decoder
(
probs_seq
,
vocabulary
):
def
ctc_greedy_decoder
(
probs_seq
,
vocabulary
,
blank_id
):
"""Wrapper for ctc best path decoder in swig.
"""Wrapper for ctc best path decoder in swig.
:param probs_seq: 2-D list of probability distributions over each time
:param probs_seq: 2-D list of probability distributions over each time
...
@@ -44,7 +44,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary):
...
@@ -44,7 +44,8 @@ def ctc_greedy_decoder(probs_seq, vocabulary):
:return: Decoding result string.
:return: Decoding result string.
:rtype: str
:rtype: str
"""
"""
result
=
swig_decoders
.
ctc_greedy_decoder
(
probs_seq
.
tolist
(),
vocabulary
)
result
=
swig_decoders
.
ctc_greedy_decoder
(
probs_seq
.
tolist
(),
vocabulary
,
blank_id
)
return
result
return
result
...
@@ -53,7 +54,8 @@ def ctc_beam_search_decoder(probs_seq,
...
@@ -53,7 +54,8 @@ def ctc_beam_search_decoder(probs_seq,
beam_size
,
beam_size
,
cutoff_prob
=
1.0
,
cutoff_prob
=
1.0
,
cutoff_top_n
=
40
,
cutoff_top_n
=
40
,
ext_scoring_func
=
None
):
ext_scoring_func
=
None
,
blank_id
=
0
):
"""Wrapper for the CTC Beam Search Decoder.
"""Wrapper for the CTC Beam Search Decoder.
:param probs_seq: 2-D list of probability distributions over each time
:param probs_seq: 2-D list of probability distributions over each time
...
@@ -81,7 +83,7 @@ def ctc_beam_search_decoder(probs_seq,
...
@@ -81,7 +83,7 @@ def ctc_beam_search_decoder(probs_seq,
"""
"""
beam_results
=
swig_decoders
.
ctc_beam_search_decoder
(
beam_results
=
swig_decoders
.
ctc_beam_search_decoder
(
probs_seq
.
tolist
(),
vocabulary
,
beam_size
,
cutoff_prob
,
cutoff_top_n
,
probs_seq
.
tolist
(),
vocabulary
,
beam_size
,
cutoff_prob
,
cutoff_top_n
,
ext_scoring_func
)
ext_scoring_func
,
blank_id
)
beam_results
=
[(
res
[
0
],
res
[
1
].
decode
(
'utf-8'
))
for
res
in
beam_results
]
beam_results
=
[(
res
[
0
],
res
[
1
].
decode
(
'utf-8'
))
for
res
in
beam_results
]
return
beam_results
return
beam_results
...
@@ -92,7 +94,8 @@ def ctc_beam_search_decoder_batch(probs_split,
...
@@ -92,7 +94,8 @@ def ctc_beam_search_decoder_batch(probs_split,
num_processes
,
num_processes
,
cutoff_prob
=
1.0
,
cutoff_prob
=
1.0
,
cutoff_top_n
=
40
,
cutoff_top_n
=
40
,
ext_scoring_func
=
None
):
ext_scoring_func
=
None
,
blank_id
=
0
):
"""Wrapper for the batched CTC beam search decoder.
"""Wrapper for the batched CTC beam search decoder.
:param probs_seq: 3-D list with each element as an instance of 2-D list
:param probs_seq: 3-D list with each element as an instance of 2-D list
...
@@ -125,7 +128,7 @@ def ctc_beam_search_decoder_batch(probs_split,
...
@@ -125,7 +128,7 @@ def ctc_beam_search_decoder_batch(probs_split,
batch_beam_results
=
swig_decoders
.
ctc_beam_search_decoder_batch
(
batch_beam_results
=
swig_decoders
.
ctc_beam_search_decoder_batch
(
probs_split
,
vocabulary
,
beam_size
,
num_processes
,
cutoff_prob
,
probs_split
,
vocabulary
,
beam_size
,
num_processes
,
cutoff_prob
,
cutoff_top_n
,
ext_scoring_func
)
cutoff_top_n
,
ext_scoring_func
,
blank_id
)
batch_beam_results
=
[[(
res
[
0
],
res
[
1
])
for
res
in
beam_results
]
batch_beam_results
=
[[(
res
[
0
],
res
[
1
])
for
res
in
beam_results
]
for
beam_results
in
batch_beam_results
]
for
beam_results
in
batch_beam_results
]
return
batch_beam_results
return
batch_beam_results
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录