Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
7402c120
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7402c120
编写于
10月 21, 2021
作者:
H
Hui Zhang
提交者:
GitHub
10月 21, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #916 from PaddlePaddle/decoder
[decoder] scorer interface and beam search
上级
6d717a49
26a4a46c
变更
10
显示空白变更内容
内联
并排
Showing
10 changed file
with
688 addition
and
7 deletion
+688
-7
deepspeech/__init__.py
deepspeech/__init__.py
+151
-0
deepspeech/decoders/beam_search.py
deepspeech/decoders/beam_search.py
+528
-0
deepspeech/decoders/scorers/__init__.py
deepspeech/decoders/scorers/__init__.py
+0
-0
deepspeech/decoders/scorers/ctc.py
deepspeech/decoders/scorers/ctc.py
+2
-2
deepspeech/decoders/scorers/ctc_prefix_score.py
deepspeech/decoders/scorers/ctc_prefix_score.py
+2
-2
deepspeech/decoders/scorers/length_bonus.py
deepspeech/decoders/scorers/length_bonus.py
+0
-0
deepspeech/decoders/scorers/ngram.py
deepspeech/decoders/scorers/ngram.py
+0
-0
deepspeech/decoders/scorers/score_interface.py
deepspeech/decoders/scorers/score_interface.py
+3
-1
deepspeech/io/batchfy.py
deepspeech/io/batchfy.py
+1
-1
deepspeech/utils/utility.py
deepspeech/utils/utility.py
+1
-1
未找到文件。
deepspeech/__init__.py
浏览文件 @
7402c120
...
...
@@ -355,6 +355,8 @@ if not hasattr(paddle.Tensor, 'tolist'):
setattr
(
paddle
.
Tensor
,
'tolist'
,
tolist
)
########### hack paddle.nn.functional #############
# hack loss
def
ctc_loss
(
logits
,
labels
,
...
...
@@ -381,3 +383,152 @@ logger.debug(
"override ctc_loss of paddle.nn.functional if exists, remove this when fixed!"
)
# Monkey-patch: rebind `ctc_loss` on `F` to the local `ctc_loss` defined above
# (F is presumably paddle.nn.functional, per the preceding debug message).
F.ctc_loss = ctc_loss
########### hack paddle.nn #############
from
paddle.nn
import
Layer
from
typing
import
Optional
from
typing
import
Mapping
from
typing
import
Iterable
from
typing
import
Tuple
from
typing
import
Iterator
from
collections
import
OrderedDict
,
abc
as
container_abcs
class LayerDict(paddle.nn.Layer):
    r"""Holds sublayers in a dictionary.

    :class:`~paddle.nn.LayerDict` can be indexed like a regular Python dictionary,
    but the layers it contains are properly registered, and will be visible by all
    :class:`~paddle.nn.Layer` methods.

    :class:`~paddle.nn.LayerDict` is an **ordered** dictionary that respects

    * the order of insertion, and

    * in :meth:`~paddle.nn.LayerDict.update`, the order of the merged
      ``OrderedDict``, ``dict`` (started from Python 3.6) or another
      :class:`~paddle.nn.LayerDict` (the argument to
      :meth:`~paddle.nn.LayerDict.update`).

    Note that :meth:`~paddle.nn.LayerDict.update` with other unordered mapping
    types (e.g., Python's plain ``dict`` before Python version 3.6) does not
    preserve the order of the merged mapping.

    Implementation note: this class is a port of PyTorch's ``ModuleDict``.
    Storage goes through ``Layer._sub_layers`` and registration through
    ``Layer.add_sublayer``, which are the Paddle counterparts of PyTorch's
    ``_modules`` / ``add_module``.

    Args:
        modules (iterable, optional): a mapping (dictionary) of (string: layer)
            or an iterable of key-value pairs of type (string, layer)

    Example::

        class MyModule(nn.Layer):
            def __init__(self):
                super(MyModule, self).__init__()
                self.choices = nn.LayerDict({
                        'conv': nn.Conv2D(10, 10, 3),
                        'pool': nn.MaxPool2D(3)
                })
                self.activations = nn.LayerDict([
                        ['lrelu', nn.LeakyReLU()],
                        ['prelu', nn.PReLU()]
                ])

            def forward(self, x, choice, act):
                x = self.choices[choice](x)
                x = self.activations[act](x)
                return x
    """

    def __init__(self, modules: Optional[Mapping[str, Layer]] = None) -> None:
        """Create the container and optionally fill it from ``modules``."""
        super(LayerDict, self).__init__()
        if modules is not None:
            self.update(modules)

    def __getitem__(self, key: str) -> Layer:
        """Return the layer stored under ``key`` (raises ``KeyError`` if absent)."""
        return self._sub_layers[key]

    def __setitem__(self, key: str, module: Layer) -> None:
        """Register ``module`` as a sublayer under ``key``, replacing any previous one."""
        self.add_sublayer(key, module)

    def __delitem__(self, key: str) -> None:
        """Unregister the layer stored under ``key``."""
        del self._sub_layers[key]

    def __len__(self) -> int:
        """Return the number of stored layers."""
        return len(self._sub_layers)

    def __iter__(self) -> Iterator[str]:
        """Iterate over the keys in insertion order."""
        return iter(self._sub_layers)

    def __contains__(self, key: str) -> bool:
        """Return whether a layer is stored under ``key``."""
        return key in self._sub_layers

    def clear(self) -> None:
        """Remove all items from the LayerDict."""
        self._sub_layers.clear()

    def pop(self, key: str) -> Layer:
        r"""Remove key from the LayerDict and return its layer.

        Args:
            key (string): key to pop from the LayerDict
        """
        v = self[key]
        del self[key]
        return v

    def keys(self) -> Iterable[str]:
        r"""Return an iterable of the LayerDict keys."""
        return self._sub_layers.keys()

    def items(self) -> Iterable[Tuple[str, Layer]]:
        r"""Return an iterable of the LayerDict key/value pairs."""
        return self._sub_layers.items()

    def values(self) -> Iterable[Layer]:
        r"""Return an iterable of the LayerDict values."""
        return self._sub_layers.values()

    def update(self, modules: Mapping[str, Layer]) -> None:
        r"""Update the :class:`~paddle.nn.LayerDict` with the key-value pairs from a
        mapping or an iterable, overwriting existing keys.

        .. note::
            If :attr:`modules` is an ``OrderedDict``, a :class:`~paddle.nn.LayerDict`, or
            an iterable of key-value pairs, the order of new elements in it is preserved.

        Args:
            modules (iterable): a mapping (dictionary) from string to :class:`~paddle.nn.Layer`,
                or an iterable of key-value pairs of type (string, :class:`~paddle.nn.Layer`)

        Raises:
            TypeError: if ``modules`` (or one of its elements, for the
                pair-sequence form) is not iterable.
            ValueError: if an element of a pair sequence does not have length 2.
        """
        if not isinstance(modules, container_abcs.Iterable):
            raise TypeError("LayerDict.update should be called with an "
                            "iterable of key/value pairs, but got " +
                            type(modules).__name__)

        # LayerDict is listed explicitly because it only duck-types the mapping
        # protocol; OrderedDict is already covered by the Mapping ABC.
        if isinstance(modules, (LayerDict, container_abcs.Mapping)):
            for key, module in modules.items():
                self[key] = module
        else:
            # modules here can be a list with two items
            for j, m in enumerate(modules):
                if not isinstance(m, container_abcs.Iterable):
                    # trailing space after "is" keeps the type name readable
                    raise TypeError("LayerDict update sequence element "
                                    "#" + str(j) + " should be Iterable; is " +
                                    type(m).__name__)
                if len(m) != 2:
                    raise ValueError("LayerDict update sequence element "
                                     "#" + str(j) + " has length " +
                                     str(len(m)) + "; 2 is required")
                # modules can be Mapping (what it's typed at), or a list: [(name1, module1), (name2, module2)]
                # that's too cumbersome to type correctly with overloads, so we add an ignore here
                self[m[0]] = m[1]  # type: ignore[assignment]
# `forward` is intentionally not defined: the container is never called directly, so the base Layer's default applies
# Only install the local implementation when paddle.nn does not already
# expose an attribute named `LayerDict`.
if not hasattr(paddle.nn, 'LayerDict'):
    logger.debug(
        "register user LayerDict to paddle.nn, remove this when fixed!")
    paddle.nn.LayerDict = LayerDict
deepspeech/decoders/beam_search.py
0 → 100644
浏览文件 @
7402c120
"""Beam search module."""
from
itertools
import
chain
import
logger
from
typing
import
Any
from
typing
import
Dict
from
typing
import
List
from
typing
import
NamedTuple
from
typing
import
Tuple
from
typing
import
Union
import
paddle
from
.utils
import
end_detect
from
.scorers.scorer_interface
import
PartialScorerInterface
from
.scorers.scorer_interface
import
ScorerInterface
from
deepspeech.utils.log
import
Log
# Module-level logger obtained from the project's `Log` helper.
# NOTE: this rebinds the name `logger` that the `import logger` statement
# above introduced.
logger = Log(__name__).getlog()
class Hypothesis(NamedTuple):
    """Record describing a single beam-search hypothesis."""

    # token ids of the hypothesis, shape (T,)
    yseq: paddle.Tensor
    # total score of the hypothesis
    score: Union[float, paddle.Tensor] = 0
    # score per scorer name
    # NOTE(review): this default dict object is shared by every instance that
    # does not pass `scores` explicitly.
    scores: Dict[str, Union[float, paddle.Tensor]] = dict()
    # state per scorer name (same sharing caveat as `scores`)
    states: Dict[str, Any] = dict()

    def asdict(self) -> dict:
        """Return the fields as a plain dict with JSON-friendly values.

        `yseq` becomes a list, `score` a float and every entry of `scores`
        a float; `states` is passed through unchanged.
        """
        token_ids = self.yseq.tolist()
        total = float(self.score)
        per_scorer = {name: float(value) for name, value in self.scores.items()}
        converted = self._replace(
            yseq=token_ids,
            score=total,
            scores=per_scorer, )
        return converted._asdict()
class BeamSearch(paddle.nn.Layer):
    """Beam search implementation.

    Scorers are split into "full" scorers, which score every vocabulary
    entry, and "partial" scorers (instances of `PartialScorerInterface`),
    which score only a pre-selected subset of token ids.
    """

    def __init__(
            self,
            scorers: Dict[str, ScorerInterface],
            weights: Dict[str, float],
            beam_size: int,
            vocab_size: int,
            sos: int,
            eos: int,
            token_list: List[str]=None,
            pre_beam_ratio: float=1.5,
            pre_beam_score_key: str=None, ):
        """Initialize beam search.

        Args:
            scorers (dict[str, ScorerInterface]): Dict of decoder modules
                e.g., Decoder, CTCPrefixScorer, LM
                The scorer will be ignored if it is `None`
            weights (dict[str, float]): Dict of weights for each scorers
                The scorer will be ignored if its weight is 0
            beam_size (int): The number of hypotheses kept during search
            vocab_size (int): The number of vocabulary
            sos (int): Start of sequence id
            eos (int): End of sequence id
            token_list (list[str]): List of tokens for debug log
            pre_beam_score_key (str): key of scores to perform pre-beam search
            pre_beam_ratio (float): beam size in the pre-beam search
                will be `int(pre_beam_ratio * beam_size)`

        Raises:
            KeyError: if `pre_beam_score_key` is neither `None`, `"full"`,
                nor the name of a registered full scorer.
        """
        super().__init__()
        # set scorers
        self.weights = weights
        self.scorers = dict()  # all = full + partial
        self.full_scorers = dict()  # full tokens
        self.part_scorers = dict()  # partial tokens
        # this module dict is required for recursive cast
        # `self.to(device, dtype)` in `recog.py`
        self.nn_dict = paddle.nn.LayerDict()  # nn.Layer
        for k, v in scorers.items():
            w = weights.get(k, 0)
            # a missing/zero weight or a `None` scorer disables that scorer
            if w == 0 or v is None:
                continue
            assert isinstance(
                v, ScorerInterface
            ), f"{k} ({type(v)}) does not implement ScorerInterface"
            self.scorers[k] = v
            if isinstance(v, PartialScorerInterface):
                self.part_scorers[k] = v
            else:
                self.full_scorers[k] = v
            if isinstance(v, paddle.nn.Layer):
                self.nn_dict[k] = v

        # set configurations
        self.sos = sos
        self.eos = eos
        self.token_list = token_list
        # pre_beam_size > beam_size
        self.pre_beam_size = int(pre_beam_ratio * beam_size)
        self.beam_size = beam_size
        self.n_vocab = vocab_size
        if (pre_beam_score_key is not None and
                pre_beam_score_key != "full" and
                pre_beam_score_key not in self.full_scorers):
            raise KeyError(
                f"{pre_beam_score_key} is not found in {self.full_scorers}")
        # selected `key` scorer to do pre beam search
        self.pre_beam_score_key = pre_beam_score_key
        # do_pre_beam when need, valid and has part_scorers
        self.do_pre_beam = (self.pre_beam_score_key is not None and
                            self.pre_beam_size < self.n_vocab and
                            len(self.part_scorers) > 0)

    def init_hyp(self, x: paddle.Tensor) -> List[Hypothesis]:
        """Get an initial hypothesis data.

        Args:
            x (paddle.Tensor): The encoder output feature, (T, D)

        Returns:
            List[Hypothesis]: A one-element list holding the initial
                hypothesis (`[sos]`, score 0.0, one initial state and a zero
                score per active scorer).
        """
        init_states = dict()
        init_scores = dict()
        for k, d in self.scorers.items():
            init_states[k] = d.init_state(x)
            init_scores[k] = 0.0
        return [
            Hypothesis(
                yseq=paddle.to_tensor([self.sos], place=x.place),
                score=0.0,
                scores=init_scores,
                states=init_states, )
        ]

    @staticmethod
    def append_token(xs: paddle.Tensor, x: int) -> paddle.Tensor:
        """Append new token to prefix tokens.

        Args:
            xs (paddle.Tensor): The prefix token, (T,)
            x (int): The new token to append

        Returns:
            paddle.Tensor: (T+1,), New tensor contains: xs + [x] with xs.dtype and xs.place
        """
        x = paddle.to_tensor([x], dtype=xs.dtype, place=xs.place)
        # paddle.concat is the native Paddle spelling of torch-style `cat`
        return paddle.concat((xs, x))

    def score_full(self, hyp: Hypothesis, x: paddle.Tensor
                   ) -> Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]:
        """Score new hypothesis by `self.full_scorers`.

        Args:
            hyp (Hypothesis): Hypothesis with prefix tokens to score
            x (paddle.Tensor): Corresponding input feature, (T, D)

        Returns:
            Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]: Tuple of
                score dict of `hyp` that has string keys of `self.full_scorers`
                and tensor score values of shape: `(self.n_vocab,)`,
                and state dict that has string keys
                and state values of `self.full_scorers`
        """
        scores = dict()
        states = dict()
        for k, d in self.full_scorers.items():
            # scores[k] shape (self.n_vocab,)
            scores[k], states[k] = d.score(hyp.yseq, hyp.states[k], x)
        return scores, states

    def score_partial(self, hyp: Hypothesis, ids: paddle.Tensor,
                      x: paddle.Tensor
                      ) -> Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]:
        """Score new hypothesis by `self.part_scorers`.

        Args:
            hyp (Hypothesis): Hypothesis with prefix tokens to score
            ids (paddle.Tensor): 1D tensor of new partial tokens to score,
                len(ids) < n_vocab
            x (paddle.Tensor): Corresponding input feature, (T, D)

        Returns:
            Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]: Tuple of
                score dict of `hyp` that has string keys of `self.part_scorers`
                and tensor score values of shape: `(len(ids),)`,
                and state dict that has string keys
                and state values of `self.part_scorers`
        """
        scores = dict()
        states = dict()
        for k, d in self.part_scorers.items():
            # scores[k] shape (len(ids),)
            scores[k], states[k] = d.score_partial(hyp.yseq, ids,
                                                   hyp.states[k], x)
        return scores, states

    def beam(self, weighted_scores: paddle.Tensor,
             ids: paddle.Tensor) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute topk full token ids and partial token ids.

        Note that `weighted_scores` is modified in place when pre-beam
        pruning was applied: every entry outside `ids` is set to `-inf`.

        Args:
            weighted_scores (paddle.Tensor): The weighted sum scores for each tokens.
                Its shape is `(self.n_vocab,)`.
            ids (paddle.Tensor): The partial token ids(Global) to compute topk.

        Returns:
            Tuple[paddle.Tensor, paddle.Tensor]:
                The topk full token ids and partial token ids.
                Their shapes are `(self.beam_size,)`.
                i.e. (global ids, global relative local ids).
        """
        # no pre beam performed, `ids` equal to `weighted_scores`
        if weighted_scores.shape[0] == ids.shape[0]:
            top_ids = weighted_scores.topk(
                self.beam_size)[1]  # index in n_vocab
            return top_ids, top_ids

        # mask pruned in pre-beam not to select in topk
        tmp = weighted_scores[ids]
        weighted_scores[:] = -float("inf")
        weighted_scores[ids] = tmp
        # top_ids no equal to local_ids, since ids shape not same
        top_ids = weighted_scores.topk(self.beam_size)[1]  # index in n_vocab
        local_ids = weighted_scores[ids].topk(
            self.beam_size)[1]  # index in len(ids)
        return top_ids, local_ids

    @staticmethod
    def merge_scores(
            prev_scores: Dict[str, float],
            next_full_scores: Dict[str, paddle.Tensor],
            full_idx: int,
            next_part_scores: Dict[str, paddle.Tensor],
            part_idx: int, ) -> Dict[str, paddle.Tensor]:
        """Merge scores for new hypothesis.

        Args:
            prev_scores (Dict[str, float]):
                The previous hypothesis scores by `self.scorers`
            next_full_scores (Dict[str, paddle.Tensor]): scores by `self.full_scorers`
            full_idx (int): The next token id for `next_full_scores`
            next_part_scores (Dict[str, paddle.Tensor]):
                scores of partial tokens by `self.part_scorers`
            part_idx (int): The new token id for `next_part_scores`

        Returns:
            Dict[str, paddle.Tensor]: The new score dict.
                Its keys are names of `self.full_scorers` and `self.part_scorers`.
                Its values are scalar tensors by the scorers.
        """
        new_scores = dict()
        for k, v in next_full_scores.items():
            new_scores[k] = prev_scores[k] + v[full_idx]
        for k, v in next_part_scores.items():
            new_scores[k] = prev_scores[k] + v[part_idx]
        return new_scores

    def merge_states(self, states: Any, part_states: Any,
                     part_idx: int) -> Any:
        """Merge states for new hypothesis.

        Args:
            states: states of `self.full_scorers`
            part_states: states of `self.part_scorers`
            part_idx (int): The new token id for `part_scores`

        Returns:
            Dict[str, Any]: The new state dict.
                Its keys are names of `self.full_scorers` and `self.part_scorers`.
                Its values are states of the scorers.
        """
        new_states = dict()
        for k, v in states.items():
            new_states[k] = v
        for k, d in self.part_scorers.items():
            new_states[k] = d.select_state(part_states[k], part_idx)
        return new_states

    def search(self, running_hyps: List[Hypothesis],
               x: paddle.Tensor) -> List[Hypothesis]:
        """Search new tokens for running hypotheses and encoded speech x.

        Args:
            running_hyps (List[Hypothesis]): Running hypotheses on beam
            x (paddle.Tensor): Encoded speech feature (T, D)

        Returns:
            List[Hypotheses]: Best sorted hypotheses
        """
        best_hyps = []
        part_ids = paddle.arange(self.n_vocab)  # no pre-beam
        for hyp in running_hyps:
            # scoring
            # shape is passed as a list, the form paddle.zeros documents
            weighted_scores = paddle.zeros([self.n_vocab], dtype=x.dtype)
            scores, states = self.score_full(hyp, x)
            for k in self.full_scorers:
                weighted_scores += self.weights[k] * scores[k]
            # partial scoring
            if self.do_pre_beam:
                pre_beam_scores = (weighted_scores
                                   if self.pre_beam_score_key == "full" else
                                   scores[self.pre_beam_score_key])
                part_ids = paddle.topk(pre_beam_scores, self.pre_beam_size)[1]
            part_scores, part_states = self.score_partial(hyp, part_ids, x)
            for k in self.part_scorers:
                weighted_scores[part_ids] += self.weights[k] * part_scores[k]
            # add previous hyp score
            weighted_scores += hyp.score

            # update hyps
            for j, part_j in zip(*self.beam(weighted_scores, part_ids)):
                # `part_j` is `j` relative id in `part_scores`
                # will be (2 x beam at most)
                best_hyps.append(
                    Hypothesis(
                        score=weighted_scores[j],
                        yseq=self.append_token(hyp.yseq, j),
                        scores=self.merge_scores(hyp.scores, scores, j,
                                                 part_scores, part_j),
                        states=self.merge_states(states, part_states, part_j),
                    ))

            # sort and prune 2 x beam -> beam
            # (slicing already clamps to the list length)
            best_hyps = sorted(
                best_hyps, key=lambda h: h.score,
                reverse=True)[:self.beam_size]
        return best_hyps

    def forward(self,
                x: paddle.Tensor,
                maxlenratio: float=0.0,
                minlenratio: float=0.0) -> List[Hypothesis]:
        """Perform beam search.

        Args:
            x (paddle.Tensor): Encoded speech feature (T, D)
            maxlenratio (float): Input length ratio to obtain max output length.
                If maxlenratio=0.0 (default), it uses a end-detect function
                to automatically find maximum hypothesis lengths
                If maxlenratio<0.0, its absolute value is interpreted
                as a constant max output length.
            minlenratio (float): Input length ratio to obtain min output length.

        Returns:
            list[Hypothesis]: N-best decoding results
        """
        # number of input frames; `shape[0]` is used everywhere for consistency
        n_frames = x.shape[0]

        # set length bounds
        if maxlenratio == 0:
            maxlen = n_frames
        elif maxlenratio < 0:
            maxlen = -1 * int(maxlenratio)
        else:
            maxlen = max(1, int(maxlenratio * n_frames))
        minlen = int(minlenratio * n_frames)
        logger.info("decoder input length: " + str(n_frames))
        logger.info("max output length: " + str(maxlen))
        logger.info("min output length: " + str(minlen))

        # main loop of prefix search
        running_hyps = self.init_hyp(x)
        ended_hyps = []
        for i in range(maxlen):
            logger.debug("position " + str(i))
            best = self.search(running_hyps, x)
            # post process of one iteration
            running_hyps = self.post_process(i, maxlen, maxlenratio, best,
                                             ended_hyps)
            # end detection
            if maxlenratio == 0.0 and end_detect(
                [h.asdict() for h in ended_hyps], i):
                logger.info(f"end detected at {i}")
                break
            if len(running_hyps) == 0:
                logger.info("no hypothesis. Finish decoding.")
                break
            else:
                logger.debug(f"remained hypotheses: {len(running_hyps)}")

        nbest_hyps = sorted(ended_hyps, key=lambda h: h.score, reverse=True)
        # check the number of hypotheses reaching to eos
        if len(nbest_hyps) == 0:
            logger.warning("there is no N-best results, perform recognition "
                           "again with smaller minlenratio.")
            # retry with minlenratio lowered by 0.1 until it drops below 0.1
            return ([] if minlenratio < 0.1 else
                    self.forward(x, maxlenratio, max(0.0, minlenratio - 0.1)))

        # report the best result
        best = nbest_hyps[0]
        for k, v in best.scores.items():
            logger.info(
                f"{v:6.2f} * {self.weights[k]:3} = {v * self.weights[k]:6.2f} for {k}"
            )
        logger.info(f"total log probability: {best.score:.2f}")
        logger.info(
            f"normalized log probability: {best.score / len(best.yseq):.2f}")
        logger.info(f"total number of ended hypotheses: {len(nbest_hyps)}")
        if self.token_list is not None:
            # yseq[1:-1] skips the first and last token ids
            logger.info("best hypo: " + "".join(
                [self.token_list[x] for x in best.yseq[1:-1]]) + "\n")
        return nbest_hyps

    def post_process(
            self,
            i: int,
            maxlen: int,
            maxlenratio: float,
            running_hyps: List[Hypothesis],
            ended_hyps: List[Hypothesis], ) -> List[Hypothesis]:
        """Perform post-processing of beam search iterations.

        Hypotheses whose last token is `self.eos` get the scorers' final
        scores added and are appended to `ended_hyps` (modified in place);
        all others are returned as the new running set.

        Args:
            i (int): The length of hypothesis tokens.
            maxlen (int): The maximum length of tokens in beam search.
            maxlenratio (int): The maximum length ratio in beam search.
            running_hyps (List[Hypothesis]): The running hypotheses in beam search.
            ended_hyps (List[Hypothesis]): The ended hypotheses in beam search.

        Returns:
            List[Hypothesis]: The new running hypotheses.
        """
        logger.debug(f"the number of running hypotheses: {len(running_hyps)}")
        if self.token_list is not None:
            logger.debug("best hypo: " + "".join(
                [self.token_list[x] for x in running_hyps[0].yseq[1:]]))
        # add eos in the final loop to avoid that there are no ended hyps
        if i == maxlen - 1:
            logger.info("adding <eos> in the last position in the loop")
            running_hyps = [
                h._replace(yseq=self.append_token(h.yseq, self.eos))
                for h in running_hyps
            ]

        # add ended hypotheses to a final list, and removed them from current hypotheses
        # (this will be a problem, number of hyps < beam)
        remained_hyps = []
        for hyp in running_hyps:
            if hyp.yseq[-1] == self.eos:
                # e.g., Word LM needs to add final <eos> score
                for k, d in chain(self.full_scorers.items(),
                                  self.part_scorers.items()):
                    s = d.final_score(hyp.states[k])
                    # per-scorer entry is updated in place; the total needs a
                    # new tuple because Hypothesis fields are read-only
                    hyp.scores[k] += s
                    hyp = hyp._replace(score=hyp.score + self.weights[k] * s)
                ended_hyps.append(hyp)
            else:
                remained_hyps.append(hyp)
        return remained_hyps
def beam_search(
        x: paddle.Tensor,
        sos: int,
        eos: int,
        beam_size: int,
        vocab_size: int,
        scorers: Dict[str, ScorerInterface],
        weights: Dict[str, float],
        token_list: List[str]=None,
        maxlenratio: float=0.0,
        minlenratio: float=0.0,
        pre_beam_ratio: float=1.5,
        pre_beam_score_key: str="full", ) -> list:
    """Run a one-shot beam search and return the results as plain dicts.

    Builds a `BeamSearch` from the given configuration, decodes `x` with its
    `forward` method, and converts every resulting hypothesis with
    `Hypothesis.asdict`.

    Args:
        x (paddle.Tensor): Encoded speech feature (T, D)
        sos (int): Start of sequence id
        eos (int): End of sequence id
        beam_size (int): The number of hypotheses kept during search
        vocab_size (int): The number of vocabulary
        scorers (dict[str, ScorerInterface]): Dict of decoder modules
            e.g., Decoder, CTCPrefixScorer, LM
            The scorer will be ignored if it is `None`
        weights (dict[str, float]): Dict of weights for each scorers
            The scorer will be ignored if its weight is 0
        token_list (list[str]): List of tokens for debug log
        maxlenratio (float): Input length ratio to obtain max output length.
            If maxlenratio=0.0 (default), it uses a end-detect function
            to automatically find maximum hypothesis lengths
        minlenratio (float): Input length ratio to obtain min output length.
        pre_beam_score_key (str): key of scores to perform pre-beam search
        pre_beam_ratio (float): beam size in the pre-beam search
            will be `int(pre_beam_ratio * beam_size)`

    Returns:
        List[Dict]: N-best decoding results
    """
    searcher = BeamSearch(
        scorers,
        weights,
        beam_size=beam_size,
        vocab_size=vocab_size,
        pre_beam_ratio=pre_beam_ratio,
        pre_beam_score_key=pre_beam_score_key,
        sos=sos,
        eos=eos,
        token_list=token_list, )
    nbest = searcher.forward(
        x=x, maxlenratio=maxlenratio, minlenratio=minlenratio)
    return [hyp.asdict() for hyp in nbest]
deepspeech/decoders/scores/__init__.py
→
deepspeech/decoders/score
r
s/__init__.py
浏览文件 @
7402c120
文件已移动
deepspeech/decoders/scores/ctc.py
→
deepspeech/decoders/score
r
s/ctc.py
浏览文件 @
7402c120
...
...
@@ -15,8 +15,8 @@
import
numpy
as
np
import
paddle
from
.ctc_prefix_score
import
CTCPrefixScore
from
.ctc_prefix_score
import
CTCPrefixScorePD
from
.ctc_prefix_score
import
CTCPrefixScore
r
from
.ctc_prefix_score
import
CTCPrefixScore
r
PD
from
.scorer_interface
import
BatchPartialScorerInterface
...
...
deepspeech/decoders/scores/ctc_prefix_score.py
→
deepspeech/decoders/score
r
s/ctc_prefix_score.py
浏览文件 @
7402c120
...
...
@@ -6,7 +6,7 @@ import paddle
import
six
class
CTCPrefixScorePD
():
class
CTCPrefixScore
r
PD
():
"""Batch processing of CTCPrefixScore
which is based on Algorithm 2 in WATANABE et al.
...
...
@@ -267,7 +267,7 @@ class CTCPrefixScorePD():
return
(
r_prev_new
,
s_prev
,
f_min_prev
,
f_max_prev
)
class
CTCPrefixScore
():
class
CTCPrefixScore
r
():
"""Compute CTC label sequence scores
which is based on Algorithm 2 in WATANABE et al.
...
...
deepspeech/decoders/scores/length_bonus.py
→
deepspeech/decoders/score
r
s/length_bonus.py
浏览文件 @
7402c120
文件已移动
deepspeech/decoders/scores/ngram.py
→
deepspeech/decoders/score
r
s/ngram.py
浏览文件 @
7402c120
文件已移动
deepspeech/decoders/scores/score_interface.py
→
deepspeech/decoders/score
r
s/score_interface.py
浏览文件 @
7402c120
...
...
@@ -145,9 +145,11 @@ class PartialScorerInterface(ScorerInterface):
and receives pre-pruned next tokens to score because it is too heavy to score
all the tokens.
Score sub-set of tokens, not all.
Examples:
* Prefix search for connectionist-temporal-classification models
* :class:`
espnet.net
s.scorers.ctc.CTCPrefixScorer`
* :class:`
decoder
s.scorers.ctc.CTCPrefixScorer`
"""
...
...
deepspeech/io/batchfy.py
浏览文件 @
7402c120
...
...
@@ -354,7 +354,7 @@ def make_batchset(
:param int batch_frames_out: maximum number of output frames in a minibatch.
:param int batch_frames_out: maximum number of input+output frames in a minibatch.
:param str count: strategy to count maximum size of batch.
For choices, see
espnet.asr
.batchfy.BATCH_COUNT_CHOICES
For choices, see
io
.batchfy.BATCH_COUNT_CHOICES
:param int max_length_in: maximum length of input to decide adaptive batch size
:param int max_length_out: maximum length of output to decide adaptive batch size
...
...
deepspeech/utils/utility.py
浏览文件 @
7402c120
...
...
@@ -42,7 +42,7 @@ def all_version():
"paddle_commit"
:
paddle
.
version
.
commit
,
"soundfile"
:
soundfile
.
__version__
,
}
logger
.
info
(
f
"Deps Module Version:
{
pformat
(
vers
.
items
(
))
}
"
)
logger
.
info
(
f
"Deps Module Version:
{
pformat
(
list
(
vers
.
items
()
))
}
"
)
@
contextmanager
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录