Commit ddf184be authored by billishyahao

fix some typos

Parent 3568bb62
@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
......
@@ -62,7 +62,7 @@ class Scorer(object):
         """Evaluation function, gathering all the different scores
         and return the final one.
-        :param sentence: The input sentence for evalutation
+        :param sentence: The input sentence for evaluation
         :type sentence: str
         :param log: Whether return the score in log representation.
         :type log: bool
......
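For readers skimming the hunk above: scorers of this shape conventionally weight a language-model probability against a word-count bonus and expose a `log` switch for the representation of the result. Below is a minimal, self-contained sketch of that idea; the `alpha`/`beta` weights and the toy one-probability-per-word LM are illustrative assumptions, not the actual fields of the `Scorer` class.

```python
import math

# Minimal sketch of an external scorer of this shape. alpha, beta and
# the toy LM are assumptions for illustration, not Scorer's real state.
def evaluate(sentence: str, log: bool = False,
             alpha: float = 2.5, beta: float = 0.3) -> float:
    words = sentence.split()
    word_cnt = max(len(words), 1)        # avoid log(0) on empty input
    lm_prob = 0.1 ** word_cnt            # toy LM: probability 0.1 per word
    if log:
        # score in log representation, matching the `log` parameter above
        return alpha * math.log(lm_prob) + beta * math.log(word_cnt)
    return math.pow(lm_prob, alpha) * math.pow(word_cnt, beta)

print(evaluate("hello world", log=True))  # ≈ -11.30 with these toy numbers
```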
@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
   std::sort(
       prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
-  // compute aproximate ctc score as the return score, without affecting the
+  // compute approximate ctc score as the return score, without affecting the
   // return order of decoding result. To delete when decoder gets stable.
   for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
     double approx_ctc = prefixes[i]->score;
......
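The prefixes being sorted in this hunk carry log-domain scores, so merging the probability mass of two paths uses log-sum-exp rather than plain addition. A small sketch of that standard helper, with simplified (prefix, score) pairs standing in for the decoder's richer prefix objects:

```python
import math

# Combine two log-domain path probabilities: log(exp(a) + exp(b)),
# computed stably. A standard CTC beam-search helper, sketched here.
def log_sum_exp(a: float, b: float) -> float:
    if a == -math.inf:
        return b
    if b == -math.inf:
        return a
    m = max(a, b)
    return m + math.log1p(math.exp(min(a, b) - m))

# Best-first ordering of simplified (prefix, log_score) pairs, mirroring
# the std::sort over prefixes in the C++ above.
prefixes = [("he", -3.2), ("the", -2.1), ("a", -4.7)]
prefixes.sort(key=lambda p: p[1], reverse=True)
print(prefixes[0])  # ('the', -2.1)
```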
@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
   for (size_t i = 0; i < prob_step.size(); ++i) {
     prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
   }
-  // pruning of vacobulary
+  // pruning of vocabulary
   size_t cutoff_len = prob_step.size();
   if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
     std::sort(prob_idx.begin(),
......
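The pruning this comment refers to keeps, at each time step, only the most probable symbols: those whose cumulative probability reaches `cutoff_prob`, capped at `cutoff_top_n` entries. A Python sketch of that logic under those assumptions (the C++ above operates on the same (index, probability) pairs):

```python
import math

# Sketch of per-step vocabulary pruning for CTC decoding. prob_step is
# assumed to hold one frame's per-symbol probabilities; the cutoff
# logic mirrors the description above, not the exact C++ internals.
def prune_log_probs(prob_step, cutoff_prob=0.99, cutoff_top_n=40):
    prob_idx = sorted(enumerate(prob_step), key=lambda p: p[1], reverse=True)
    if cutoff_prob < 1.0 or cutoff_top_n < len(prob_idx):
        cum_prob, cutoff_len = 0.0, 0
        for _, prob in prob_idx:
            cum_prob += prob
            cutoff_len += 1
            if cum_prob >= cutoff_prob or cutoff_len >= cutoff_top_n:
                break
        prob_idx = prob_idx[:cutoff_len]
    # return (index, log prob) pairs, matching the function's signature
    return [(i, math.log(max(p, 1e-30))) for i, p in prob_idx]

print(prune_log_probs([0.6, 0.3, 0.05, 0.05], cutoff_prob=0.9))
# -> [(0, log 0.6), (1, log 0.3)]; the two 0.05 tails are pruned
```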
@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {
   * This gets rid of "epsilon" transitions in the FST.
   * These are transitions that don't require a string input to be taken.
-  * Getting rid of them is necessary to make the FST determinisitc, but
+  * Getting rid of them is necessary to make the FST deterministic, but
   * can greatly increase the size of the FST
   */
   fst::RmEpsilon(&dictionary);
......
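To see why epsilon transitions block determinization: a state with an outgoing epsilon edge can effectively emit the labels of the state it silently reaches, so the machine's next move is ambiguous until those silent edges are folded away. A toy closure computation on a dict-based automaton (illustrating the idea only, not OpenFst's API):

```python
from collections import defaultdict

# Toy automaton: transitions[state][label] -> next state; label None
# stands in for the FST's epsilon. Purely illustrative structures.
transitions = defaultdict(dict)
transitions[0][None] = 1   # epsilon: reach state 1 consuming no input
transitions[1]['a'] = 2
transitions[0]['b'] = 3

def eps_closure(state):
    """All states reachable from `state` via epsilon edges alone."""
    seen, stack = {state}, [state]
    while stack:
        nxt = transitions[stack.pop()].get(None)
        if nxt is not None and nxt not in seen:
            seen.add(nxt)
            stack.append(nxt)
    return seen

# After taking the closure, state 0 can consume 'a' directly (via its
# silent hop to state 1); removing epsilons makes this explicit.
labels = {l for s in eps_closure(0) for l in transitions[s] if l is not None}
print(labels)  # {'a', 'b'}
```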
@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         Args:
             state: The states of hyps
-        Returns: exteded state
+        Returns: extended state
         """
         new_state = []
......
@@ -11,7 +11,7 @@ class CTCPrefixScorePD():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the label probablities for multiple
+    but extended to efficiently compute the label probabilities for multiple
     hypotheses simultaneously
     See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
     Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
@@ -272,7 +272,7 @@ class CTCPrefixScore():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the probablities of multiple labels
+    but extended to efficiently compute the probabilities of multiple labels
     simultaneously
     """
......
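The "multiple labels/hypotheses simultaneously" extension both docstrings mention boils down to replacing a per-candidate Python loop with one batched indexed read over the emission matrix. A shape-level numpy sketch of that vectorization (dimensions are illustrative, not the class's actual state):

```python
import numpy as np

# T frames, V vocab entries, H live hypotheses, C candidate labels per
# hypothesis; all sizes are illustrative.
T, V, H, C = 20, 50, 4, 10
rng = np.random.default_rng(0)
log_probs = np.log(rng.dirichlet(np.ones(V), size=T))   # (T, V) emissions
cands = rng.integers(0, V, size=(H, C))                 # labels to score

# One fancy-indexed gather scores every candidate of every hypothesis
# at every frame at once, instead of looping label by label.
cand_log_probs = log_probs[:, cands]
print(cand_log_probs.shape)  # (20, 4, 10)
```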
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
......
@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)
......
@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """u2 decoding.
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'attention', 'ctc_greedy_search',
......
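Going by the argument list in this docstring, a call site would look roughly like the sketch below; `model`, `feats`, `feats_lengths`, and `text_feature` are assumed to be prepared elsewhere, and any further decoding options the method accepts are omitted.

```python
# Hedged usage sketch built only from the arguments the docstring names;
# the model and inputs are assumed to exist already.
hyps = model.decode(
    feats,             # (B, T, D) audio feature Tensor
    feats_lengths,     # (B,) valid-length Tensor
    text_feature=text_feature,            # TextFeaturizer instance
    decoding_method='ctc_greedy_search')  # one of the listed modes
```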
@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
         """u2 decoding.
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'fullsentence',
......
@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
         Args:
             hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
             hlens (Tensor): batch of lengths of hidden state sequences (B)
-            ys_pad (Tenosr): batch of padded character id sequence tensor (B, Lmax)
+            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
             ys_lens (Tensor): batch of lengths of character sequence (B)
         Returns:
-            loss (Tenosr): ctc loss value, scalar.
+            loss (Tensor): ctc loss value, scalar.
         """
         logits = self.ctc_lo(self.dropout(hs_pad))
         loss = self.criterion(logits, ys_pad, hlens, ys_lens)
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
         """ctc decoding with probs.
         Args:
-            probs (Tenosr): activation after softmax
-            logits_lens (Tenosr): audio output lens
+            probs (Tensor): activation after softmax
+            logits_lens (Tensor): audio output lens
             vocab_list ([type]): [description]
             decoding_method ([type]): [description]
             lang_model_path ([type]): [description]
......
@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
                  [0, 0, 0, 1, 1],
                  [0, 0, 1, 1, 1]]
     """
-    # (TODO: Hui Zhang): jit not support Tenosr.dim() and Tensor.ndim
+    # (TODO: Hui Zhang): jit not support Tensor.dim() and Tensor.ndim
     # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
......
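The example rows above mark padded positions with 1: row i is 1 wherever the position index reaches lengths[i]. A minimal broadcast-based sketch of that computation, written to respect the jit constraint from the TODO by using `shape[0]` instead of `Tensor.dim()`/`Tensor.ndim`:

```python
import paddle

# Minimal sketch of the pad-mask construction illustrated above;
# assumes `lengths` is a 1-D integer Tensor of valid lengths.
def make_pad_mask_sketch(lengths: paddle.Tensor) -> paddle.Tensor:
    batch_size = int(lengths.shape[0])   # shape[0], not dim()/ndim
    max_len = int(lengths.max())
    # positions 0..max_len-1, broadcast against each utterance length
    seq_range = paddle.arange(0, max_len, dtype='int64')
    seq_range = seq_range.unsqueeze(0).expand([batch_size, max_len])
    # True (1) exactly where the position is past the end, i.e. padding
    return seq_range >= lengths.unsqueeze(-1)

print(make_pad_mask_sketch(paddle.to_tensor([5, 3, 2])).astype('int32'))
# [[0, 0, 0, 0, 0],
#  [0, 0, 0, 1, 1],
#  [0, 0, 1, 1, 1]]
```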
@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
     return new_args
 
 
-def filter_out_tenosr(args: Dict[Text, Any]):
+def filter_out_tensor(args: Dict[Text, Any]):
     return {key: val for key, val in args.items() if not has_tensor(val)}
@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
     valid_keys = inspect.signature(module_class).parameters.keys()
     new_args = filter_valid_args(args, valid_keys)
     logger.info(
-        f"Instance: {module_class.__name__} {filter_out_tenosr(new_args)}.")
+        f"Instance: {module_class.__name__} {filter_out_tensor(new_args)}.")
     return module_class(**new_args)
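Taken together, `filter_valid_args`, `filter_out_tensor`, and `instance_class` implement a small instantiate-from-config pattern: keep only the kwargs the target class's signature accepts, log them (minus any tensors), and construct the instance. A self-contained sketch of the same pattern with a hypothetical class:

```python
import inspect

# Self-contained sketch of the instantiate-from-config pattern above.
# `Demo` and its parameters are hypothetical, for illustration only.
class Demo:
    def __init__(self, hidden_size: int, dropout: float = 0.1):
        self.hidden_size = hidden_size
        self.dropout = dropout

config = {"hidden_size": 256, "dropout": 0.2, "unused_flag": True}
valid_keys = inspect.signature(Demo).parameters.keys()
new_args = {k: v for k, v in config.items() if k in valid_keys}
demo = Demo(**new_args)   # unused_flag is filtered out, as above
print(type(demo).__name__, new_args)
# Demo {'hidden_size': 256, 'dropout': 0.2}
```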