diff --git a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
index e25e40309395484da4aa345359f9c75bca27aa75..fb8b321ced781c5246f01a7c19fec79df0cd9c85 100644
--- a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
+++ b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py b/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
index d81fb2e3c7143edfa48720b5e5a5f1731dc90cda..362098fe65ec34106926e1804dfbb5abb273d97d 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
+++ b/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
@@ -62,7 +62,7 @@ class Scorer(object):
         """Evaluation function, gathering all the different scores
         and return the final one.

-        :param sentence: The input sentence for evalutation
+        :param sentence: The input sentence for evaluation
         :type sentence: str
         :param log: Whether return the score in log representation.
         :type log: bool
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
index 8469a194dfe461192b2280a43c7b2c6e3bf64cdd..663c52bb4525665922c604180618437dbfbe2eea 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
     std::sort(
         prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);

-    // compute aproximate ctc score as the return score, without affecting the
+    // compute approximate ctc score as the return score, without affecting the
     // return order of decoding result. To delete when decoder gets stable.
     for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
         double approx_ctc = prefixes[i]->score;
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
index 5d69ad0326e97bbb84140309dd3abeabfea831ef..e86c22401a48d0604e10107ccd463e04a34dcd1f 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
     for (size_t i = 0; i < prob_step.size(); ++i) {
         prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
     }
-    // pruning of vacobulary
+    // pruning of vocabulary
     size_t cutoff_len = prob_step.size();
     if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
         std::sort(prob_idx.begin(),
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
index 7bd6542dfef1c003327161c6fb886e094cef0c2c..7c9a75d58542fa4910bdade2736c40fcd25334ff 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {
      * This gets rid of "epsilon" transitions in the FST.
      * These are transitions that don't require a string input to be taken.
-     * Getting rid of them is necessary to make the FST determinisitc, but
+     * Getting rid of them is necessary to make the FST deterministic, but
      * can greatly increase the size of the FST
      */
     fst::RmEpsilon(&dictionary);
diff --git a/paddlespeech/s2t/decoders/scorers/ctc.py b/paddlespeech/s2t/decoders/scorers/ctc.py
index ace80bd3eb8d3cf1639be03c244b43aacbba2ec4..81d8b078392eb0282d59cfbefbb72a2583647aae 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc.py
@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         Args:
             state: The states of hyps

-        Returns: exteded state
+        Returns: extended state
         """
         new_state = []
diff --git a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
index 13429d491399e5b0f12268dfe6cde805691ba7cf..78b8fe36c8c0383d642740cab252ba7c89ba2ec0 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
@@ -11,7 +11,7 @@ class CTCPrefixScorePD():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the label probablities for multiple
+    but extended to efficiently compute the label probabilities for multiple
     hypotheses simultaneously
     See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
     Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
@@ -272,7 +272,7 @@ class CTCPrefixScore():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the probablities of multiple labels
+    but extended to efficiently compute the probabilities of multiple labels
     simultaneously
     """
diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index a478ba823f2e37bd72db8d46dcf9a552cb158ea5..4a4d67ce97492e2df5953411d22ecf879c526afd 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
index 7d463755ae886c3bf15c418336c84d77cb80c489..5e4981c06ce60257dc44dfa561a7b49ad9e785bc 100644
--- a/paddlespeech/s2t/models/ds2_online/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index d920a200c8b620423f9accfc894e42790a160cdb..ff4012e8eefb5f44cbbcfa1adfb4da8ad4754062 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """u2 decoding.

         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'attention', 'ctc_greedy_search',
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 2aa0b3479843b322e4cc5381596139e36c9e1fb3..79ca423f8740bde850e8c1b71a36856ea54f3d5e 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
         """u2 decoding.

         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'fullsentence',
diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py
index 6e9655799c42f5c2deb41bed873dd88774261ec4..1f98380771eb1e67ca186b369e5ad13eb7f1c463 100644
--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
         Args:
             hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
             hlens (Tensor): batch of lengths of hidden state sequences (B)
-            ys_pad (Tenosr): batch of padded character id sequence tensor (B, Lmax)
+            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
             ys_lens (Tensor): batch of lengths of character sequence (B)
         Returns:
-            loss (Tenosr): ctc loss value, scalar.
+            loss (Tensor): ctc loss value, scalar.
         """
         logits = self.ctc_lo(self.dropout(hs_pad))
         loss = self.criterion(logits, ys_pad, hlens, ys_lens)
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
         """ctc decoding with probs.

        Args:
-            probs (Tenosr): activation after softmax
-            logits_lens (Tenosr): audio output lens
+            probs (Tensor): activation after softmax
+            logits_lens (Tensor): audio output lens
             vocab_list ([type]): [description]
             decoding_method ([type]): [description]
             lang_model_path ([type]): [description]
diff --git a/paddlespeech/s2t/modules/mask.py b/paddlespeech/s2t/modules/mask.py
index d6b63761b49b530db68a7ff0bb342675124c9fca..1f66c015acb4574bddab8276a7c9c4454206997e 100644
--- a/paddlespeech/s2t/modules/mask.py
+++ b/paddlespeech/s2t/modules/mask.py
@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
                  [0, 0, 0, 1, 1],
                  [0, 0, 1, 1, 1]]
     """
-    # (TODO: Hui Zhang): jit not support Tenosr.dim() and Tensor.ndim
+    # (TODO: Hui Zhang): jit not support Tensor.dim() and Tensor.ndim
     # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
diff --git a/paddlespeech/s2t/utils/dynamic_import.py b/paddlespeech/s2t/utils/dynamic_import.py
index 50bd73a6de4219fd10c1679856eda918ac012eac..bd738edf8b8a96759d2fe7466fb9f7f027687c9d 100644
--- a/paddlespeech/s2t/utils/dynamic_import.py
+++ b/paddlespeech/s2t/utils/dynamic_import.py
@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
     return new_args


-def filter_out_tenosr(args: Dict[Text, Any]):
+def filter_out_tensor(args: Dict[Text, Any]):
     return {key: val for key, val in args.items() if not has_tensor(val)}


@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
     valid_keys = inspect.signature(module_class).parameters.keys()
     new_args = filter_valid_args(args, valid_keys)
     logger.info(
-        f"Instance: {module_class.__name__} {filter_out_tenosr(new_args)}.")
+        f"Instance: {module_class.__name__} {filter_out_tensor(new_args)}.")
     return module_class(**new_args)
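
Note: every hunk above is a comment- or docstring-only fix except the last one. Renaming the public helper filter_out_tenosr to filter_out_tensor in paddlespeech/s2t/utils/dynamic_import.py changes the module's API, so any external code that imports the old misspelled name will fail with an ImportError. If backward compatibility were a concern, a deprecation alias could be kept next to the renamed function; a minimal sketch (the shim below is illustrative and not part of this patch):

import warnings

def filter_out_tenosr(args):
    # Hypothetical compatibility shim, not in this diff: keeps the old
    # misspelled name importable while steering callers to the new one.
    # Assumes it lives in dynamic_import.py, so filter_out_tensor is in scope.
    warnings.warn(
        "filter_out_tenosr() was renamed to filter_out_tensor()",
        DeprecationWarning,
        stacklevel=2)
    return filter_out_tensor(args)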