Commit ddf184be authored by billishyahao

fix some typos

Parent 3568bb62
@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
......
@@ -62,7 +62,7 @@ class Scorer(object):
         """Evaluation function, gathering all the different scores
         and return the final one.
-        :param sentence: The input sentence for evalutation
+        :param sentence: The input sentence for evaluation
         :type sentence: str
         :param log: Whether return the score in log representation.
         :type log: bool
......
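For readers skimming the hunk above: scorers of this shape conventionally weight a language-model probability against a word-count bonus and expose a `log` switch for the representation of the result. Below is a minimal, self-contained sketch of that idea; the `alpha`/`beta` weights and the toy one-probability-per-word LM are illustrative assumptions, not the actual fields of the `Scorer` class.

```python
import math

# Minimal sketch of an external scorer of this shape. alpha, beta and
# the toy LM are assumptions for illustration, not Scorer's real state.
def evaluate(sentence: str, log: bool = False,
             alpha: float = 2.5, beta: float = 0.3) -> float:
    words = sentence.split()
    word_cnt = max(len(words), 1)        # avoid log(0) on empty input
    lm_prob = 0.1 ** word_cnt            # toy LM: probability 0.1 per word
    if log:
        # score in log representation, matching the `log` parameter above
        return alpha * math.log(lm_prob) + beta * math.log(word_cnt)
    return math.pow(lm_prob, alpha) * math.pow(word_cnt, beta)

print(evaluate("hello world", log=True))  # ≈ -11.30 with these toy numbers
```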
@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
   std::sort(
       prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
-  // compute aproximate ctc score as the return score, without affecting the
+  // compute approximate ctc score as the return score, without affecting the
   // return order of decoding result. To delete when decoder gets stable.
   for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
     double approx_ctc = prefixes[i]->score;
......
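The prefixes being sorted in this hunk carry log-domain scores, so merging the probability mass of two paths uses log-sum-exp rather than plain addition. A small sketch of that standard helper, with simplified (prefix, score) pairs standing in for the decoder's richer prefix objects:

```python
import math

# Combine two log-domain path probabilities: log(exp(a) + exp(b)),
# computed stably. A standard CTC beam-search helper, sketched here.
def log_sum_exp(a: float, b: float) -> float:
    if a == -math.inf:
        return b
    if b == -math.inf:
        return a
    m = max(a, b)
    return m + math.log1p(math.exp(min(a, b) - m))

# Best-first ordering of simplified (prefix, log_score) pairs, mirroring
# the std::sort over prefixes in the C++ above.
prefixes = [("he", -3.2), ("the", -2.1), ("a", -4.7)]
prefixes.sort(key=lambda p: p[1], reverse=True)
print(prefixes[0])  # ('the', -2.1)
```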
@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
   for (size_t i = 0; i < prob_step.size(); ++i) {
     prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
   }
-  // pruning of vacobulary
+  // pruning of vocabulary
   size_t cutoff_len = prob_step.size();
   if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
     std::sort(prob_idx.begin(),
......
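The pruning this comment refers to keeps, at each time step, only the most probable symbols: those whose cumulative probability reaches `cutoff_prob`, capped at `cutoff_top_n` entries. A Python sketch of that logic under those assumptions (the C++ above operates on the same (index, probability) pairs):

```python
import math

# Sketch of per-step vocabulary pruning for CTC decoding. prob_step is
# assumed to hold one frame's per-symbol probabilities; the cutoff
# logic mirrors the description above, not the exact C++ internals.
def prune_log_probs(prob_step, cutoff_prob=0.99, cutoff_top_n=40):
    prob_idx = sorted(enumerate(prob_step), key=lambda p: p[1], reverse=True)
    if cutoff_prob < 1.0 or cutoff_top_n < len(prob_idx):
        cum_prob, cutoff_len = 0.0, 0
        for _, prob in prob_idx:
            cum_prob += prob
            cutoff_len += 1
            if cum_prob >= cutoff_prob or cutoff_len >= cutoff_top_n:
                break
        prob_idx = prob_idx[:cutoff_len]
    # return (index, log prob) pairs, matching the function's signature
    return [(i, math.log(max(p, 1e-30))) for i, p in prob_idx]

print(prune_log_probs([0.6, 0.3, 0.05, 0.05], cutoff_prob=0.9))
# -> [(0, log 0.6), (1, log 0.3)]; the two 0.05 tails are pruned
```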
@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {
   * This gets rid of "epsilon" transitions in the FST.
   * These are transitions that don't require a string input to be taken.
-  * Getting rid of them is necessary to make the FST determinisitc, but
+  * Getting rid of them is necessary to make the FST deterministic, but
   * can greatly increase the size of the FST
   */
   fst::RmEpsilon(&dictionary);
......
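To see why epsilon transitions block determinization: a state with an outgoing epsilon edge can effectively emit the labels of the state it silently reaches, so the machine's next move is ambiguous until those silent edges are folded away. A toy closure computation on a dict-based automaton (illustrating the idea only, not OpenFst's API):

```python
from collections import defaultdict

# Toy automaton: transitions[state][label] -> next state; label None
# stands in for the FST's epsilon. Purely illustrative structures.
transitions = defaultdict(dict)
transitions[0][None] = 1   # epsilon: reach state 1 consuming no input
transitions[1]['a'] = 2
transitions[0]['b'] = 3

def eps_closure(state):
    """All states reachable from `state` via epsilon edges alone."""
    seen, stack = {state}, [state]
    while stack:
        nxt = transitions[stack.pop()].get(None)
        if nxt is not None and nxt not in seen:
            seen.add(nxt)
            stack.append(nxt)
    return seen

# After taking the closure, state 0 can consume 'a' directly (via its
# silent hop to state 1); removing epsilons makes this explicit.
labels = {l for s in eps_closure(0) for l in transitions[s] if l is not None}
print(labels)  # {'a', 'b'}
```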
@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         Args:
             state: The states of hyps
-        Returns: exteded state
+        Returns: extended state
         """
         new_state = []
......
@@ -11,7 +11,7 @@ class CTCPrefixScorePD():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the label probablities for multiple
+    but extended to efficiently compute the label probabilities for multiple
     hypotheses simultaneously
     See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
     Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
@@ -272,7 +272,7 @@ class CTCPrefixScore():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the probablities of multiple labels
+    but extended to efficiently compute the probabilities of multiple labels
     simultaneously
     """
......
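The "multiple labels/hypotheses simultaneously" extension both docstrings mention boils down to replacing a per-candidate Python loop with one batched indexed read over the emission matrix. A shape-level numpy sketch of that vectorization (dimensions are illustrative, not the class's actual state):

```python
import numpy as np

# T frames, V vocab entries, H live hypotheses, C candidate labels per
# hypothesis; all sizes are illustrative.
T, V, H, C = 20, 50, 4, 10
rng = np.random.default_rng(0)
log_probs = np.log(rng.dirichlet(np.ones(V), size=T))   # (T, V) emissions
cands = rng.integers(0, V, size=(H, C))                 # labels to score

# One fancy-indexed gather scores every candidate of every hypothesis
# at every frame at once, instead of looping label by label.
cand_log_probs = log_probs[:, cands]
print(cand_log_probs.shape)  # (20, 4, 10)
```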
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
......
@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
         """Compute Model loss
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)
......
@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """u2 decoding.
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'attention', 'ctc_greedy_search',
......
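Going by the argument list in this docstring, a call site would look roughly like the sketch below; `model`, `feats`, `feats_lengths`, and `text_feature` are assumed to be prepared elsewhere, and any further decoding options the method accepts are omitted.

```python
# Hedged usage sketch built only from the arguments the docstring names;
# the model and inputs are assumed to exist already.
hyps = model.decode(
    feats,             # (B, T, D) audio feature Tensor
    feats_lengths,     # (B,) valid-length Tensor
    text_feature=text_feature,            # TextFeaturizer instance
    decoding_method='ctc_greedy_search')  # one of the listed modes
```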
@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
         """u2 decoding.
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'fullsentence',
......
@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
         Args:
             hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
             hlens (Tensor): batch of lengths of hidden state sequences (B)
-            ys_pad (Tenosr): batch of padded character id sequence tensor (B, Lmax)
+            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
             ys_lens (Tensor): batch of lengths of character sequence (B)
         Returns:
-            loss (Tenosr): ctc loss value, scalar.
+            loss (Tensor): ctc loss value, scalar.
         """
         logits = self.ctc_lo(self.dropout(hs_pad))
         loss = self.criterion(logits, ys_pad, hlens, ys_lens)
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
         """ctc decoding with probs.
         Args:
-            probs (Tenosr): activation after softmax
-            logits_lens (Tenosr): audio output lens
+            probs (Tensor): activation after softmax
+            logits_lens (Tensor): audio output lens
             vocab_list ([type]): [description]
             decoding_method ([type]): [description]
             lang_model_path ([type]): [description]
......
@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
                  [0, 0, 0, 1, 1],
                  [0, 0, 1, 1, 1]]
     """
-    # (TODO: Hui Zhang): jit not support Tenosr.dim() and Tensor.ndim
+    # (TODO: Hui Zhang): jit not support Tensor.dim() and Tensor.ndim
     # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
......
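The example rows above mark padded positions with 1: row i is 1 wherever the position index reaches lengths[i]. A minimal broadcast-based sketch of that computation, written to respect the jit constraint from the TODO by using `shape[0]` instead of `Tensor.dim()`/`Tensor.ndim`:

```python
import paddle

# Minimal sketch of the pad-mask construction illustrated above;
# assumes `lengths` is a 1-D integer Tensor of valid lengths.
def make_pad_mask_sketch(lengths: paddle.Tensor) -> paddle.Tensor:
    batch_size = int(lengths.shape[0])   # shape[0], not dim()/ndim
    max_len = int(lengths.max())
    # positions 0..max_len-1, broadcast against each utterance length
    seq_range = paddle.arange(0, max_len, dtype='int64')
    seq_range = seq_range.unsqueeze(0).expand([batch_size, max_len])
    # True (1) exactly where the position is past the end, i.e. padding
    return seq_range >= lengths.unsqueeze(-1)

print(make_pad_mask_sketch(paddle.to_tensor([5, 3, 2])).astype('int32'))
# [[0, 0, 0, 0, 0],
#  [0, 0, 0, 1, 1],
#  [0, 0, 1, 1, 1]]
```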
@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
     return new_args
 
 
-def filter_out_tenosr(args: Dict[Text, Any]):
+def filter_out_tensor(args: Dict[Text, Any]):
     return {key: val for key, val in args.items() if not has_tensor(val)}
@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
     valid_keys = inspect.signature(module_class).parameters.keys()
     new_args = filter_valid_args(args, valid_keys)
     logger.info(
-        f"Instance: {module_class.__name__} {filter_out_tenosr(new_args)}.")
+        f"Instance: {module_class.__name__} {filter_out_tensor(new_args)}.")
     return module_class(**new_args)
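Taken together, `filter_valid_args`, `filter_out_tensor`, and `instance_class` implement a small instantiate-from-config pattern: keep only the kwargs the target class's signature accepts, log them (minus any tensors), and construct the instance. A self-contained sketch of the same pattern with a hypothetical class:

```python
import inspect

# Self-contained sketch of the instantiate-from-config pattern above.
# `Demo` and its parameters are hypothetical, for illustration only.
class Demo:
    def __init__(self, hidden_size: int, dropout: float = 0.1):
        self.hidden_size = hidden_size
        self.dropout = dropout

config = {"hidden_size": 256, "dropout": 0.2, "unused_flag": True}
valid_keys = inspect.signature(Demo).parameters.keys()
new_args = {k: v for k, v in config.items() if k in valid_keys}
demo = Demo(**new_args)   # unused_flag is filtered out, as above
print(type(demo).__name__, new_args)
# Demo {'hidden_size': 256, 'dropout': 0.2}
```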