Merge branch 'develop' of https://github.com/PaddlePaddle/DeepSpeech into ds2_online

40466ef6 · huangyuxin · b3d27e4b · 7840806b · 40466ef6 · 40466ef6
6 changed files
--- a/deepspeech/exps/u2_kaldi/model.py
+++ b/deepspeech/exps/u2_kaldi/model.py
@@ -228,7 +228,7 @@ class U2Trainer(Trainer):
            maxlen_in=float('inf'),
            maxlen_out=float('inf'),
            minibatches=0,
-            mini_batch_size=1,
+            mini_batch_size=self.args.nprocs,
            batch_count='auto',
            batch_bins=0,
            batch_frames_in=0,
@@ -247,7 +247,7 @@ class U2Trainer(Trainer):
            maxlen_in=float('inf'),
            maxlen_out=float('inf'),
            minibatches=0,
-            mini_batch_size=1,
+            mini_batch_size=self.args.nprocs,
            batch_count='auto',
            batch_bins=0,
            batch_frames_in=0,
@@ -263,7 +263,7 @@ class U2Trainer(Trainer):
            json_file=config.data.test_manifest,
            train_mode=False,
            sortagrad=False,
-            batch_size=config.collator.batch_size,
+            batch_size=config.decoding.batch_size,
            maxlen_in=float('inf'),
            maxlen_out=float('inf'),
            minibatches=0,
@@ -282,7 +282,7 @@ class U2Trainer(Trainer):
            json_file=config.data.test_manifest,
            train_mode=False,
            sortagrad=False,
-            batch_size=config.collator.batch_size,
+            batch_size=config.decoding.batch_size,
            maxlen_in=float('inf'),
            maxlen_out=float('inf'),
            minibatches=0,

--- a/deepspeech/frontend/augmentor/spec_augment.py
+++ b/deepspeech/frontend/augmentor/spec_augment.py
@@ -151,6 +151,9 @@ class SpecAugmentor(AugmentorBase):
            np.ndarray: time warped spectrogram (time, freq)
        """
        window = max_time_warp = self.W
+        if window == 0:
+            return x
+        
        if mode == "PIL":
            t = x.shape[0]
            if t - window <= window:

--- a/deepspeech/frontend/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -46,7 +46,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:

    with open(dict_path, "r") as f:
        dictionary = f.readlines()
-    char_list = [entry.split(" ")[0] for entry in dictionary]
+    char_list = [entry.strip().split(" ")[0] for entry in dictionary]
    if BLANK not in char_list:
        char_list.insert(0, BLANK)
    if EOS not in char_list:

--- a/examples/aishell/s0/README.md
+++ b/examples/aishell/s0/README.md
 # Aishell-1

+## Data
+| Data Subset | Duration in Seconds |
+| --- | --- |
+| data/manifest.train |  1.23 ~ 14.53125 |
+| data/manifest.dev | 1.645 ~ 12.533 |  
+| data/manifest.test | 1.859125 ~ 14.6999375 |
+
+`jq '.feat_shape[0]' data/manifest.train | sort -un`
+
 ## Deepspeech2

 | Model | Params | Release | Config | Test set | Loss | CER |  
 | --- | --- | --- | --- | --- | --- | --- |  
-| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 |  
+| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 |  
 | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |  
 | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
 | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |  

--- a/examples/aishell/s0/conf/augmentation.json
+++ b/examples/aishell/s0/conf/augmentation.json
@@ -19,17 +19,17 @@
  {
    "type": "specaug",
    "params": {
-      "W": 5,
+      "W": 0,
      "warp_mode": "PIL",
-      "F": 30,
+      "F": 10,
      "n_freq_masks": 2,
-      "T": 40,
+      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
-      "replace_with_zero": false
+      "replace_with_zero": true 
    },
    "prob": 1.0
  }

--- a/examples/librispeech/s0/conf/augmentation.json
+++ b/examples/librispeech/s0/conf/augmentation.json
@@ -19,17 +19,17 @@
  {
    "type": "specaug",
    "params": {
+      "W": 0,
+      "warp_mode": "PIL",
      "F": 10,
-      "T": 50,
      "n_freq_masks": 2,
+      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
-      "W": 80,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
-      "replace_with_zero": true,
-      "warp_mode": "PIL"
+      "replace_with_zero": true
    },
    "prob": 1.0
  }