From 2bdcf2c5ba36cf948a48aab23908d088723ddf14 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 14 Apr 2021 09:10:45 +0000
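Subject: [PATCH] fix for train: spec augmentor loop, sox speed perturb,
 aishell/s1 conformer config

- pre-commit: comment out the reorder-python-imports hook.
- frontend/audio.py: call sox set_globals(multithread=False) before the
  speed transform and copy the array returned by build_array.
- frontend/augmentor/augmentation.py: iterate _spec_augmentors and
  _spec_rates when applying transform_feature, instead of _augmentors
  and _rates.
- frontend/augmentor/base.py: abstract transform methods raise
  NotImplementedError instead of silently passing.
- models/u2.py: drop the @jit.export decorator from forward.
- examples/aishell/s1/conf/augmentation.json: set prob to 0.0 for the
  speed-rate entry and to 1.0 for the time-mask entry.
- examples/aishell/s1/conf/conformer.yaml: batch_size 16 -> 64,
  accum_grad 4 -> 1, log_interval 100 -> 1, max_output_len 400 -> 400.0.
- examples/aishell/s1/local/{export,test,train}.sh: pass
  conf/conformer.yaml instead of conf/deepspeech2.yaml.
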
---
 .pre-commit-config.yaml                       | 8 ++++----
 deepspeech/frontend/audio.py                  | 3 ++-
 deepspeech/frontend/augmentor/augmentation.py | 2 +-
 deepspeech/frontend/augmentor/base.py         | 4 ++--
 deepspeech/models/u2.py                       | 1 -
 examples/aishell/s1/conf/augmentation.json    | 4 ++--
 examples/aishell/s1/conf/conformer.yaml       | 8 ++++----
 examples/aishell/s1/local/export.sh           | 2 +-
 examples/aishell/s1/local/test.sh             | 2 +-
 examples/aishell/s1/local/train.sh            | 2 +-
 10 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 02c084bb..c18efbc1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,7 +52,7 @@
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
         #exclude: (?=decoders/swig).*(\.cpp|\.h)$
--   repo: https://github.com/asottile/reorder_python_imports
-    rev: v2.4.0
-    hooks:
-      - id: reorder-python-imports
+#-   repo: https://github.com/asottile/reorder_python_imports
+#    rev: v2.4.0
+#    hooks:
+#      - id: reorder-python-imports
diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py
index 10a26db2..ebc89bec 100644
--- a/deepspeech/frontend/audio.py
+++ b/deepspeech/frontend/audio.py
@@ -330,9 +330,10 @@ class AudioSegment(object):
         # new_indices = np.linspace(start=0, stop=old_length, num=new_length)
         # self._samples = np.interp(new_indices, old_indices, self._samples)
         tfm = sox.Transformer()
+        tfm.set_globals(multithread=False)
         tfm.speed(speed_rate)
         self._samples = tfm.build_array(
-            input_array=self._samples, sample_rate_in=self._sample_rate)
+            input_array=self._samples, sample_rate_in=self._sample_rate).copy()
 
     def normalize(self, target_db=-20, max_gain_db=300.0):
         """Normalize audio to be of the desired RMS value in decibels.
diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index 16dc8ec3..9204ae3f 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -113,7 +113,7 @@ class AugmentationPipeline():
         Args:
             spec_segment (np.ndarray): audio feature, (D, T).
         """
-        for augmentor, rate in zip(self._augmentors, self._rates):
+        for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
             if self._rng.uniform(0., 1.) < rate:
                 spec_segment = augmentor.transform_feature(spec_segment)
         return spec_segment
diff --git a/deepspeech/frontend/augmentor/base.py b/deepspeech/frontend/augmentor/base.py
index 250d32dc..fcc49d3f 100644
--- a/deepspeech/frontend/augmentor/base.py
+++ b/deepspeech/frontend/augmentor/base.py
@@ -40,7 +40,7 @@ class AugmentorBase():
         :param audio_segment: Audio segment to add effects to.
         :type audio_segment: AudioSegmenet|SpeechSegment
         """
-        pass
+        raise NotImplementedError
 
     @abstractmethod
     def transform_feature(self, spec_segment):
@@ -52,4 +52,4 @@ class AugmentorBase():
         Args:
             spec_segment (Spectrogram): Spectrogram segment to add effects to.
         """
-        pass
+        raise NotImplementedError
diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index 54a99399..16573a38 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -133,7 +133,6 @@ class U2BaseModel(nn.Module):
             smoothing=lsm_weight,
             normalize_length=length_normalized_loss, )
 
-    @jit.export
     def forward(
             self,
             speech: paddle.Tensor,
diff --git a/examples/aishell/s1/conf/augmentation.json b/examples/aishell/s1/conf/augmentation.json
index aa16afb2..1987ad42 100644
--- a/examples/aishell/s1/conf/augmentation.json
+++ b/examples/aishell/s1/conf/augmentation.json
@@ -6,7 +6,7 @@
       "max_speed_rate": 1.1,
       "num_rates": 3
     },
-    "prob": 1.0
+    "prob": 0.0
   },
   {
     "type": "shift",
@@ -29,6 +29,6 @@
       "adaptive_size_ratio": 0,
       "max_n_time_masks": 20
     },
-    "prob": 0.0
+    "prob": 1.0
   }
 ]
diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml
index fced75d7..1b374507 100644
--- a/examples/aishell/s1/conf/conformer.yaml
+++ b/examples/aishell/s1/conf/conformer.yaml
@@ -8,11 +8,11 @@ data:
   spm_model_prefix: ''
   mean_std_filepath: ""
   augmentation_config: conf/augmentation.json
-  batch_size: 16
+  batch_size: 64
   min_input_len: 0.5
   max_input_len: 20.0
   min_output_len: 0.0
-  max_output_len: 400
+  max_output_len: 400.0
   min_output_input_ratio: 0.05
   max_output_input_ratio: 10.0
   raw_wav: True  # use raw_wav or kaldi feature
@@ -75,7 +75,7 @@ model:
 
 training:
   n_epoch: 240
-  accum_grad: 4
+  accum_grad: 1
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
@@ -85,7 +85,7 @@ training:
   scheduler_conf:
     warmup_steps: 25000
     lr_decay: 1.0
-  log_interval: 100
+  log_interval: 1
 
 
 decoding:
diff --git a/examples/aishell/s1/local/export.sh b/examples/aishell/s1/local/export.sh
index 1b553391..864ecb2d 100644
--- a/examples/aishell/s1/local/export.sh
+++ b/examples/aishell/s1/local/export.sh
@@ -6,7 +6,7 @@ if [ $# != 2 ];then
 fi
 
 python3 -u ${BIN_DIR}/export.py \
---config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --checkpoint_path ${1} \
 --export_path ${2} 
 
diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh
index 0872ff21..e4cf0c85 100644
--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -9,7 +9,7 @@ fi
 python3 -u ${BIN_DIR}/test.py \
 --device 'gpu' \
 --nproc 1 \
---config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --output ckpt
 
 if [ $? -ne 0 ]; then
diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh
index 8ed5010e..d20395d0 100644
--- a/examples/aishell/s1/local/train.sh
+++ b/examples/aishell/s1/local/train.sh
@@ -6,7 +6,7 @@ echo "using $ngpu gpus..."
 python3 -u ${BIN_DIR}/train.py \
 --device 'gpu' \
 --nproc ${ngpu} \
---config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --output ckpt-${1}
 
 
-- 
GitLab