fix for train

2bdcf2c5 · Hui Zhang · 3e449d65 · 2bdcf2c5 · 2bdcf2c5 · 2bdcf2c5
10 changed files
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,7 +52,7 @@
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
        #exclude: (?=decoders/swig).*(\.cpp|\.h)$
-   repo: https://github.com/asottile/reorder_python_imports
-    rev: v2.4.0
-    hooks:
-      - id: reorder-python-imports
+#-   repo: https://github.com/asottile/reorder_python_imports
+#    rev: v2.4.0
+#    hooks:
+#      - id: reorder-python-imports
--- a/deepspeech/frontend/audio.py
+++ b/deepspeech/frontend/audio.py
@@ -330,9 +330,10 @@ class AudioSegment(object):
        # new_indices = np.linspace(start=0, stop=old_length, num=new_length)
        # self._samples = np.interp(new_indices, old_indices, self._samples)
        tfm = sox.Transformer()
+        tfm.set_globals(multithread=False)
        tfm.speed(speed_rate)
        self._samples = tfm.build_array(
-            input_array=self._samples, sample_rate_in=self._sample_rate)
+            input_array=self._samples, sample_rate_in=self._sample_rate).copy()

    def normalize(self, target_db=-20, max_gain_db=300.0):
        """Normalize audio to be of the desired RMS value in decibels.

--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -113,7 +113,7 @@ class AugmentationPipeline():
        Args:
            spec_segment (np.ndarray): audio feature, (D, T).
        """
-        for augmentor, rate in zip(self._augmentors, self._rates):
+        for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
            if self._rng.uniform(0., 1.) < rate:
                spec_segment = augmentor.transform_feature(spec_segment)
        return spec_segment

--- a/deepspeech/frontend/augmentor/base.py
+++ b/deepspeech/frontend/augmentor/base.py
@@ -40,7 +40,7 @@ class AugmentorBase():
        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegmenet|SpeechSegment
        """
-        pass
+        raise NotImplementedError

    @abstractmethod
    def transform_feature(self, spec_segment):
@@ -52,4 +52,4 @@ class AugmentorBase():
        Args:
            spec_segment (Spectrogram): Spectrogram segment to add effects to.
        """
-        pass
+        raise NotImplementedError
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -133,7 +133,6 @@ class U2BaseModel(nn.Module):
            smoothing=lsm_weight,
            normalize_length=length_normalized_loss, )

-    @jit.export
    def forward(
            self,
            speech: paddle.Tensor,

--- a/examples/aishell/s1/conf/augmentation.json
+++ b/examples/aishell/s1/conf/augmentation.json
@@ -6,7 +6,7 @@
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
-    "prob": 1.0
+    "prob": 0.0
  },
  {
    "type": "shift",
@@ -29,6 +29,6 @@
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20
    },
-    "prob": 0.0
+    "prob": 1.0
  }
 ]
--- a/examples/aishell/s1/conf/conformer.yaml
+++ b/examples/aishell/s1/conf/conformer.yaml
@@ -8,11 +8,11 @@ data:
  spm_model_prefix: ''
  mean_std_filepath: ""
  augmentation_config: conf/augmentation.json
-  batch_size: 16
+  batch_size: 64
  min_input_len: 0.5
  max_input_len: 20.0
  min_output_len: 0.0
-  max_output_len: 400
+  max_output_len: 400.0
  min_output_input_ratio: 0.05
  max_output_input_ratio: 10.0
  raw_wav: True  # use raw_wav or kaldi feature
@@ -75,7 +75,7 @@ model:

 training:
  n_epoch: 240
-  accum_grad: 4
+  accum_grad: 1
  global_grad_clip: 5.0
  optim: adam
  optim_conf:
@@ -85,7 +85,7 @@ training:
  scheduler_conf:
    warmup_steps: 25000
    lr_decay: 1.0
-  log_interval: 100
+  log_interval: 1


 decoding:

--- a/examples/aishell/s1/local/export.sh
+++ b/examples/aishell/s1/local/export.sh
@@ -6,7 +6,7 @@ if [ $# != 2 ];then
 fi

 python3 -u ${BIN_DIR}/export.py \
--config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --checkpoint_path ${1} \
 --export_path ${2} 


--- a/examples/aishell/s1/local/test.sh
+++ b/examples/aishell/s1/local/test.sh
@@ -9,7 +9,7 @@ fi
 python3 -u ${BIN_DIR}/test.py \
 --device 'gpu' \
 --nproc 1 \
--config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --output ckpt

 if [ $? -ne 0 ]; then

--- a/examples/aishell/s1/local/train.sh
+++ b/examples/aishell/s1/local/train.sh
@@ -6,7 +6,7 @@ echo "using $ngpu gpus..."
 python3 -u ${BIN_DIR}/train.py \
 --device 'gpu' \
 --nproc ${ngpu} \
--config conf/deepspeech2.yaml \
+--config conf/conformer.yaml \
 --output ckpt-${1}