From 2bdcf2c5ba36cf948a48aab23908d088723ddf14 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 14 Apr 2021 09:10:45 +0000 Subject: [PATCH] fix for train --- .pre-commit-config.yaml | 8 ++++---- deepspeech/frontend/audio.py | 3 ++- deepspeech/frontend/augmentor/augmentation.py | 2 +- deepspeech/frontend/augmentor/base.py | 4 ++-- deepspeech/models/u2.py | 1 - examples/aishell/s1/conf/augmentation.json | 4 ++-- examples/aishell/s1/conf/conformer.yaml | 8 ++++---- examples/aishell/s1/local/export.sh | 2 +- examples/aishell/s1/local/test.sh | 2 +- examples/aishell/s1/local/train.sh | 2 +- 10 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 02c084bb..c18efbc1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ #exclude: (?=decoders/swig).*(\.cpp|\.h)$ -- repo: https://github.com/asottile/reorder_python_imports - rev: v2.4.0 - hooks: - - id: reorder-python-imports +#- repo: https://github.com/asottile/reorder_python_imports +# rev: v2.4.0 +# hooks: +# - id: reorder-python-imports diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py index 10a26db2..ebc89bec 100644 --- a/deepspeech/frontend/audio.py +++ b/deepspeech/frontend/audio.py @@ -330,9 +330,10 @@ class AudioSegment(object): # new_indices = np.linspace(start=0, stop=old_length, num=new_length) # self._samples = np.interp(new_indices, old_indices, self._samples) tfm = sox.Transformer() + tfm.set_globals(multithread=False) tfm.speed(speed_rate) self._samples = tfm.build_array( - input_array=self._samples, sample_rate_in=self._sample_rate) + input_array=self._samples, sample_rate_in=self._sample_rate).copy() def normalize(self, target_db=-20, max_gain_db=300.0): """Normalize audio to be of the desired RMS value in decibels. diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py index 16dc8ec3..9204ae3f 100644 --- a/deepspeech/frontend/augmentor/augmentation.py +++ b/deepspeech/frontend/augmentor/augmentation.py @@ -113,7 +113,7 @@ class AugmentationPipeline(): Args: spec_segment (np.ndarray): audio feature, (D, T). """ - for augmentor, rate in zip(self._augmentors, self._rates): + for augmentor, rate in zip(self._spec_augmentors, self._spec_rates): if self._rng.uniform(0., 1.) < rate: spec_segment = augmentor.transform_feature(spec_segment) return spec_segment diff --git a/deepspeech/frontend/augmentor/base.py b/deepspeech/frontend/augmentor/base.py index 250d32dc..fcc49d3f 100644 --- a/deepspeech/frontend/augmentor/base.py +++ b/deepspeech/frontend/augmentor/base.py @@ -40,7 +40,7 @@ class AugmentorBase(): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - pass + raise NotImplementedError @abstractmethod def transform_feature(self, spec_segment): @@ -52,4 +52,4 @@ class AugmentorBase(): Args: spec_segment (Spectrogram): Spectrogram segment to add effects to. """ - pass + raise NotImplementedError diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py index 54a99399..16573a38 100644 --- a/deepspeech/models/u2.py +++ b/deepspeech/models/u2.py @@ -133,7 +133,6 @@ class U2BaseModel(nn.Module): smoothing=lsm_weight, normalize_length=length_normalized_loss, ) - @jit.export def forward( self, speech: paddle.Tensor, diff --git a/examples/aishell/s1/conf/augmentation.json b/examples/aishell/s1/conf/augmentation.json index aa16afb2..1987ad42 100644 --- a/examples/aishell/s1/conf/augmentation.json +++ b/examples/aishell/s1/conf/augmentation.json @@ -6,7 +6,7 @@ "max_speed_rate": 1.1, "num_rates": 3 }, - "prob": 1.0 + "prob": 0.0 }, { "type": "shift", @@ -29,6 +29,6 @@ "adaptive_size_ratio": 0, "max_n_time_masks": 20 }, - "prob": 0.0 + "prob": 1.0 } ] diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml index fced75d7..1b374507 100644 --- a/examples/aishell/s1/conf/conformer.yaml +++ b/examples/aishell/s1/conf/conformer.yaml @@ -8,11 +8,11 @@ data: spm_model_prefix: '' mean_std_filepath: "" augmentation_config: conf/augmentation.json - batch_size: 16 + batch_size: 64 min_input_len: 0.5 max_input_len: 20.0 min_output_len: 0.0 - max_output_len: 400 + max_output_len: 400.0 min_output_input_ratio: 0.05 max_output_input_ratio: 10.0 raw_wav: True # use raw_wav or kaldi feature @@ -75,7 +75,7 @@ model: training: n_epoch: 240 - accum_grad: 4 + accum_grad: 1 global_grad_clip: 5.0 optim: adam optim_conf: @@ -85,7 +85,7 @@ training: scheduler_conf: warmup_steps: 25000 lr_decay: 1.0 - log_interval: 100 + log_interval: 1 decoding: diff --git a/examples/aishell/s1/local/export.sh b/examples/aishell/s1/local/export.sh index 1b553391..864ecb2d 100644 --- a/examples/aishell/s1/local/export.sh +++ b/examples/aishell/s1/local/export.sh @@ -6,7 +6,7 @@ if [ $# != 2 ];then fi python3 -u ${BIN_DIR}/export.py \ ---config conf/deepspeech2.yaml \ +--config conf/conformer.yaml \ --checkpoint_path ${1} \ --export_path ${2} diff --git a/examples/aishell/s1/local/test.sh b/examples/aishell/s1/local/test.sh index 0872ff21..e4cf0c85 100644 --- a/examples/aishell/s1/local/test.sh +++ b/examples/aishell/s1/local/test.sh @@ -9,7 +9,7 @@ fi python3 -u ${BIN_DIR}/test.py \ --device 'gpu' \ --nproc 1 \ ---config conf/deepspeech2.yaml \ +--config conf/conformer.yaml \ --output ckpt if [ $? -ne 0 ]; then diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index 8ed5010e..d20395d0 100644 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -6,7 +6,7 @@ echo "using $ngpu gpus..." python3 -u ${BIN_DIR}/train.py \ --device 'gpu' \ --nproc ${ngpu} \ ---config conf/deepspeech2.yaml \ +--config conf/conformer.yaml \ --output ckpt-${1} -- GitLab