diff --git a/dataset/aidatatang_200zh/aidatatang_200zh.py b/dataset/aidatatang_200zh/aidatatang_200zh.py
index b8758c9a76863633ed6701aed402c911fec68cec..85f478c20ddba18d40c175c724370b81c93e46d0 100644
--- a/dataset/aidatatang_200zh/aidatatang_200zh.py
+++ b/dataset/aidatatang_200zh/aidatatang_200zh.py
@@ -25,6 +25,7 @@
 import os
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/aishell/aishell.py b/dataset/aishell/aishell.py
index 32dc119d2ffb8110f49dbdddbba924ad9caa471c..7431fc08369546f372c93dc923f50300f1da10a3 100644
--- a/dataset/aishell/aishell.py
+++ b/dataset/aishell/aishell.py
@@ -25,6 +25,7 @@
 import os
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/librispeech/librispeech.py b/dataset/librispeech/librispeech.py
index 0c779696d67332e7eaa04fd26dca80ccd2f8754c..69f0db599e12d0a482a8d7783eb85ce9e04c744d 100644
--- a/dataset/librispeech/librispeech.py
+++ b/dataset/librispeech/librispeech.py
@@ -27,6 +27,7 @@
 import os
 from multiprocessing.pool import Pool
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/mini_librispeech/mini_librispeech.py b/dataset/mini_librispeech/mini_librispeech.py
index d96b5d64d5d242f028c1736be5ac0c7bac3c24e0..730c73a8b4dc44691351717de1bfe918f3b957ac 100644
--- a/dataset/mini_librispeech/mini_librispeech.py
+++ b/dataset/mini_librispeech/mini_librispeech.py
@@ -26,6 +26,7 @@
 import os
 from multiprocessing.pool import Pool
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/musan/musan.py b/dataset/musan/musan.py
index dc237c30ae80aa71b92882262df702e0f8b9d4dc..2ac701bed0c9c24be1d1dffbd0482b6f4ce3f473 100644
--- a/dataset/musan/musan.py
+++ b/dataset/musan/musan.py
@@ -28,6 +28,7 @@
 import json
 import os
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/rir_noise/rir_noise.py b/dataset/rir_noise/rir_noise.py
index 0e055f17b527b374adaeacce72939fcc19003b84..e7b1228904334982f3ed284f64c89a35b1a234c3 100644
--- a/dataset/rir_noise/rir_noise.py
+++ b/dataset/rir_noise/rir_noise.py
@@ -28,6 +28,7 @@
 import json
 import os
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unzip
diff --git a/dataset/thchs30/thchs30.py b/dataset/thchs30/thchs30.py
index 879ed58db158b2f68f3224040c43183e6b451078..cdfc0a75c0aacfdf89492d2f83642cb7f5decea8 100644
--- a/dataset/thchs30/thchs30.py
+++ b/dataset/thchs30/thchs30.py
@@ -26,6 +26,7 @@
 from multiprocessing.pool import Pool
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import download
 from utils.utility import unpack
diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py
index d03c48a1e10e9e40be082928fe27305d0ddbb8c7..c4a9f06631809bd4ca1d72755576d631f8590055 100644
--- a/dataset/timit/timit.py
+++ b/dataset/timit/timit.py
@@ -27,6 +27,7 @@
 import string
 from pathlib import Path
 
 import soundfile
+
 from utils.utility import unzip
 URL_ROOT = ""
diff --git a/dataset/voxforge/voxforge.py b/dataset/voxforge/voxforge.py
index c388f44917cb1510cc6a26af1e71984fbf37728c..373791bffe04114a51d89f6bf84c6dde504be84c 100644
--- a/dataset/voxforge/voxforge.py
+++ b/dataset/voxforge/voxforge.py
@@ -27,6 +27,7 @@
 import shutil
 import subprocess
 import soundfile
+
 from utils.utility import download_multi
 from utils.utility import getfile_insensitive
 from utils.utility import unpack
diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index 1b61ccc7bb48b105a63887a971ad954f83120a60..91ef6d166741dc946a916b1c0ffacf820430442f 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -75,4 +75,3 @@ PANN | ESC-50 |[pann-esc50]("./examples/esc50/cls0")|[panns_cnn6.tar.gz](https:/
 | [Ds2 Offline Aishell model](https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_v1.8_to_v2.x.tar.gz) | Aishell Dataset | Char-based | 234 MB | 2 Conv + 3 bidirectional GRU layers | 0.0804 | - | 151 h |
 | [Ds2 Offline Librispeech model](https://deepspeech.bj.bcebos.com/eng_models/librispeech_v1.8_to_v2.x.tar.gz) | Librispeech Dataset | Word-based | 307 MB | 2 Conv + 3 bidirectional sharing weight RNN layers | - | 0.0685 | 960 h |
 | [Ds2 Offline Baidu en8k model](https://deepspeech.bj.bcebos.com/eng_models/baidu_en8k_v1.8_to_v2.x.tar.gz) | Baidu Internal English Dataset | Word-based | 273 MB | 2 Conv + 3 bidirectional GRU layers | - | 0.0541 | 8628 h |
-
diff --git a/paddlespeech/cli/__init__.py b/paddlespeech/cli/__init__.py
index c82168aee0a1c03de893990e5ed81b8c0f90c360..80ca7a6658d79b92370e522bd46ea4afef26a5b0 100644
--- a/paddlespeech/cli/__init__.py
+++ b/paddlespeech/cli/__init__.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import _locale
+
 from .asr import ASRExecutor
 from .base_commands import BaseCommand
 from .base_commands import HelpCommand
diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py
index ee566ed4f8e29171a41364b8e86bb143fb570796..d4e5c22fb12b6453ba6ef6e4192f6a9442b960a9 100644
--- a/paddlespeech/cls/exps/panns/deploy/predict.py
+++ b/paddlespeech/cls/exps/panns/deploy/predict.py
@@ -16,10 +16,11 @@
 import os
 import numpy as np
 from paddle import inference
+from scipy.special import softmax
+
 from paddleaudio.backends import load as load_audio
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import melspectrogram
-from scipy.special import softmax
 
 # yapf: disable
 parser = argparse.ArgumentParser()
diff --git a/paddlespeech/cls/exps/panns/export_model.py b/paddlespeech/cls/exps/panns/export_model.py
index 63b22981adb62d213f0b2bd79ae9d4b180d06591..c295c6a33838b086480ddc4e681341cbd023d560 100644
--- a/paddlespeech/cls/exps/panns/export_model.py
+++ b/paddlespeech/cls/exps/panns/export_model.py
@@ -15,8 +15,8 @@
 import argparse
 import os
 
 import paddle
-from paddleaudio.datasets import ESC50
+
+from paddleaudio.datasets import ESC50
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
diff --git a/paddlespeech/cls/exps/panns/predict.py b/paddlespeech/cls/exps/panns/predict.py
index 0a1b6cccfe9b71d7ee98d3b974089e32a7b0145c..9cfd8b6ce44ab7318a218a52bd1c2eaee28d680a 100644
--- a/paddlespeech/cls/exps/panns/predict.py
+++ b/paddlespeech/cls/exps/panns/predict.py
@@ -16,11 +16,11 @@
 import argparse
 import numpy as np
 import paddle
 import paddle.nn.functional as F
+
 from paddleaudio.backends import load as load_audio
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import LogMelSpectrogram
 from paddleaudio.features import melspectrogram
-
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py
index 9508a977eae7c13be0a1a8ee2345fda4cbc1a998..1213097899cee2594b5ecf4e91310c16b5f46841 100644
--- a/paddlespeech/cls/exps/panns/train.py
+++ b/paddlespeech/cls/exps/panns/train.py
@@ -15,11 +15,11 @@
 import argparse
 import os
 
 import paddle
+
 from paddleaudio.datasets import ESC50
 from paddleaudio.features import LogMelSpectrogram
 from paddleaudio.utils import logger
 from paddleaudio.utils import Timer
-
 from paddlespeech.cls.models import cnn14
 from paddlespeech.cls.models import SoundClassifier
diff --git a/paddlespeech/cls/models/panns/panns.py b/paddlespeech/cls/models/panns/panns.py
index b442b2fd1224df90d0ec519124d2fc65397928c5..6d2dac56ac23d9b3322e49703f98e15faf936fd0 100644
--- a/paddlespeech/cls/models/panns/panns.py
+++ b/paddlespeech/cls/models/panns/panns.py
@@ -15,6 +15,7 @@
 import os
 
 import paddle.nn as nn
 import paddle.nn.functional as F
+
 from paddleaudio.utils.download import load_state_dict_from_url
 from paddleaudio.utils.env import MODEL_HOME
diff --git a/paddlespeech/s2t/frontend/audio.py b/paddlespeech/s2t/frontend/audio.py
index d494cc4fdc66704176b1bdb14e2b8bf08f6d120c..d0368cc8d229e2e298a6fabffa8af09af2f7cbb1 100644
--- a/paddlespeech/s2t/frontend/audio.py
+++ b/paddlespeech/s2t/frontend/audio.py
@@ -356,7 +356,7 @@ class AudioSegment():
         # sox, slow
         try:
             import soxbindings as sox
-        except:
+        except ImportError:
             try:
                 from paddlespeech.s2t.utils import dynamic_pip_install
                 package = "sox"
@@ -364,8 +364,9 @@ class AudioSegment():
                 package = "soxbindings"
                 dynamic_pip_install.install(package)
                 import soxbindings as sox
-            except:
-                raise RuntimeError("Can not install soxbindings on your system." )
+            except Exception:
+                raise RuntimeError(
+                    "Can not install soxbindings on your system.")
 
         tfm = sox.Transformer()
         tfm.set_globals(multithread=False)
diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py
index e6c7603fa20a8fcfa91ced38a61f1b329191ddf5..d35785db6825761e8bc26aada4c2c4d9d8066b0c 100644
--- a/paddlespeech/s2t/frontend/utility.py
+++ b/paddlespeech/s2t/frontend/utility.py
@@ -102,9 +102,11 @@ def read_manifest(
     with jsonlines.open(manifest_path, 'r') as reader:
         for json_data in reader:
             feat_len = json_data["input"][0]["shape"][
-                0] if "input" in json_data and "shape" in json_data["input"][0] else 1.0
+                0] if "input" in json_data and "shape" in json_data["input"][
+                    0] else 1.0
             token_len = json_data["output"][0]["shape"][
-                0] if "output" in json_data and "shape" in json_data["output"][0] else 1.0
+                0] if "output" in json_data and "shape" in json_data["output"][
+                    0] else 1.0
             conditions = [
                 feat_len >= min_input_len,
                 feat_len <= max_input_len,
diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py
index 35b57524b5906d53366ebc1c8d4b36322129bba2..ac55af1236f11d175e9e7717220980cf95c7d79b 100644
--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -20,13 +20,13 @@ from paddle.io import DistributedBatchSampler
 
 from paddlespeech.s2t.utils.log import Log
 
+logger = Log(__name__).getlog()
+
 __all__ = [
     "SortagradDistributedBatchSampler",
     "SortagradBatchSampler",
 ]
 
-logger = Log(__name__).getlog()
-
 
 def _batch_shuffle(indices, batch_size, epoch, clipped=False):
     """Put similarly-sized instances into minibatches for better efficiency
diff --git a/paddlespeech/s2t/models/ds2/__init__.py b/paddlespeech/s2t/models/ds2/__init__.py
index efa50863be3ed2c256457a5f3713fb7a5786bb1b..8d5959c8b62ab6cf41b4fb9fc3eafe652b3cbdf3 100644
--- a/paddlespeech/s2t/models/ds2/__init__.py
+++ b/paddlespeech/s2t/models/ds2/__init__.py
@@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install
 
 try:
     import swig_decoders
-except:
+except ImportError:
     try:
         package_name = 'paddlespeech_ctcdecoders'
         dynamic_pip_install.install(package_name)
-    except:
+    except Exception:
         raise RuntimeError(
             "Can not install package paddlespeech_ctcdecoders on your system. \
                 The DeepSpeech2 model is not supported for your system")
diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index f0a553ec80ffaa14342d9c2e85c4b2171f97c1fe..0dfaec29cd7762904a63676a694f7478fcbbb093 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -129,7 +129,7 @@ class DeepSpeech2Model(nn.Layer):
                 rnn_layer_size=1024,  #RNN layer size (number of RNN cells).
                 use_gru=True,  #Use gru if set True. Use simple rnn if set False.
                 share_rnn_weights=True,  #Whether to share input-hidden weights between forward and backward directional RNNs.Notice that for GRU, weight sharing is not supported.
-                ctc_grad_norm_type=None,))
+                ctc_grad_norm_type=None, ))
         if config is not None:
             config.merge_from_other_cfg(default)
         return default
diff --git a/paddlespeech/s2t/models/ds2_online/__init__.py b/paddlespeech/s2t/models/ds2_online/__init__.py
index 65ddd51224264da968658bfaef7e704b2883b32c..2d304237b607ad6dca43ae0aff6676159faa0832 100644
--- a/paddlespeech/s2t/models/ds2_online/__init__.py
+++ b/paddlespeech/s2t/models/ds2_online/__init__.py
@@ -17,11 +17,11 @@ from paddlespeech.s2t.utils import dynamic_pip_install
 
 try:
     import swig_decoders
-except:
+except ImportError:
     try:
         package_name = 'paddlespeech_ctcdecoders'
         dynamic_pip_install.install(package_name)
-    except:
+    except Exception:
         raise RuntimeError(
             "Can not install package paddlespeech_ctcdecoders on your system. \
                 The DeepSpeech2 model is not supported for your system")
diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py
index 774bcc62ed1f8f651d7f60dbf3681dc1198a561e..ffc9f038736d89224abb0275d3fe24ceb4a3ed71 100644
--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@@ -28,7 +28,7 @@ try:
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_beam_search_decoder_batch  # noqa: F401
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import ctc_greedy_decoder  # noqa: F401
     from paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper import Scorer  # noqa: F401
-except:
+except ImportError:
     try:
         from paddlespeech.s2t.utils import dynamic_pip_install
         package_name = 'paddlespeech_ctcdecoders'
diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py
index cc8f50317144b94be19aef424674053b977b1b3b..9bf1ca4db26c42f6c05e57916ffd96a94917da1d 100644
--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@@ -221,6 +221,8 @@ class Trainer():
         if hasattr(self.train_loader, "batch_sampler"):
             batch_sampler = self.train_loader.batch_sampler
             if isinstance(batch_sampler, paddle.io.DistributedBatchSampler):
+                logger.debug(
+                    f"train_loader.batch_sample set epoch: {self.epoch}")
                 batch_sampler.set_epoch(self.epoch)
 
     def before_train(self):
diff --git a/paddlespeech/s2t/transform/perturb.py b/paddlespeech/s2t/transform/perturb.py
index 90144197c2e200756a0572ea799a736ea693e923..226885f36688d7896ed68b602c691d207363f8f3 100644
--- a/paddlespeech/s2t/transform/perturb.py
+++ b/paddlespeech/s2t/transform/perturb.py
@@ -147,7 +147,7 @@ class SpeedPerturbationSox():
 
         try:
             import soxbindings as sox
-        except:
+        except ImportError:
             try:
                 from paddlespeech.s2t.utils import dynamic_pip_install
                 package = "sox"
@@ -155,8 +155,10 @@ class SpeedPerturbationSox():
                 package = "soxbindings"
                 dynamic_pip_install.install(package)
                 import soxbindings as sox
-            except:
-                raise RuntimeError("Can not install soxbindings on your system." )
+            except Exception:
+                raise RuntimeError(
+                    "Can not install soxbindings on your system.")
+        self.sox = sox
 
         if utt2ratio is not None:
             self.utt2ratio = {}
@@ -200,7 +202,7 @@ class SpeedPerturbationSox():
         else:
             ratio = self.state.uniform(self.lower, self.upper)
 
-        tfm = sox.Transformer()
+        tfm = self.sox.Transformer()
         tfm.set_globals(multithread=False)
         tfm.speed(ratio)
         y = tfm.build_array(input_array=x, sample_rate_in=self.sr)
diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py
index 3a80090394bf41636eaf92873c4167266e55f2d6..fb3d3aaaf47948428cd5eaf4a9ae6b0fe82b93e1 100755
--- a/utils/manifest_key_value.py
+++ b/utils/manifest_key_value.py
@@ -5,6 +5,7 @@
 import functools
 from pathlib import Path
 
 import jsonlines
+
 from utils.utility import add_arguments
 from utils.utility import print_arguments