diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 60f0b92f6025d78908cf5043161c6b21771aaa95..7fb01708a3de083c368031e7353fd35e2455788a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,12 +50,13 @@ repos: entry: bash .pre-commit-hooks/clang-format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ + exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$ - id: copyright_checker name: copyright_checker entry: python .pre-commit-hooks/copyright-check.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ - exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$ + exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$ - repo: https://github.com/asottile/reorder_python_imports rev: v2.4.0 hooks: diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py index ce744751679d6e6ae756c8119cd4388adbebe404..e50c91bc169541612cc94575b85ba3794f7dbd05 100644 --- a/dataset/voxceleb/voxceleb1.py +++ b/dataset/voxceleb/voxceleb1.py @@ -80,6 +80,7 @@ parser.add_argument( args = parser.parse_args() + def create_manifest(data_dir, manifest_path_prefix): print("Creating manifest %s ..." % manifest_path_prefix) json_lines = [] @@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix): print(f"{total_text / total_sec} text/sec", file=f) print(f"{total_sec / total_num} sec/utt", file=f) + def prepare_dataset(base_url, data_list, target_dir, manifest_path, target_data): if not os.path.exists(target_dir): @@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path, # create the manifest file create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path) + def main(): if args.target_dir.startswith('~'): args.target_dir = os.path.expanduser(args.target_dir) @@ -184,5 +187,6 @@ def main(): print("Manifest prepare done!") + if __name__ == '__main__': main() diff --git a/examples/ami/sd0/local/ami_prepare.py b/examples/ami/sd0/local/ami_prepare.py index b7bb8e67eda82bc5f33e520e60c2b90aa1c22509..d03810a777aab5d5dcd85d25ea34a1ad59db3f6f 100644 --- a/examples/ami/sd0/local/ami_prepare.py +++ b/examples/ami/sd0/local/ami_prepare.py @@ -22,19 +22,17 @@ Authors * qingenz123@126.com (Qingen ZHAO) 2022 """ - -import os -import logging import argparse -import xml.etree.ElementTree as et import glob import json -from ami_splits import get_AMI_split +import logging +import os +import xml.etree.ElementTree as et from distutils.util import strtobool -from dataio import ( - load_pkl, - save_pkl, ) +from ami_splits import get_AMI_split +from dataio import load_pkl +from dataio import save_pkl logger = logging.getLogger(__name__) SAMPLERATE = 16000 diff --git a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py index c92ede1ab5113d568265a1d261e1709213ef00d2..4e9639dc7d707df2111df09cae6b3bb5b4245571 100644 --- a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py +++ b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py @@ -12,28 +12,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Make VoxCeleb1 trial of kaldi format this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt to kaldi trial format """ - import argparse import codecs import os parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--voxceleb_trial", - default="voxceleb1_test_v2", - type=str, - help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt") -parser.add_argument("--trial", - default="data/test/trial", - type=str, - help="Kaldi format trial file") +parser.add_argument( + "--voxceleb_trial", + default="voxceleb1_test_v2", + type=str, + help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt" +) +parser.add_argument( + "--trial", + default="data/test/trial", + type=str, + help="Kaldi format trial file") args = parser.parse_args() + def main(voxceleb_trial, trial): """ VoxCeleb provide several trial file, which format is different with kaldi format. @@ -58,7 +60,9 @@ def main(voxceleb_trial, trial): """ print("Start convert the voxceleb trial to kaldi format") if not os.path.exists(voxceleb_trial): - raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial)) + raise RuntimeError( + "{} does not exist. Pleas input the correct file path".format( + voxceleb_trial)) trial_dirname = os.path.dirname(trial) if not os.path.exists(trial_dirname): @@ -66,9 +70,9 @@ def main(voxceleb_trial, trial): with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \ codecs.open(trial, 'w', encoding='utf-8') as w: - for line in f: + for line in f: target_or_nontarget, path1, path2 = line.strip().split() - + utt_id1 = "-".join(path1.split("/")) utt_id2 = "-".join(path2.split("/")) target = "nontarget" @@ -77,5 +81,6 @@ def main(voxceleb_trial, trial): w.write("{} {} {}\n".format(utt_id1, utt_id2, target)) print("Convert the voxceleb trial to kaldi format successfully") + if __name__ == "__main__": main(args.voxceleb_trial, args.trial) diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py index 42537b15945d48513063d80abadf20ca1736cb50..185a92b8d94d3426d616c0624f0f2ee04339349e 100644 --- a/paddlespeech/__init__.py +++ b/paddlespeech/__init__.py @@ -11,14 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - - - - - - - - - - diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 7f648b4c3a28ed567f17bea099e7d5cc254ba53a..1fb4be43486fbe896b97d6d6a3ac766c53f208e1 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor): def _check(self, audio_file: str, sample_rate: int, force_yes: bool): self.sample_rate = sample_rate if self.sample_rate != 16000 and self.sample_rate != 8000: - logger.error("invalid sample rate, please input --sr 8000 or --sr 16000") + logger.error( + "invalid sample rate, please input --sr 8000 or --sr 16000") return False if isinstance(audio_file, (str, os.PathLike)): diff --git a/paddlespeech/s2t/io/utility.py b/paddlespeech/s2t/io/utility.py index ce5e772307754a4dc2a8bb2c3b000d62c64cbc83..c08b5535a6cccb7ddf8ba7df53f6c7703e6bb96e 100644 --- a/paddlespeech/s2t/io/utility.py +++ b/paddlespeech/s2t/io/utility.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List from io import BytesIO +from typing import List import numpy as np diff --git a/paddlespeech/t2s/datasets/dataset.py b/paddlespeech/t2s/datasets/dataset.py index f81c2877ca8214833ba71188db8659ea3f701758..2d6c03cb19c585a0736e1da61266d31e88b90dc8 100644 --- a/paddlespeech/t2s/datasets/dataset.py +++ b/paddlespeech/t2s/datasets/dataset.py @@ -258,4 +258,4 @@ class ChainDataset(Dataset): return dataset[i] i -= len(dataset) - raise IndexError("dataset index out of range") \ No newline at end of file + raise IndexError("dataset index out of range") diff --git a/utils/DER.py b/utils/DER.py index 5b62094dfbe730c00f5201157032c9de1ee0f5db..d6ab695d8f498dd9aafebe6b43b645cc5de709e3 100755 --- a/utils/DER.py +++ b/utils/DER.py @@ -23,10 +23,11 @@ Credits This code is adapted from https://github.com/nryant/dscore """ import argparse -from distutils.util import strtobool import os import re import subprocess +from distutils.util import strtobool + import numpy as np FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")