format code,test=doc

6b1fe701 · Hui Zhang · 0a5624fe · 6b1fe701 · 6b1fe701 · 6b1fe701
9 changed files
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,12 +50,13 @@ repos:
        entry: bash .pre-commit-hooks/clang-format.hook -i
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
+        exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
    -   id: copyright_checker
        name: copyright_checker
        entry: python .pre-commit-hooks/copyright-check.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
-        exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$
+        exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
 -   repo: https://github.com/asottile/reorder_python_imports
    rev: v2.4.0
    hooks:

--- a/dataset/voxceleb/voxceleb1.py
+++ b/dataset/voxceleb/voxceleb1.py
@@ -80,6 +80,7 @@ parser.add_argument(
 args = parser.parse_args()
 def create_manifest(data_dir, manifest_path_prefix):
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []
@@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix):
        print(f"{total_text / total_sec} text/sec", file=f)
        print(f"{total_sec / total_num} sec/utt", file=f)
 def prepare_dataset(base_url, data_list, target_dir, manifest_path,
                    target_data):
    if not os.path.exists(target_dir):
@@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
    # create the manifest file
    create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
 def main():
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)
@@ -184,5 +187,6 @@ def main():
    print("Manifest prepare done!")
 if __name__ == '__main__':
    main()
--- a/examples/ami/sd0/local/ami_prepare.py
+++ b/examples/ami/sd0/local/ami_prepare.py
@@ -22,19 +22,17 @@ Authors
 * qingenz123@126.com (Qingen ZHAO) 2022
 """
-import os
-import logging
 import argparse
-import xml.etree.ElementTree as et
 import glob
 import json
-from ami_splits import get_AMI_split
+import logging
+import os
+import xml.etree.ElementTree as et
 from distutils.util import strtobool
-from dataio import (
+from ami_splits import get_AMI_split
-    load_pkl,
+from dataio import load_pkl
-    save_pkl, )
+from dataio import save_pkl
 logger = logging.getLogger(__name__)
 SAMPLERATE = 16000

--- a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
+++ b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
@@ -12,28 +12,30 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
 Make VoxCeleb1 trial of kaldi format
 this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt 
 to kaldi trial format
 """
 import argparse
 import codecs
 import os
 parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--voxceleb_trial",
+parser.add_argument(
-                    default="voxceleb1_test_v2",
+    "--voxceleb_trial",
-                    type=str,
+    default="voxceleb1_test_v2",
-                    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt")
+    type=str,
-parser.add_argument("--trial",
+    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
-                    default="data/test/trial",
+)
-                    type=str,
+parser.add_argument(
-                    help="Kaldi format trial file")
+    "--trial",
+    default="data/test/trial",
+    type=str,
+    help="Kaldi format trial file")
 args = parser.parse_args()
 def main(voxceleb_trial, trial):
    """
        VoxCeleb provide several trial file, which format is different with kaldi format.
@@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
    """
    print("Start convert the voxceleb trial to kaldi format")
    if not os.path.exists(voxceleb_trial):
-        raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial))
+        raise RuntimeError(
+            "{} does not exist. Pleas input the correct file path".format(
+                voxceleb_trial))
    trial_dirname = os.path.dirname(trial)
    if not os.path.exists(trial_dirname):
@@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
    with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
         codecs.open(trial, 'w', encoding='utf-8') as w:
-         for line in f:
+        for line in f:
            target_or_nontarget, path1, path2 = line.strip().split()
            utt_id1 = "-".join(path1.split("/"))
            utt_id2 = "-".join(path2.split("/"))
            target = "nontarget"
@@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
            w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
    print("Convert the voxceleb trial to kaldi format successfully")
 if __name__ == "__main__":
    main(args.voxceleb_trial, args.trial)
--- a/paddlespeech/__init__.py
+++ b/paddlespeech/__init__.py
@@ -11,14 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor):
    def _check(self, audio_file: str, sample_rate: int, force_yes: bool):
        self.sample_rate = sample_rate
        if self.sample_rate != 16000 and self.sample_rate != 8000:
-            logger.error("invalid sample rate, please input --sr 8000 or --sr 16000")
+            logger.error(
+                "invalid sample rate, please input --sr 8000 or --sr 16000")
            return False
        if isinstance(audio_file, (str, os.PathLike)):

--- a/paddlespeech/s2t/io/utility.py
+++ b/paddlespeech/s2t/io/utility.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
 from io import BytesIO
+from typing import List
 import numpy as np

--- a/paddlespeech/t2s/datasets/dataset.py
+++ b/paddlespeech/t2s/datasets/dataset.py
@@ -258,4 +258,4 @@ class ChainDataset(Dataset):
                return dataset[i]
            i -= len(dataset)
        raise IndexError("dataset index out of range")
\ No newline at end of file
--- a/utils/DER.py
+++ b/utils/DER.py
@@ -23,10 +23,11 @@ Credits
 This code is adapted from https://github.com/nryant/dscore
 """
 import argparse
-from distutils.util import strtobool
 import os
 import re
 import subprocess
+from distutils.util import strtobool
 import numpy as np
 FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")