format

c7d9b115 · Hui Zhang · caf72258 · c7d9b115 · c7d9b115 · c7d9b115
18 changed files
--- a/.flake8
+++ b/.flake8
@@ -12,6 +12,8 @@ exclude =
    .git,
    # python cache
    __pycache__,
+    # third party
+    utils/compute-wer.py,
    third_party/,
 # Provide a comma-separate list of glob patterns to include for checks.
 filename =

--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -40,6 +40,7 @@ from paddlespeech.s2t.utils.utility import UpdateConfig

 __all__ = ['ASRExecutor']

+
 @cli_register(
    name='paddlespeech.asr', description='Speech to text infer command.')
 class ASRExecutor(BaseExecutor):
@@ -148,7 +149,7 @@ class ASRExecutor(BaseExecutor):
                os.path.dirname(os.path.abspath(self.cfg_path)))
        logger.info(self.cfg_path)
        logger.info(self.ckpt_path)
-        
+
        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)
@@ -278,7 +279,8 @@ class ASRExecutor(BaseExecutor):
            self._outputs["result"] = result_transcripts[0]

        elif "conformer" in model_type or "transformer" in model_type:
-            logger.info(f"we will use the transformer like model : {model_type}")
+            logger.info(
+                f"we will use the transformer like model : {model_type}")
            try:
                result_transcripts = self.model.decode(
                    audio,

--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -279,7 +279,7 @@ class U2BaseModel(ASRInterface, nn.Layer):
            # TODO(Hui Zhang): if end_flag.sum() == running_size:
            if end_flag.cast(paddle.int64).sum() == running_size:
                break
-            
+
            # 2.1 Forward decoder step
            hyps_mask = subsequent_mask(i).unsqueeze(0).repeat(
                running_size, 1, 1).to(device)  # (B*N, i, i)

--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@@ -180,7 +180,7 @@ class CTCDecoder(CTCDecoderBase):
        # init once
        if self._ext_scorer is not None:
            return
-        
+
        if language_model_path != '':
            logger.info("begin to initialize the external scorer "
                        "for decoding")

--- a/paddlespeech/server/README.md
+++ b/paddlespeech/server/README.md
@@ -47,4 +47,4 @@ paddlespeech_server start --config_file conf/ws_conformer_application.yaml

 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
-```
\ No newline at end of file
+```
--- a/paddlespeech/server/README_cn.md
+++ b/paddlespeech/server/README_cn.md
@@ -48,4 +48,4 @@ paddlespeech_server start --config_file conf/ws_conformer_application.yaml

 ```
 paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input zh.wav
-```
\ No newline at end of file
+```
--- a/paddlespeech/server/bin/paddlespeech_client.py
+++ b/paddlespeech/server/bin/paddlespeech_client.py
@@ -305,6 +305,7 @@ class ASRClientExecutor(BaseExecutor):

        return res['asr_results']

+
 @cli_client_register(
    name='paddlespeech_client.cls', description='visit cls service')
 class CLSClientExecutor(BaseExecutor):

--- a/paddlespeech/server/engine/asr/online/ctc_search.py
+++ b/paddlespeech/server/engine/asr/online/ctc_search.py
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import defaultdict
+
 import paddle
+
 from paddlespeech.cli.log import logger
 from paddlespeech.s2t.utils.utility import log_add


--- a/paddlespeech/server/tests/asr/online/websocket_client.py
+++ b/paddlespeech/server/tests/asr/online/websocket_client.py
@@ -36,7 +36,7 @@ class ASRAudioHandler:
        x_len = len(samples)

        chunk_size = 85 * 16  #80ms, sample_rate = 16kHz
-        if x_len % chunk_size!= 0:
+        if x_len % chunk_size != 0:
            padding_len_x = chunk_size - x_len % chunk_size
        else:
            padding_len_x = 0
@@ -92,7 +92,7 @@ class ASRAudioHandler:
                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
-            
+
            # decode the bytes to str
            msg = json.loads(msg)
            logging.info("final receive msg={}".format(msg))

--- a/paddlespeech/t2s/exps/synthesize.py
+++ b/paddlespeech/t2s/exps/synthesize.py
@@ -52,7 +52,7 @@ def evaluate(args):
    # acoustic model
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
-    
+
    am_inference = get_am_inference(
        am=args.am,
        am_config=am_config,

--- a/paddlespeech/vector/cluster/diarization.py
+++ b/paddlespeech/vector/cluster/diarization.py
@@ -20,11 +20,11 @@ A few sklearn functions are modified in this script as per requirement.
 import argparse
 import copy
 import warnings
-from distutils.util import strtobool

 import numpy as np
 import scipy
 import sklearn
+from distutils.util import strtobool
 from scipy import linalg
 from scipy import sparse
 from scipy.sparse.csgraph import connected_components

--- a/speechx/examples/ngram/zh/local/text_to_lexicon.py
+++ b/speechx/examples/ngram/zh/local/text_to_lexicon.py
@@ -2,6 +2,7 @@
 import argparse
 from collections import Counter

+
 def main(args):
    counter = Counter()
    with open(args.text, 'r') as fin, open(args.lexicon, 'w') as fout:
@@ -12,7 +13,7 @@ def main(args):
                words = text.split()
            else:
                words = line.split()
-            
+
            counter.update(words)

        for word in counter:
@@ -20,21 +21,16 @@ def main(args):
            fout.write(f"{word}\t{val}\n")
            fout.flush()

+
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='text(line:utt1 中国 人) to lexicon（line:中国 中 国).')
    parser.add_argument(
-        '--has_key',
-        default=True,
-        help='text path, with utt or not')
+        '--has_key', default=True, help='text path, with utt or not')
    parser.add_argument(
-        '--text',
-        required=True,
-        help='text path. line: utt1 中国 人 or 中国 人')
+        '--text', required=True, help='text path. line: utt1 中国 人 or 中国 人')
    parser.add_argument(
-        '--lexicon',
-        required=True,
-        help='lexicon path. line:中国 中 国')
+        '--lexicon', required=True, help='lexicon path. line:中国 中 国')
    args = parser.parse_args()
    print(args)


--- a/speechx/examples/text_lm/local/mmseg.py
+++ b/speechx/examples/text_lm/local/mmseg.py
--- a/speechx/examples/wfst/README.md
+++ b/speechx/examples/wfst/README.md
@@ -183,4 +183,4 @@ data/
        ├── lexiconp_disambig.txt
        ├── lexiconp.txt
        └── units.list
-```
\ No newline at end of file
+```
--- a/utils/DER.py
+++ b/utils/DER.py
@@ -26,9 +26,9 @@ import argparse
 import os
 import re
 import subprocess
-from distutils.util import strtobool

 import numpy as np
+from distutils.util import strtobool

 FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")
 SCORED_SPEAKER_TIME = re.compile(r"(?<=SCORED SPEAKER TIME =)[\d.]+")

--- a/utils/compute-wer.py
+++ b/utils/compute-wer.py
--- a/utils/format_rsl.py
+++ b/utils/format_rsl.py
-import os
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import argparse
 import jsonlines


-def trans_hyp(origin_hyp,
-             trans_hyp = None, 
-             trans_hyp_sclite = None):
+def trans_hyp(origin_hyp, trans_hyp=None, trans_hyp_sclite=None):
    """
    Args:
        origin_hyp: The input json file which contains the model output
@@ -17,19 +27,18 @@ def trans_hyp(origin_hyp,
    with open(origin_hyp, "r+", encoding="utf8") as f:
        for item in jsonlines.Reader(f):
            input_dict[item["utt"]] = item["hyps"][0]
-    if trans_hyp is not None:   
+    if trans_hyp is not None:
        with open(trans_hyp, "w+", encoding="utf8") as f:
            for key in input_dict.keys():
                f.write(key + " " + input_dict[key] + "\n")
-    if trans_hyp_sclite is not None: 
+    if trans_hyp_sclite is not None:
        with open(trans_hyp_sclite, "w+") as f:
            for key in input_dict.keys():
-                line = input_dict[key] + "(" + key + ".wav" +")" + "\n"
+                line = input_dict[key] + "(" + key + ".wav" + ")" + "\n"
                f.write(line)

-def trans_ref(origin_ref,
-                trans_ref = None, 
-                trans_ref_sclite = None):
+
+def trans_ref(origin_ref, trans_ref=None, trans_ref_sclite=None):
    """
    Args:
        origin_hyp: The input json file which contains the model output
@@ -49,42 +58,48 @@ def trans_ref(origin_ref,
    if trans_ref_sclite is not None:
        with open(trans_ref_sclite, "w") as f:
            for key in input_dict.keys():
-                line = input_dict[key] + "(" + key + ".wav" +")" + "\n"
+                line = input_dict[key] + "(" + key + ".wav" + ")" + "\n"
                f.write(line)


-
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(prog='format hyp file for compute CER/WER', add_help=True)
+    parser = argparse.ArgumentParser(
+        prog='format hyp file for compute CER/WER', add_help=True)
    parser.add_argument(
-        '--origin_hyp',
-        type=str,
-        default = None,
-        help='origin hyp file')
+        '--origin_hyp', type=str, default=None, help='origin hyp file')
    parser.add_argument(
-        '--trans_hyp', type=str, default = None, help='hyp file for caculating CER/WER')
+        '--trans_hyp',
+        type=str,
+        default=None,
+        help='hyp file for caculating CER/WER')
    parser.add_argument(
-        '--trans_hyp_sclite', type=str,  default = None, help='hyp file for caculating CER/WER by sclite')
+        '--trans_hyp_sclite',
+        type=str,
+        default=None,
+        help='hyp file for caculating CER/WER by sclite')

    parser.add_argument(
-        '--origin_ref',
-        type=str,
-        default = None,
-        help='origin ref file')
+        '--origin_ref', type=str, default=None, help='origin ref file')
    parser.add_argument(
-        '--trans_ref', type=str, default = None, help='ref file for caculating CER/WER')
+        '--trans_ref',
+        type=str,
+        default=None,
+        help='ref file for caculating CER/WER')
    parser.add_argument(
-        '--trans_ref_sclite', type=str,  default = None, help='ref file for caculating CER/WER by sclite')
+        '--trans_ref_sclite',
+        type=str,
+        default=None,
+        help='ref file for caculating CER/WER by sclite')
    parser_args = parser.parse_args()

    if parser_args.origin_hyp is not None:
        trans_hyp(
-            origin_hyp = parser_args.origin_hyp,
-            trans_hyp = parser_args.trans_hyp,
-            trans_hyp_sclite = parser_args.trans_hyp_sclite, )
+            origin_hyp=parser_args.origin_hyp,
+            trans_hyp=parser_args.trans_hyp,
+            trans_hyp_sclite=parser_args.trans_hyp_sclite, )

    if parser_args.origin_ref is not None:
        trans_ref(
-            origin_ref = parser_args.origin_ref,
-            trans_ref = parser_args.trans_ref,
-            trans_ref_sclite = parser_args.trans_ref_sclite, )
+            origin_ref=parser_args.origin_ref,
+            trans_ref=parser_args.trans_ref,
+            trans_ref_sclite=parser_args.trans_ref_sclite, )
--- a/utils/fst/prepare_dict.py
+++ b/utils/fst/prepare_dict.py
@@ -35,7 +35,7 @@ def main(args):
    # used to filter polyphone and invalid word
    lexicon_table = set()
    in_n = 0  # in lexicon word count
-    out_n = 0 # out lexicon word cout
+    out_n = 0  # out lexicon word cout
    with open(args.in_lexicon, 'r') as fin, \
            open(args.out_lexicon, 'w') as fout:
        for line in fin:
@@ -82,7 +82,10 @@ def main(args):
                lexicon_table.add(word)
                out_n += 1

-    print(f"Filter lexicon by unit table: filter out {in_n - out_n}, {out_n}/{in_n}")
+    print(
+        f"Filter lexicon by unit table: filter out {in_n - out_n}, {out_n}/{in_n}"
+    )
+

 if __name__ == '__main__':
    parser = argparse.ArgumentParser(