format code

39228864 · Hui Zhang · d395c2b8 · 39228864 · 39228864 · 39228864
20 changed file
--- a/examples/aishell/asr1/READEME.md
+++ b/examples/aishell/asr1/READEME.md
@@ -339,6 +339,3 @@ You need to prepare an audio file, please confirm the sample rate of the audio i
 ```bash
 CUDA_VISIBLE_DEVICES= ./local/test_hub.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/test_audio.wav
 ```
-
-
-
--- a/paddlespeech/s2t/exps/u2/model.py
+++ b/paddlespeech/s2t/exps/u2/model.py
@@ -129,8 +129,8 @@ class U2Trainer(Trainer):
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
-                    self.visualizer.add_scalar(tag='train/'+key, value=val, step=self.iteration-1)
-
+                    self.visualizer.add_scalar(
+                        tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -238,8 +238,10 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

--- a/paddlespeech/s2t/exps/u2_kaldi/model.py
+++ b/paddlespeech/s2t/exps/u2_kaldi/model.py
@@ -132,7 +132,8 @@ class U2Trainer(Trainer):
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
-                    self.visualizer.add_scalar(tag="train/"+key, value=val, step=self.iteration - 1)
+                    self.visualizer.add_scalar(
+                        tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -222,9 +223,11 @@ class U2Trainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
-                
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
+
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()


--- a/paddlespeech/s2t/exps/u2_st/model.py
+++ b/paddlespeech/s2t/exps/u2_st/model.py
@@ -139,7 +139,8 @@ class U2STTrainer(Trainer):
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
-                    self.visualizer.add_scalar(tag="train/"+key, value=val, step=self.iteration - 1)
+                    self.visualizer.add_scalar(
+                        tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
@@ -235,9 +236,11 @@ class U2STTrainer(Trainer):
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
-                
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
+
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()


--- a/paddlespeech/s2t/frontend/augmentor/impulse_response.py
+++ b/paddlespeech/s2t/frontend/augmentor/impulse_response.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Contains the impulse response augmentation model."""
 import jsonlines
+
 from paddlespeech.s2t.frontend.audio import AudioSegment
 from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


--- a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py
+++ b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Contains the noise perturb augmentation model."""
 import jsonlines
+
 from paddlespeech.s2t.frontend.audio import AudioSegment
 from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


--- a/paddlespeech/s2t/frontend/normalizer.py
+++ b/paddlespeech/s2t/frontend/normalizer.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """Contains feature normalizers."""
 import json
+
 import jsonlines
 import numpy as np
 import paddle
@@ -26,7 +27,8 @@ from paddlespeech.s2t.utils.log import Log
 __all__ = ["FeatureNormalizer"]

 logger = Log(__name__).getlog()
- 
+
+
 # https://github.com/PaddlePaddle/Paddle/pull/31481
 class CollateFunc(object):
    def __init__(self, feature_func):
@@ -62,7 +64,7 @@ class AudioDataset(Dataset):

        with jsonlines.open(manifest_path, 'r') as reader:
            manifest = list(reader)
-        
+
        if num_samples == -1:
            sampled_manifest = manifest
        else:

--- a/paddlespeech/s2t/frontend/utility.py
+++ b/paddlespeech/s2t/frontend/utility.py
@@ -64,7 +64,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:
        char_list.append(MASKCTC)
    return char_list

-    
+
 def read_manifest(
        manifest_path,
        max_input_len=float('inf'),

--- a/paddlespeech/s2t/io/dataloader.py
+++ b/paddlespeech/s2t/io/dataloader.py
@@ -15,8 +15,8 @@ from typing import Any
 from typing import Dict
 from typing import List
 from typing import Text
-import jsonlines

+import jsonlines
 import numpy as np
 from paddle.io import DataLoader

@@ -93,7 +93,7 @@ class BatchDataLoader():
        # read json data
        with jsonlines.open(json_file, 'r') as reader:
            self.data_json = list(reader)
-            
+
        self.feat_dim, self.vocab_size = feat_dim_and_vocab_size(
            self.data_json, mode='asr')


--- a/paddlespeech/s2t/io/dataset.py
+++ b/paddlespeech/s2t/io/dataset.py
@@ -14,6 +14,7 @@
 # Modified from espnet(https://github.com/espnet/espnet)
 # Modified from wenet(https://github.com/wenet-e2e/wenet)
 from typing import Optional
+
 import jsonlines
 from paddle.io import Dataset
 from yacs.config import CfgNode

--- a/paddlespeech/s2t/io/sampler.py
+++ b/paddlespeech/s2t/io/sampler.py
@@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
-    batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size))
+    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False

--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@@ -309,8 +309,10 @@ class Trainer():
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
-                self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            # after epoch
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})

--- a/paddlespeech/s2t/utils/socket_server.py
+++ b/paddlespeech/s2t/utils/socket_server.py
@@ -20,6 +20,7 @@ import time
 import wave
 from time import gmtime
 from time import strftime
+
 import jsonlines

 __all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]

--- a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py
+++ b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py
@@ -252,8 +252,10 @@ class Trainer():
            self.logger.info("Epoch {} Val info val_loss {}, F1_score {}".
                             format(self.epoch, total_loss, F1_score))
            if self.visualizer:
-                self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch)
-                self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
+                self.visualizer.add_scalar(
+                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(
                tag=self.epoch, infos={"val_loss": total_loss,

--- a/utils/build_vocab.py
+++ b/utils/build_vocab.py
@@ -19,9 +19,10 @@ import argparse
 import functools
 import os
 import tempfile
-import jsonlines
 from collections import Counter

+import jsonlines
+
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.frontend.utility import BLANK
 from paddlespeech.s2t.frontend.utility import SOS
@@ -63,7 +64,7 @@ def count_manifest(counter, text_feature, manifest_path):
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
            manifest_jsons.append(json_data)
-        
+
    for line_json in manifest_jsons:
        line = text_feature.tokenize(line_json['text'], replace_space=False)
        counter.update(line)
@@ -73,7 +74,7 @@ def dump_text_manifest(fileobj, manifest_path, key='text'):
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
            manifest_jsons.append(json_data)
-            
+
    for line_json in manifest_jsons:
        fileobj.write(line_json[key] + "\n")


--- a/utils/dump_manifest.py
+++ b/utils/dump_manifest.py
@@ -16,6 +16,7 @@
 import argparse
 from pathlib import Path
 from typing import Union
+
 import jsonlines

 key_whitelist = set(['feat', 'text', 'syllable', 'phone'])
@@ -34,7 +35,7 @@ def dump_manifest(manifest_path, output_dir: Union[str, Path]):

    with jsonlines.open(str(manifest_path), 'r') as reader:
        manifest_jsons = list(reader)
-        
+
    first_line = manifest_jsons[0]
    file_map = {}


--- a/utils/format_data.py
+++ b/utils/format_data.py
@@ -15,9 +15,10 @@
 """format manifest with more metadata."""
 import argparse
 import functools
-import jsonlines
 import json

+import jsonlines
+
 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.frontend.utility import load_cmvn
 from paddlespeech.s2t.io.utility import feat_type
@@ -73,7 +74,7 @@ def main():
    for manifest_path in args.manifest_paths:
        with jsonlines.open(str(manifest_path), 'r') as reader:
            manifest_jsons = list(reader)
-        
+
        for line_json in manifest_jsons:
            output_json = {
                "input": [],

--- a/utils/format_triplet_data.py
+++ b/utils/format_triplet_data.py
@@ -16,6 +16,7 @@
 import argparse
 import functools
 import json
+
 import jsonlines

 from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer

--- a/utils/manifest_key_value.py
+++ b/utils/manifest_key_value.py
@@ -3,6 +3,7 @@
 import argparse
 import functools
 from pathlib import Path
+
 import jsonlines

 from utils.utility import add_arguments

--- a/utils/utility.py
+++ b/utils/utility.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import hashlib
-import json
 import os
 import sys
 import tarfile