From 39228864bb1b4995de464d57b641ab43a247d9c7 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 30 Nov 2021 08:18:13 +0000 Subject: [PATCH] format code --- examples/aishell/asr1/READEME.md | 3 --- paddlespeech/s2t/exps/u2/model.py | 10 ++++++---- paddlespeech/s2t/exps/u2_kaldi/model.py | 11 +++++++---- paddlespeech/s2t/exps/u2_st/model.py | 11 +++++++---- .../s2t/frontend/augmentor/impulse_response.py | 1 + paddlespeech/s2t/frontend/augmentor/noise_perturb.py | 1 + paddlespeech/s2t/frontend/normalizer.py | 6 ++++-- paddlespeech/s2t/frontend/utility.py | 2 +- paddlespeech/s2t/io/dataloader.py | 4 ++-- paddlespeech/s2t/io/dataset.py | 1 + paddlespeech/s2t/io/sampler.py | 2 +- paddlespeech/s2t/training/trainer.py | 6 ++++-- paddlespeech/s2t/utils/socket_server.py | 1 + .../punctuation_restoration/training/trainer.py | 6 ++++-- utils/build_vocab.py | 7 ++++--- utils/dump_manifest.py | 3 ++- utils/format_data.py | 5 +++-- utils/format_triplet_data.py | 1 + utils/manifest_key_value.py | 1 + utils/utility.py | 1 - 20 files changed, 51 insertions(+), 32 deletions(-) diff --git a/examples/aishell/asr1/READEME.md b/examples/aishell/asr1/READEME.md index e9fd3017..2eea233d 100644 --- a/examples/aishell/asr1/READEME.md +++ b/examples/aishell/asr1/READEME.md @@ -339,6 +339,3 @@ You need to prepare an audio file, please confirm the sample rate of the audio i ```bash CUDA_VISIBLE_DEVICES= ./local/test_hub.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/test_audio.wav ``` - - - diff --git a/paddlespeech/s2t/exps/u2/model.py b/paddlespeech/s2t/exps/u2/model.py index 5dbb72f4..d448021c 100644 --- a/paddlespeech/s2t/exps/u2/model.py +++ b/paddlespeech/s2t/exps/u2/model.py @@ -129,8 +129,8 @@ class U2Trainer(Trainer): losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) for key, val in losses_np_v.items(): - self.visualizer.add_scalar(tag='train/'+key, value=val, step=self.iteration-1) - + self.visualizer.add_scalar( + tag='train/' + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -238,8 +238,10 @@ class U2Trainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch) - self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) self.save(tag=self.epoch, infos={'val_loss': cv_loss}) self.new_epoch() diff --git a/paddlespeech/s2t/exps/u2_kaldi/model.py b/paddlespeech/s2t/exps/u2_kaldi/model.py index a3f45d8e..43e31a60 100644 --- a/paddlespeech/s2t/exps/u2_kaldi/model.py +++ b/paddlespeech/s2t/exps/u2_kaldi/model.py @@ -132,7 +132,8 @@ class U2Trainer(Trainer): losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) for key, val in losses_np_v.items(): - self.visualizer.add_scalar(tag="train/"+key, value=val, step=self.iteration - 1) + self.visualizer.add_scalar( + tag="train/" + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -222,9 +223,11 @@ class U2Trainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch) - self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) - + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.save(tag=self.epoch, infos={'val_loss': cv_loss}) self.new_epoch() diff --git a/paddlespeech/s2t/exps/u2_st/model.py b/paddlespeech/s2t/exps/u2_st/model.py index 771203cf..2dbbdcd3 100644 --- a/paddlespeech/s2t/exps/u2_st/model.py +++ b/paddlespeech/s2t/exps/u2_st/model.py @@ -139,7 +139,8 @@ class U2STTrainer(Trainer): losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) for key, val in losses_np_v.items(): - self.visualizer.add_scalar(tag="train/"+key, value=val, step=self.iteration - 1) + self.visualizer.add_scalar( + tag="train/" + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -235,9 +236,11 @@ class U2STTrainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch) - self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) - + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.save(tag=self.epoch, infos={'val_loss': cv_loss}) self.new_epoch() diff --git a/paddlespeech/s2t/frontend/augmentor/impulse_response.py b/paddlespeech/s2t/frontend/augmentor/impulse_response.py index 1a82bb92..5ba45bb2 100644 --- a/paddlespeech/s2t/frontend/augmentor/impulse_response.py +++ b/paddlespeech/s2t/frontend/augmentor/impulse_response.py @@ -13,6 +13,7 @@ # limitations under the License. """Contains the impulse response augmentation model.""" import jsonlines + from paddlespeech.s2t.frontend.audio import AudioSegment from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase diff --git a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py index ce0a8818..71165dac 100644 --- a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py +++ b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py @@ -13,6 +13,7 @@ # limitations under the License. """Contains the noise perturb augmentation model.""" import jsonlines + from paddlespeech.s2t.frontend.audio import AudioSegment from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase diff --git a/paddlespeech/s2t/frontend/normalizer.py b/paddlespeech/s2t/frontend/normalizer.py index 0a634fc1..017851e6 100644 --- a/paddlespeech/s2t/frontend/normalizer.py +++ b/paddlespeech/s2t/frontend/normalizer.py @@ -13,6 +13,7 @@ # limitations under the License. """Contains feature normalizers.""" import json + import jsonlines import numpy as np import paddle @@ -26,7 +27,8 @@ from paddlespeech.s2t.utils.log import Log __all__ = ["FeatureNormalizer"] logger = Log(__name__).getlog() - + + # https://github.com/PaddlePaddle/Paddle/pull/31481 class CollateFunc(object): def __init__(self, feature_func): @@ -62,7 +64,7 @@ class AudioDataset(Dataset): with jsonlines.open(manifest_path, 'r') as reader: manifest = list(reader) - + if num_samples == -1: sampled_manifest = manifest else: diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py index ccb767ad..175727e1 100644 --- a/paddlespeech/s2t/frontend/utility.py +++ b/paddlespeech/s2t/frontend/utility.py @@ -64,7 +64,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]: char_list.append(MASKCTC) return char_list - + def read_manifest( manifest_path, max_input_len=float('inf'), diff --git a/paddlespeech/s2t/io/dataloader.py b/paddlespeech/s2t/io/dataloader.py index bda48842..b8eb3367 100644 --- a/paddlespeech/s2t/io/dataloader.py +++ b/paddlespeech/s2t/io/dataloader.py @@ -15,8 +15,8 @@ from typing import Any from typing import Dict from typing import List from typing import Text -import jsonlines +import jsonlines import numpy as np from paddle.io import DataLoader @@ -93,7 +93,7 @@ class BatchDataLoader(): # read json data with jsonlines.open(json_file, 'r') as reader: self.data_json = list(reader) - + self.feat_dim, self.vocab_size = feat_dim_and_vocab_size( self.data_json, mode='asr') diff --git a/paddlespeech/s2t/io/dataset.py b/paddlespeech/s2t/io/dataset.py index ba10aebb..d64d7d3e 100644 --- a/paddlespeech/s2t/io/dataset.py +++ b/paddlespeech/s2t/io/dataset.py @@ -14,6 +14,7 @@ # Modified from espnet(https://github.com/espnet/espnet) # Modified from wenet(https://github.com/wenet-e2e/wenet) from typing import Optional + import jsonlines from paddle.io import Dataset from yacs.config import CfgNode diff --git a/paddlespeech/s2t/io/sampler.py b/paddlespeech/s2t/io/sampler.py index 0d5a16ce..35b57524 100644 --- a/paddlespeech/s2t/io/sampler.py +++ b/paddlespeech/s2t/io/sampler.py @@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False): """ rng = np.random.RandomState(epoch) shift_len = rng.randint(0, batch_size - 1) - batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size)) + batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size)) rng.shuffle(batch_indices) batch_indices = [item for batch in batch_indices for item in batch] assert clipped is False diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py index be398814..f0099f10 100644 --- a/paddlespeech/s2t/training/trainer.py +++ b/paddlespeech/s2t/training/trainer.py @@ -309,8 +309,10 @@ class Trainer(): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch) - self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) # after epoch self.save(tag=self.epoch, infos={'val_loss': cv_loss}) diff --git a/paddlespeech/s2t/utils/socket_server.py b/paddlespeech/s2t/utils/socket_server.py index 6371ba85..691ea966 100644 --- a/paddlespeech/s2t/utils/socket_server.py +++ b/paddlespeech/s2t/utils/socket_server.py @@ -20,6 +20,7 @@ import time import wave from time import gmtime from time import strftime + import jsonlines __all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"] diff --git a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py index ba7ddde3..78512796 100644 --- a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py +++ b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py @@ -252,8 +252,10 @@ class Trainer(): self.logger.info("Epoch {} Val info val_loss {}, F1_score {}". format(self.epoch, total_loss, F1_score)) if self.visualizer: - self.visualizer.add_scalar(tag='eval/cv_loss', value=cv_loss, step=self.epoch) - self.visualizer.add_scalar(tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) self.save( tag=self.epoch, infos={"val_loss": total_loss, diff --git a/utils/build_vocab.py b/utils/build_vocab.py index 61dc5e25..f832cbbc 100755 --- a/utils/build_vocab.py +++ b/utils/build_vocab.py @@ -19,9 +19,10 @@ import argparse import functools import os import tempfile -import jsonlines from collections import Counter +import jsonlines + from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.utility import BLANK from paddlespeech.s2t.frontend.utility import SOS @@ -63,7 +64,7 @@ def count_manifest(counter, text_feature, manifest_path): with jsonlines.open(manifest_path, 'r') as reader: for json_data in reader: manifest_jsons.append(json_data) - + for line_json in manifest_jsons: line = text_feature.tokenize(line_json['text'], replace_space=False) counter.update(line) @@ -73,7 +74,7 @@ def dump_text_manifest(fileobj, manifest_path, key='text'): with jsonlines.open(manifest_path, 'r') as reader: for json_data in reader: manifest_jsons.append(json_data) - + for line_json in manifest_jsons: fileobj.write(line_json[key] + "\n") diff --git a/utils/dump_manifest.py b/utils/dump_manifest.py index d602571d..58d91755 100755 --- a/utils/dump_manifest.py +++ b/utils/dump_manifest.py @@ -16,6 +16,7 @@ import argparse from pathlib import Path from typing import Union + import jsonlines key_whitelist = set(['feat', 'text', 'syllable', 'phone']) @@ -34,7 +35,7 @@ def dump_manifest(manifest_path, output_dir: Union[str, Path]): with jsonlines.open(str(manifest_path), 'r') as reader: manifest_jsons = list(reader) - + first_line = manifest_jsons[0] file_map = {} diff --git a/utils/format_data.py b/utils/format_data.py index 437d7e0f..6db2a1bb 100755 --- a/utils/format_data.py +++ b/utils/format_data.py @@ -15,9 +15,10 @@ """format manifest with more metadata.""" import argparse import functools -import jsonlines import json +import jsonlines + from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.utility import load_cmvn from paddlespeech.s2t.io.utility import feat_type @@ -73,7 +74,7 @@ def main(): for manifest_path in args.manifest_paths: with jsonlines.open(str(manifest_path), 'r') as reader: manifest_jsons = list(reader) - + for line_json in manifest_jsons: output_json = { "input": [], diff --git a/utils/format_triplet_data.py b/utils/format_triplet_data.py index dd9dab42..44ff4527 100755 --- a/utils/format_triplet_data.py +++ b/utils/format_triplet_data.py @@ -16,6 +16,7 @@ import argparse import functools import json + import jsonlines from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py index 0cfb2450..fb3d3aaa 100755 --- a/utils/manifest_key_value.py +++ b/utils/manifest_key_value.py @@ -3,6 +3,7 @@ import argparse import functools from pathlib import Path + import jsonlines from utils.utility import add_arguments diff --git a/utils/utility.py b/utils/utility.py index b3523b38..dbf8b1d7 100755 --- a/utils/utility.py +++ b/utils/utility.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import hashlib -import json import os import sys import tarfile -- GitLab