diff --git a/examples/aishell/asr1/READEME.md b/examples/aishell/asr1/READEME.md index e9fd3017ca090cc271719e71e06a88ad7509f939..2eea233da021b90de3b96c7089c80c23228279bb 100644 --- a/examples/aishell/asr1/READEME.md +++ b/examples/aishell/asr1/READEME.md @@ -339,6 +339,3 @@ You need to prepare an audio file, please confirm the sample rate of the audio i ```bash CUDA_VISIBLE_DEVICES= ./local/test_hub.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/test_audio.wav ``` - - - diff --git a/paddlespeech/s2t/exps/u2/model.py b/paddlespeech/s2t/exps/u2/model.py index b6dbcf443e94d752615295c20c7aae2a95525896..d448021cb1bc3d58d76d29f5c703bedf113a8713 100644 --- a/paddlespeech/s2t/exps/u2/model.py +++ b/paddlespeech/s2t/exps/u2/model.py @@ -128,8 +128,9 @@ class U2Trainer(Trainer): if dist.get_rank() == 0 and self.visualizer: losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) - self.visualizer.add_scalars("step", losses_np_v, - self.iteration - 1) + for key, val in losses_np_v.items(): + self.visualizer.add_scalar( + tag='train/' + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -237,9 +238,10 @@ class U2Trainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalars( - 'epoch', {'cv_loss': cv_loss, - 'lr': self.lr_scheduler()}, self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) self.save(tag=self.epoch, infos={'val_loss': cv_loss}) self.new_epoch() diff --git a/paddlespeech/s2t/exps/u2_kaldi/model.py b/paddlespeech/s2t/exps/u2_kaldi/model.py index c23b4c24545c2dda21b4b366a8d1e76c3acdf7a6..43e31a60dc127d607b7a644d1ce553832cb909a3 100644 --- a/paddlespeech/s2t/exps/u2_kaldi/model.py +++ b/paddlespeech/s2t/exps/u2_kaldi/model.py @@ -131,8 +131,9 @@ class U2Trainer(Trainer): if dist.get_rank() 
== 0 and self.visualizer: losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) - self.visualizer.add_scalars("step", losses_np_v, - self.iteration - 1) + for key, val in losses_np_v.items(): + self.visualizer.add_scalar( + tag="train/" + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -222,9 +223,11 @@ class U2Trainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalars( - 'epoch', {'cv_loss': cv_loss, - 'lr': self.lr_scheduler()}, self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.save(tag=self.epoch, infos={'val_loss': cv_loss}) self.new_epoch() diff --git a/paddlespeech/s2t/exps/u2_st/model.py b/paddlespeech/s2t/exps/u2_st/model.py index 034463fea25854c8790aa61310028864ea6dc969..2dbbdcd30180418c523765c68c0869eb31e4d77c 100644 --- a/paddlespeech/s2t/exps/u2_st/model.py +++ b/paddlespeech/s2t/exps/u2_st/model.py @@ -138,8 +138,9 @@ class U2STTrainer(Trainer): if dist.get_rank() == 0 and self.visualizer: losses_np_v = losses_np.copy() losses_np_v.update({"lr": self.lr_scheduler()}) - self.visualizer.add_scalars("step", losses_np_v, - self.iteration - 1) + for key, val in losses_np_v.items(): + self.visualizer.add_scalar( + tag="train/" + key, value=val, step=self.iteration - 1) @paddle.no_grad() def valid(self): @@ -235,9 +236,11 @@ class U2STTrainer(Trainer): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalars( - 'epoch', {'cv_loss': cv_loss, - 'lr': self.lr_scheduler()}, self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) + self.save(tag=self.epoch, infos={'val_loss': cv_loss}) 
self.new_epoch() diff --git a/paddlespeech/s2t/frontend/augmentor/impulse_response.py b/paddlespeech/s2t/frontend/augmentor/impulse_response.py index 6cc9c0d43991cd3f6141916ca5778485ce49dec5..5ba45bb20c2e3816358b34209fa3cb03142b285b 100644 --- a/paddlespeech/s2t/frontend/augmentor/impulse_response.py +++ b/paddlespeech/s2t/frontend/augmentor/impulse_response.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """Contains the impulse response augmentation model.""" +import jsonlines + from paddlespeech.s2t.frontend.audio import AudioSegment from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase -from paddlespeech.s2t.frontend.utility import read_manifest class ImpulseResponseAugmentor(AugmentorBase): @@ -28,7 +29,8 @@ class ImpulseResponseAugmentor(AugmentorBase): def __init__(self, rng, impulse_manifest_path): self._rng = rng - self._impulse_manifest = read_manifest(impulse_manifest_path) + with jsonlines.open(impulse_manifest_path, 'r') as reader: + self._impulse_manifest = list(reader) def __call__(self, x, uttid=None, train=True): if not train: diff --git a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py index 9d6da1a8f34818546ebc579efb4cb7a49cb559e5..71165dac893a526963b0e870ccbf99cf979aac42 100644 --- a/paddlespeech/s2t/frontend/augmentor/noise_perturb.py +++ b/paddlespeech/s2t/frontend/augmentor/noise_perturb.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Contains the noise perturb augmentation model.""" +import jsonlines + from paddlespeech.s2t.frontend.audio import AudioSegment from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase -from paddlespeech.s2t.frontend.utility import read_manifest class NoisePerturbAugmentor(AugmentorBase): @@ -34,7 +35,8 @@ class NoisePerturbAugmentor(AugmentorBase): self._min_snr_dB = min_snr_dB self._max_snr_dB = max_snr_dB self._rng = rng - self._noise_manifest = read_manifest(manifest_path=noise_manifest_path) + with jsonlines.open(noise_manifest_path, 'r') as reader: + self._noise_manifest = list(reader) def __call__(self, x, uttid=None, train=True): if not train: diff --git a/paddlespeech/s2t/frontend/normalizer.py b/paddlespeech/s2t/frontend/normalizer.py index a29cddc387c8c8336a09a6b2792bcd73a72b0908..017851e6344a83932f92e7f20f6fa680b4d650f1 100644 --- a/paddlespeech/s2t/frontend/normalizer.py +++ b/paddlespeech/s2t/frontend/normalizer.py @@ -14,6 +14,7 @@ """Contains feature normalizers.""" import json +import jsonlines import numpy as np import paddle from paddle.io import DataLoader @@ -21,7 +22,6 @@ from paddle.io import Dataset from paddlespeech.s2t.frontend.audio import AudioSegment from paddlespeech.s2t.frontend.utility import load_cmvn -from paddlespeech.s2t.frontend.utility import read_manifest from paddlespeech.s2t.utils.log import Log __all__ = ["FeatureNormalizer"] @@ -61,7 +61,10 @@ class CollateFunc(object): class AudioDataset(Dataset): def __init__(self, manifest_path, num_samples=-1, rng=None, random_seed=0): self._rng = rng if rng else np.random.RandomState(random_seed) - manifest = read_manifest(manifest_path) + + with jsonlines.open(manifest_path, 'r') as reader: + manifest = list(reader) + if num_samples == -1: sampled_manifest = manifest else: diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py index d423a60447b48b6be218afd06a13e6ce355a31ea..175727e170daf6896d34215fab9fb898681bd300 100644 --- 
a/paddlespeech/s2t/frontend/utility.py +++ b/paddlespeech/s2t/frontend/utility.py @@ -98,7 +98,6 @@ def read_manifest( Returns: List[dict]: Manifest parsing results. """ - manifest = [] with jsonlines.open(manifest_path, 'r') as reader: for json_data in reader: diff --git a/paddlespeech/s2t/io/dataloader.py b/paddlespeech/s2t/io/dataloader.py index 3b5000a280621502a6b20ca2bf71b43789912ab8..b8eb33679dfadfb155ae4be9cce54876fe991666 100644 --- a/paddlespeech/s2t/io/dataloader.py +++ b/paddlespeech/s2t/io/dataloader.py @@ -16,10 +16,10 @@ from typing import Dict from typing import List from typing import Text +import jsonlines import numpy as np from paddle.io import DataLoader -from paddlespeech.s2t.frontend.utility import read_manifest from paddlespeech.s2t.io.batchfy import make_batchset from paddlespeech.s2t.io.converter import CustomConverter from paddlespeech.s2t.io.dataset import TransformDataset @@ -91,7 +91,9 @@ class BatchDataLoader(): self.n_iter_processes = n_iter_processes # read json data - self.data_json = read_manifest(json_file) + with jsonlines.open(json_file, 'r') as reader: + self.data_json = list(reader) + self.feat_dim, self.vocab_size = feat_dim_and_vocab_size( self.data_json, mode='asr') diff --git a/paddlespeech/s2t/io/dataset.py b/paddlespeech/s2t/io/dataset.py index 61eeb00f1c41b586762035fe1a097b2197a74008..d64d7d3ec16527c7b1f18e0f7c439d14aff3b0ad 100644 --- a/paddlespeech/s2t/io/dataset.py +++ b/paddlespeech/s2t/io/dataset.py @@ -15,6 +15,7 @@ # Modified from wenet(https://github.com/wenet-e2e/wenet) from typing import Optional +import jsonlines from paddle.io import Dataset from yacs.config import CfgNode @@ -184,7 +185,8 @@ class AudioDataset(Dataset): """ assert batch_type in ['static', 'dynamic'] # read manifest - data = read_manifest(data_file) + with jsonlines.open(data_file, 'r') as reader: + data = list(reader) if sort: data = sorted(data, key=lambda x: x["feat_shape"][0]) if raw_wav: diff --git a/paddlespeech/s2t/io/sampler.py 
b/paddlespeech/s2t/io/sampler.py index 0d5a16ce10a25b8234ad3bc6244b2828f350f092..35b57524b5906d53366ebc1c8d4b36322129bba2 100644 --- a/paddlespeech/s2t/io/sampler.py +++ b/paddlespeech/s2t/io/sampler.py @@ -51,7 +51,7 @@ def _batch_shuffle(indices, batch_size, epoch, clipped=False): """ rng = np.random.RandomState(epoch) shift_len = rng.randint(0, batch_size - 1) - batch_indices = list(zip(*[iter(indices[shift_len:])] * batch_size)) + batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size)) rng.shuffle(batch_indices) batch_indices = [item for batch in batch_indices for item in batch] assert clipped is False diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py index f5fb2db03be0090d6e99656db0be4836745783a2..f0099f109ba07eb9aacb3e5399471c34606359c6 100644 --- a/paddlespeech/s2t/training/trainer.py +++ b/paddlespeech/s2t/training/trainer.py @@ -19,7 +19,7 @@ from pathlib import Path import paddle from paddle import distributed as dist -from tensorboardX import SummaryWriter +from visualdl import LogWriter from paddlespeech.s2t.training.reporter import ObsScope from paddlespeech.s2t.training.reporter import report @@ -309,9 +309,10 @@ class Trainer(): logger.info( 'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss)) if self.visualizer: - self.visualizer.add_scalars( - 'epoch', {'cv_loss': cv_loss, - 'lr': self.lr_scheduler()}, self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=cv_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) # after epoch self.save(tag=self.epoch, infos={'val_loss': cv_loss}) @@ -427,7 +428,7 @@ class Trainer(): unexpected behaviors. 
""" # visualizer - visualizer = SummaryWriter(logdir=str(self.visual_dir)) + visualizer = LogWriter(logdir=str(self.visual_dir)) self.visualizer = visualizer @mp_tools.rank_zero_only diff --git a/paddlespeech/s2t/utils/socket_server.py b/paddlespeech/s2t/utils/socket_server.py index 43b56d723c154c35be9b71afad67123bf99c238f..691ea966821dce4923652b68937dd4a4fbb17ede 100644 --- a/paddlespeech/s2t/utils/socket_server.py +++ b/paddlespeech/s2t/utils/socket_server.py @@ -21,7 +21,7 @@ import wave from time import gmtime from time import strftime -from paddlespeech.s2t.frontend.utility import read_manifest +import jsonlines __all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"] @@ -44,7 +44,8 @@ def warm_up_test(audio_process_handler, num_test_cases, random_seed=0): """Warming-up test.""" - manifest = read_manifest(manifest_path) + with jsonlines.open(manifest_path) as reader: + manifest = list(reader) rng = random.Random(random_seed) samples = rng.sample(manifest, num_test_cases) for idx, sample in enumerate(samples): diff --git a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py index d6b6eeb65ea338029b9a58d2dda1d3824f093866..78512796be0de321c8e2bef865b0bce6af1f226a 100644 --- a/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py +++ b/paddlespeech/text/speechtask/punctuation_restoration/training/trainer.py @@ -34,7 +34,7 @@ from speechtask.punctuation_restoration.model.lstm import RnnLm from speechtask.punctuation_restoration.utils import layer_tools from speechtask.punctuation_restoration.utils import mp_tools from speechtask.punctuation_restoration.utils.checkpoint import Checkpoint -from tensorboardX import SummaryWriter +from visualdl import LogWriter __all__ = ["Trainer", "Tester"] @@ -252,10 +252,10 @@ class Trainer(): self.logger.info("Epoch {} Val info val_loss {}, F1_score {}". 
format(self.epoch, total_loss, F1_score)) if self.visualizer: - self.visualizer.add_scalars("epoch", { - "total_loss": total_loss, - "lr": self.lr_scheduler() - }, self.epoch) + self.visualizer.add_scalar( + tag='eval/cv_loss', value=total_loss, step=self.epoch) + self.visualizer.add_scalar( + tag='eval/lr', value=self.lr_scheduler(), step=self.epoch) self.save( tag=self.epoch, infos={"val_loss": total_loss, @@ -341,7 +341,7 @@ class Trainer(): unexpected behaviors. """ # visualizer - visualizer = SummaryWriter(logdir=str(self.output_dir)) + visualizer = LogWriter(logdir=str(self.output_dir)) self.visualizer = visualizer @mp_tools.rank_zero_only diff --git a/requirements.txt b/requirements.txt index 99e485f867ea3dfaeee2f7e440d230f8802881b6..2ee60d3f620f803b8c4eebf4747bfee2fba55fa6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,6 @@ snakeviz soundfile~=0.10 sox soxbindings -tensorboardX textgrid timer tqdm diff --git a/utils/build_vocab.py b/utils/build_vocab.py index 6a90314759a20d8ade3e33995a1afd9bfa770d3c..f832cbbc3f2f4c647545d8a8fe37d0343b8868c6 100755 --- a/utils/build_vocab.py +++ b/utils/build_vocab.py @@ -21,9 +21,10 @@ import os import tempfile from collections import Counter +import jsonlines + from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.utility import BLANK -from paddlespeech.s2t.frontend.utility import read_manifest from paddlespeech.s2t.frontend.utility import SOS from paddlespeech.s2t.frontend.utility import SPACE from paddlespeech.s2t.frontend.utility import UNK @@ -59,13 +60,21 @@ args = parser.parse_args() def count_manifest(counter, text_feature, manifest_path): - manifest_jsons = read_manifest(manifest_path) + manifest_jsons = [] + with jsonlines.open(manifest_path, 'r') as reader: + for json_data in reader: + manifest_jsons.append(json_data) + for line_json in manifest_jsons: line = text_feature.tokenize(line_json['text'], replace_space=False) counter.update(line) 
def dump_text_manifest(fileobj, manifest_path, key='text'): - manifest_jsons = read_manifest(manifest_path) + manifest_jsons = [] + with jsonlines.open(manifest_path, 'r') as reader: + for json_data in reader: + manifest_jsons.append(json_data) + for line_json in manifest_jsons: fileobj.write(line_json[key] + "\n") diff --git a/utils/dump_manifest.py b/utils/dump_manifest.py index b5f7b64a44dead5bac2ea66ee5fdab09d9c255b6..58d91755886ad14538cc88afaf609a06b6d3341a 100755 --- a/utils/dump_manifest.py +++ b/utils/dump_manifest.py @@ -17,7 +17,7 @@ import argparse from pathlib import Path from typing import Union -from paddlespeech.s2t.frontend.utility import read_manifest +import jsonlines key_whitelist = set(['feat', 'text', 'syllable', 'phone']) filename = { @@ -32,7 +32,10 @@ def dump_manifest(manifest_path, output_dir: Union[str, Path]): output_dir = Path(output_dir).expanduser() manifest_path = Path(manifest_path).expanduser() - manifest_jsons = read_manifest(manifest_path) + + with jsonlines.open(str(manifest_path), 'r') as reader: + manifest_jsons = list(reader) + first_line = manifest_jsons[0] file_map = {} diff --git a/utils/format_data.py b/utils/format_data.py index 2fa1924a072faa67ad559f68174896846f8cbdf9..6db2a1bbbeb568c4cc0a6c3ef0ab17039247966a 100755 --- a/utils/format_data.py +++ b/utils/format_data.py @@ -17,9 +17,10 @@ import argparse import functools import json +import jsonlines + from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.utility import load_cmvn -from paddlespeech.s2t.frontend.utility import read_manifest from paddlespeech.s2t.io.utility import feat_type from paddlespeech.s2t.utils.utility import add_arguments from paddlespeech.s2t.utils.utility import print_arguments @@ -71,7 +72,9 @@ def main(): # } count = 0 for manifest_path in args.manifest_paths: - manifest_jsons = read_manifest(manifest_path) + with jsonlines.open(str(manifest_path), 'r') as reader: + manifest_jsons = 
list(reader) + for line_json in manifest_jsons: output_json = { "input": [], diff --git a/utils/format_triplet_data.py b/utils/format_triplet_data.py index e0b5ece37353dc9cc592d440ce11ba486569bfaf..44ff4527c897a826fdaade9ed8c6276f86add54e 100755 --- a/utils/format_triplet_data.py +++ b/utils/format_triplet_data.py @@ -17,9 +17,10 @@ import argparse import functools import json +import jsonlines + from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer from paddlespeech.s2t.frontend.utility import load_cmvn -from paddlespeech.s2t.frontend.utility import read_manifest from paddlespeech.s2t.io.utility import feat_type from paddlespeech.s2t.utils.utility import add_arguments from paddlespeech.s2t.utils.utility import print_arguments @@ -63,7 +64,8 @@ def main(): count = 0 for manifest_path in args.manifest_paths: - manifest_jsons = read_manifest(manifest_path) + with jsonlines.open(str(manifest_path), 'r') as reader: + manifest_jsons = list(reader) for line_json in manifest_jsons: # text: translation text, text1: transcript text. # Currently only support joint-vocab, will add separate vocabs setting. 
diff --git a/utils/manifest_key_value.py b/utils/manifest_key_value.py index b409236fc05e8661e136c76d4b780becbf0ee19d..fb3d3aaaf47948428cd5eaf4a9ae6b0fe82b93e1 100755 --- a/utils/manifest_key_value.py +++ b/utils/manifest_key_value.py @@ -4,9 +4,10 @@ import argparse import functools from pathlib import Path +import jsonlines + from utils.utility import add_arguments from utils.utility import print_arguments -from utils.utility import read_manifest def main(args): @@ -19,7 +20,8 @@ def main(args): dur_scp = outdir / 'duration' text_scp = outdir / 'text' - manifest_jsons = read_manifest(args.manifest_path) + with jsonlines.open(args.manifest_path, 'r') as reader: + manifest_jsons = list(reader) with wav_scp.open('w') as fwav, dur_scp.open('w') as fdur, text_scp.open( 'w') as ftxt: diff --git a/utils/utility.py b/utils/utility.py index b4db518a414de78fcc1c95dc0b9ab63d0a3c1733..dbf8b1d7fe24cc8595dfbd6858125b2213987c88 100755 --- a/utils/utility.py +++ b/utils/utility.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import hashlib -import json import os import sys import tarfile @@ -22,31 +21,10 @@ from typing import Text __all__ = [ "check_md5sum", "getfile_insensitive", "download_multi", "download", "unpack", "unzip", "md5file", "print_arguments", "add_arguments", - "read_manifest", "get_commandline_args" + "get_commandline_args" ] -def read_manifest(manifest_path): - """Load and parse manifest file. - Args: - manifest_path ([type]): Manifest file to load and parse. - - Raises: - IOError: If failed to parse the manifest. - - Returns: - List[dict]: Manifest parsing results. - """ - - manifest = [] - for json_line in open(manifest_path, 'r'): - try: - json_data = json.loads(json_line) - except Exception as e: - raise IOError("Error reading manifest: %s" % str(e)) - return manifest - - def get_commandline_args(): extra_chars = [ " ",