From bc93bffbb423eb5d803c2997d540b1d0731089bb Mon Sep 17 00:00:00 2001 From: TianYuan Date: Fri, 1 Jul 2022 08:27:16 +0000 Subject: [PATCH] replace logger.info with logger.debug in cli, change default log level to INFO --- paddlespeech/cli/asr/infer.py | 40 ++++++------ paddlespeech/cli/cls/infer.py | 6 +- paddlespeech/cli/download.py | 16 ++--- paddlespeech/cli/kws/infer.py | 4 +- paddlespeech/cli/log.py | 2 +- paddlespeech/cli/st/infer.py | 10 +-- paddlespeech/cli/text/infer.py | 2 +- paddlespeech/cli/tts/infer.py | 14 ++--- paddlespeech/cli/vector/infer.py | 61 ++++++++++--------- .../s2t/frontend/augmentor/spec_augment.py | 6 +- .../frontend/featurizer/text_featurizer.py | 12 ++-- paddlespeech/s2t/models/u2/u2.py | 4 +- paddlespeech/s2t/modules/loss.py | 7 ++- paddlespeech/s2t/transform/spec_augment.py | 7 ++- paddlespeech/s2t/utils/tensor_utils.py | 2 +- 15 files changed, 98 insertions(+), 95 deletions(-) diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 24839a89..52cb7cc8 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -133,11 +133,11 @@ class ASRExecutor(BaseExecutor): """ Init model and other resources from a specific path. """ - logger.info("start to init the model") + logger.debug("start to init the model") # default max_len: unit:second self.max_len = 50 if hasattr(self, 'model'): - logger.info('Model had been initialized.') + logger.debug('Model had been initialized.') return if cfg_path is None or ckpt_path is None: @@ -151,15 +151,15 @@ class ASRExecutor(BaseExecutor): self.ckpt_path = os.path.join( self.res_path, self.task_resource.res_dict['ckpt_path'] + ".pdparams") - logger.info(self.res_path) + logger.debug(self.res_path) else: self.cfg_path = os.path.abspath(cfg_path) self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams") self.res_path = os.path.dirname( os.path.dirname(os.path.abspath(self.cfg_path))) - logger.info(self.cfg_path) - logger.info(self.ckpt_path) + logger.debug(self.cfg_path) + logger.debug(self.ckpt_path) #Init body. 
self.config = CfgNode(new_allowed=True) @@ -216,7 +216,7 @@ class ASRExecutor(BaseExecutor): max_len = self.config.encoder_conf.max_len self.max_len = frame_shift_ms * max_len * subsample_rate - logger.info( + logger.debug( f"The asr server limit max duration len: {self.max_len}") def preprocess(self, model_type: str, input: Union[str, os.PathLike]): @@ -227,15 +227,15 @@ class ASRExecutor(BaseExecutor): audio_file = input if isinstance(audio_file, (str, os.PathLike)): - logger.info("Preprocess audio_file:" + audio_file) + logger.debug("Preprocess audio_file:" + audio_file) # Get the object for feature extraction if "deepspeech2" in model_type or "conformer" in model_type or "transformer" in model_type: - logger.info("get the preprocess conf") + logger.debug("get the preprocess conf") preprocess_conf = self.config.preprocess_config preprocess_args = {"train": False} preprocessing = Transformation(preprocess_conf) - logger.info("read the audio file") + logger.debug("read the audio file") audio, audio_sample_rate = soundfile.read( audio_file, dtype="int16", always_2d=True) if self.change_format: @@ -255,7 +255,7 @@ class ASRExecutor(BaseExecutor): else: audio = audio[:, 0] - logger.info(f"audio shape: {audio.shape}") + logger.debug(f"audio shape: {audio.shape}") # fbank audio = preprocessing(audio, **preprocess_args) @@ -264,19 +264,19 @@ class ASRExecutor(BaseExecutor): self._inputs["audio"] = audio self._inputs["audio_len"] = audio_len - logger.info(f"audio feat shape: {audio.shape}") + logger.debug(f"audio feat shape: {audio.shape}") else: raise Exception("wrong type") - logger.info("audio feat process success") + logger.debug("audio feat process success") @paddle.no_grad() def infer(self, model_type: str): """ Model inference and result stored in self.output. """ - logger.info("start to infer the model to get the output") + logger.debug("start to infer the model to get the output") cfg = self.config.decode audio = self._inputs["audio"] audio_len = self._inputs["audio_len"] @@ -293,7 +293,7 @@ class ASRExecutor(BaseExecutor): self._outputs["result"] = result_transcripts[0] elif "conformer" in model_type or "transformer" in model_type: - logger.info( + logger.debug( f"we will use the transformer like model : {model_type}") try: result_transcripts = self.model.decode( @@ -352,7 +352,7 @@ class ASRExecutor(BaseExecutor): logger.error("Please input the right audio file path") return False - logger.info("checking the audio file format......") + logger.debug("checking the audio file format......") try: audio, audio_sample_rate = soundfile.read( audio_file, dtype="int16", always_2d=True) @@ -374,7 +374,7 @@ class ASRExecutor(BaseExecutor): sox input_audio.xx --rate 8k --bits 16 --channels 1 output_audio.wav \n \ ") return False - logger.info("The sample rate is %d" % audio_sample_rate) + logger.debug("The sample rate is %d" % audio_sample_rate) if audio_sample_rate != self.sample_rate: logger.warning("The sample rate of the input file is not {}.\n \ The program will resample the wav file to {}.\n \ @@ -383,28 +383,28 @@ class ASRExecutor(BaseExecutor): ".format(self.sample_rate, self.sample_rate)) if force_yes is False: while (True): - logger.info( + logger.debug( "Whether to change the sample rate and the channel. Y: change the sample. N: exit the prgream." 
) content = input("Input(Y/N):") if content.strip() == "Y" or content.strip( ) == "y" or content.strip() == "yes" or content.strip( ) == "Yes": - logger.info( + logger.debug( "change the sampele rate, channel to 16k and 1 channel" ) break elif content.strip() == "N" or content.strip( ) == "n" or content.strip() == "no" or content.strip( ) == "No": - logger.info("Exit the program") + logger.debug("Exit the program") return False else: logger.warning("Not regular input, please input again") self.change_format = True else: - logger.info("The audio file format is right") + logger.debug("The audio file format is right") self.change_format = False return True diff --git a/paddlespeech/cli/cls/infer.py b/paddlespeech/cli/cls/infer.py index 942dc3b9..c869e28b 100644 --- a/paddlespeech/cli/cls/infer.py +++ b/paddlespeech/cli/cls/infer.py @@ -92,7 +92,7 @@ class CLSExecutor(BaseExecutor): Init model and other resources from a specific path. """ if hasattr(self, 'model'): - logger.info('Model had been initialized.') + logger.debug('Model had been initialized.') return if label_file is None or ckpt_path is None: @@ -135,14 +135,14 @@ class CLSExecutor(BaseExecutor): Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet). """ feat_conf = self._conf['feature'] - logger.info(feat_conf) + logger.debug(feat_conf) waveform, _ = load( file=audio_file, sr=feat_conf['sample_rate'], mono=True, dtype='float32') if isinstance(audio_file, (str, os.PathLike)): - logger.info("Preprocessing audio_file:" + audio_file) + logger.debug("Preprocessing audio_file:" + audio_file) # Feature extraction feature_extractor = LogMelSpectrogram( diff --git a/paddlespeech/cli/download.py b/paddlespeech/cli/download.py index ec725874..5661f18f 100644 --- a/paddlespeech/cli/download.py +++ b/paddlespeech/cli/download.py @@ -61,7 +61,7 @@ def _get_unique_endpoints(trainer_endpoints): continue ips.add(ip) unique_endpoints.add(endpoint) - logger.info("unique_endpoints {}".format(unique_endpoints)) + logger.debug("unique_endpoints {}".format(unique_endpoints)) return unique_endpoints @@ -96,7 +96,7 @@ def get_path_from_url(url, # data, and the same ip will only download data once. 
unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:]) if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): - logger.info("Found {}".format(fullpath)) + logger.debug("Found {}".format(fullpath)) else: if ParallelEnv().current_endpoint in unique_endpoints: fullpath = _download(url, root_dir, md5sum, method=method) @@ -118,7 +118,7 @@ def _get_download(url, fullname): try: req = requests.get(url, stream=True) except Exception as e: # requests.exceptions.ConnectionError - logger.info("Downloading {} from {} failed with exception {}".format( + logger.debug("Downloading {} from {} failed with exception {}".format( fname, url, str(e))) return False @@ -190,7 +190,7 @@ def _download(url, path, md5sum=None, method='get'): fullname = osp.join(path, fname) retry_cnt = 0 - logger.info("Downloading {} from {}".format(fname, url)) + logger.debug("Downloading {} from {}".format(fname, url)) while not (osp.exists(fullname) and _md5check(fullname, md5sum)): if retry_cnt < DOWNLOAD_RETRY_LIMIT: retry_cnt += 1 @@ -209,7 +209,7 @@ def _md5check(fullname, md5sum=None): if md5sum is None: return True - logger.info("File {} md5 checking...".format(fullname)) + logger.debug("File {} md5 checking...".format(fullname)) md5 = hashlib.md5() with open(fullname, 'rb') as f: for chunk in iter(lambda: f.read(4096), b""): @@ -217,8 +217,8 @@ def _md5check(fullname, md5sum=None): calc_md5sum = md5.hexdigest() if calc_md5sum != md5sum: - logger.info("File {} md5 check failed, {}(calc) != " - "{}(base)".format(fullname, calc_md5sum, md5sum)) + logger.debug("File {} md5 check failed, {}(calc) != " + "{}(base)".format(fullname, calc_md5sum, md5sum)) return False return True @@ -227,7 +227,7 @@ def _decompress(fname): """ Decompress for zip and tar file """ - logger.info("Decompressing {}...".format(fname)) + logger.debug("Decompressing {}...".format(fname)) # For protecting decompressing interupted, # decompress to fpath_tmp directory firstly, if decompress diff --git a/paddlespeech/cli/kws/infer.py b/paddlespeech/cli/kws/infer.py index e3f426f5..111cfd75 100644 --- a/paddlespeech/cli/kws/infer.py +++ b/paddlespeech/cli/kws/infer.py @@ -88,7 +88,7 @@ class KWSExecutor(BaseExecutor): Init model and other resources from a specific path. 
""" if hasattr(self, 'model'): - logger.info('Model had been initialized.') + logger.debug('Model had been initialized.') return if ckpt_path is None: @@ -141,7 +141,7 @@ class KWSExecutor(BaseExecutor): assert os.path.isfile(audio_file) waveform, _ = load(audio_file) if isinstance(audio_file, (str, os.PathLike)): - logger.info("Preprocessing audio_file:" + audio_file) + logger.debug("Preprocessing audio_file:" + audio_file) # Feature extraction waveform = paddle.to_tensor(waveform).unsqueeze(0) diff --git a/paddlespeech/cli/log.py b/paddlespeech/cli/log.py index 8644064c..8b33e71e 100644 --- a/paddlespeech/cli/log.py +++ b/paddlespeech/cli/log.py @@ -49,7 +49,7 @@ class Logger(object): self.handler.setFormatter(self.format) self.logger.addHandler(self.handler) - self.logger.setLevel(logging.DEBUG) + self.logger.setLevel(logging.INFO) self.logger.propagate = False def __call__(self, log_level: str, msg: str): diff --git a/paddlespeech/cli/st/infer.py b/paddlespeech/cli/st/infer.py index 4e099c40..bc2bdd1a 100644 --- a/paddlespeech/cli/st/infer.py +++ b/paddlespeech/cli/st/infer.py @@ -110,7 +110,7 @@ class STExecutor(BaseExecutor): """ decompressed_path = download_and_decompress(self.kaldi_bins, MODEL_HOME) decompressed_path = os.path.abspath(decompressed_path) - logger.info("Kaldi_bins stored in: {}".format(decompressed_path)) + logger.debug("Kaldi_bins stored in: {}".format(decompressed_path)) if "LD_LIBRARY_PATH" in os.environ: os.environ["LD_LIBRARY_PATH"] += f":{decompressed_path}" else: @@ -128,7 +128,7 @@ class STExecutor(BaseExecutor): Init model and other resources from a specific path. """ if hasattr(self, 'model'): - logger.info('Model had been initialized.') + logger.debug('Model had been initialized.') return if cfg_path is None or ckpt_path is None: @@ -140,8 +140,8 @@ class STExecutor(BaseExecutor): self.ckpt_path = os.path.join( self.task_resource.res_dir, self.task_resource.res_dict['ckpt_path']) - logger.info(self.cfg_path) - logger.info(self.ckpt_path) + logger.debug(self.cfg_path) + logger.debug(self.ckpt_path) res_path = self.task_resource.res_dir else: self.cfg_path = os.path.abspath(cfg_path) @@ -192,7 +192,7 @@ class STExecutor(BaseExecutor): Input content can be a file(wav). """ audio_file = os.path.abspath(wav_file) - logger.info("Preprocess audio_file:" + audio_file) + logger.debug("Preprocess audio_file:" + audio_file) if "fat_st" in model_type: cmvn = self.config.cmvn_path diff --git a/paddlespeech/cli/text/infer.py b/paddlespeech/cli/text/infer.py index 7b8faf99..24b8c9c2 100644 --- a/paddlespeech/cli/text/infer.py +++ b/paddlespeech/cli/text/infer.py @@ -98,7 +98,7 @@ class TextExecutor(BaseExecutor): Init model and other resources from a specific path. """ if hasattr(self, 'model'): - logger.info('Model had been initialized.') + logger.debug('Model had been initialized.') return self.task = task diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 7c837761..07c676e5 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -173,7 +173,7 @@ class TTSExecutor(BaseExecutor): Init model and other resources from a specific path. 
""" if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'): - logger.info('Models had been initialized.') + logger.debug('Models had been initialized.') return # am @@ -200,9 +200,9 @@ class TTSExecutor(BaseExecutor): # must have phones_dict in acoustic self.phones_dict = os.path.join( self.am_res_path, self.task_resource.res_dict['phones_dict']) - logger.info(self.am_res_path) - logger.info(self.am_config) - logger.info(self.am_ckpt) + logger.debug(self.am_res_path) + logger.debug(self.am_config) + logger.debug(self.am_ckpt) else: self.am_config = os.path.abspath(am_config) self.am_ckpt = os.path.abspath(am_ckpt) @@ -248,9 +248,9 @@ class TTSExecutor(BaseExecutor): self.voc_stat = os.path.join( self.voc_res_path, self.task_resource.voc_res_dict['speech_stats']) - logger.info(self.voc_res_path) - logger.info(self.voc_config) - logger.info(self.voc_ckpt) + logger.debug(self.voc_res_path) + logger.debug(self.voc_config) + logger.debug(self.voc_ckpt) else: self.voc_config = os.path.abspath(voc_config) self.voc_ckpt = os.path.abspath(voc_ckpt) diff --git a/paddlespeech/cli/vector/infer.py b/paddlespeech/cli/vector/infer.py index 4bc8e135..48ca1f98 100644 --- a/paddlespeech/cli/vector/infer.py +++ b/paddlespeech/cli/vector/infer.py @@ -117,7 +117,7 @@ class VectorExecutor(BaseExecutor): # stage 2: read the input data and store them as a list task_source = self.get_input_source(parser_args.input) - logger.info(f"task source: {task_source}") + logger.debug(f"task source: {task_source}") # stage 3: process the audio one by one # we do action according the task type @@ -127,13 +127,13 @@ class VectorExecutor(BaseExecutor): try: # extract the speaker audio embedding if parser_args.task == "spk": - logger.info("do vector spk task") + logger.debug("do vector spk task") res = self(input_, model, sample_rate, config, ckpt_path, device) task_result[id_] = res elif parser_args.task == "score": - logger.info("do vector score task") - logger.info(f"input content {input_}") + logger.debug("do vector score task") + logger.debug(f"input content {input_}") if len(input_.split()) != 2: logger.error( f"vector score task input {input_} wav num is not two," @@ -142,7 +142,7 @@ class VectorExecutor(BaseExecutor): # get the enroll and test embedding enroll_audio, test_audio = input_.split() - logger.info( + logger.debug( f"score task, enroll audio: {enroll_audio}, test audio: {test_audio}" ) enroll_embedding = self(enroll_audio, model, sample_rate, @@ -158,8 +158,8 @@ class VectorExecutor(BaseExecutor): has_exceptions = True task_result[id_] = f'{e.__class__.__name__}: {e}' - logger.info("task result as follows: ") - logger.info(f"{task_result}") + logger.debug("task result as follows: ") + logger.debug(f"{task_result}") # stage 4: process the all the task results self.process_task_results(parser_args.input, task_result, @@ -207,7 +207,7 @@ class VectorExecutor(BaseExecutor): """ if not hasattr(self, "score_func"): self.score_func = paddle.nn.CosineSimilarity(axis=0) - logger.info("create the cosine score function ") + logger.debug("create the cosine score function ") score = self.score_func( paddle.to_tensor(enroll_embedding), @@ -244,7 +244,7 @@ class VectorExecutor(BaseExecutor): sys.exit(-1) # stage 1: set the paddle runtime host device - logger.info(f"device type: {device}") + logger.debug(f"device type: {device}") paddle.device.set_device(device) # stage 2: read the specific pretrained model @@ -283,7 +283,7 @@ class VectorExecutor(BaseExecutor): # stage 0: avoid to init the mode again self.task = 
task if hasattr(self, "model"): - logger.info("Model has been initialized") + logger.debug("Model has been initialized") return # stage 1: get the model and config path @@ -294,7 +294,7 @@ class VectorExecutor(BaseExecutor): sample_rate_str = "16k" if sample_rate == 16000 else "8k" tag = model_type + "-" + sample_rate_str self.task_resource.set_task_model(tag, version=None) - logger.info(f"load the pretrained model: {tag}") + logger.debug(f"load the pretrained model: {tag}") # get the model from the pretrained list # we download the pretrained model and store it in the res_path self.res_path = self.task_resource.res_dir @@ -312,19 +312,19 @@ class VectorExecutor(BaseExecutor): self.res_path = os.path.dirname( os.path.dirname(os.path.abspath(self.cfg_path))) - logger.info(f"start to read the ckpt from {self.ckpt_path}") - logger.info(f"read the config from {self.cfg_path}") - logger.info(f"get the res path {self.res_path}") + logger.debug(f"start to read the ckpt from {self.ckpt_path}") + logger.debug(f"read the config from {self.cfg_path}") + logger.debug(f"get the res path {self.res_path}") # stage 2: read and config and init the model body self.config = CfgNode(new_allowed=True) self.config.merge_from_file(self.cfg_path) # stage 3: get the model name to instance the model network with dynamic_import - logger.info("start to dynamic import the model class") + logger.debug("start to dynamic import the model class") model_name = model_type[:model_type.rindex('_')] model_class = self.task_resource.get_model_class(model_name) - logger.info(f"model name {model_name}") + logger.debug(f"model name {model_name}") model_conf = self.config.model backbone = model_class(**model_conf) model = SpeakerIdetification( @@ -333,11 +333,11 @@ class VectorExecutor(BaseExecutor): self.model.eval() # stage 4: load the model parameters - logger.info("start to set the model parameters to model") + logger.debug("start to set the model parameters to model") model_dict = paddle.load(self.ckpt_path) self.model.set_state_dict(model_dict) - logger.info("create the model instance success") + logger.debug("create the model instance success") @paddle.no_grad() def infer(self, model_type: str): @@ -349,14 +349,14 @@ class VectorExecutor(BaseExecutor): # stage 0: get the feat and length from _inputs feats = self._inputs["feats"] lengths = self._inputs["lengths"] - logger.info("start to do backbone network model forward") - logger.info( + logger.debug("start to do backbone network model forward") + logger.debug( f"feats shape:{feats.shape}, lengths shape: {lengths.shape}") # stage 1: get the audio embedding # embedding from (1, emb_size, 1) -> (emb_size) embedding = self.model.backbone(feats, lengths).squeeze().numpy() - logger.info(f"embedding size: {embedding.shape}") + logger.debug(f"embedding size: {embedding.shape}") # stage 2: put the embedding and dim info to _outputs property # the embedding type is numpy.array @@ -380,12 +380,13 @@ class VectorExecutor(BaseExecutor): """ audio_file = input_file if isinstance(audio_file, (str, os.PathLike)): - logger.info(f"Preprocess audio file: {audio_file}") + logger.debug(f"Preprocess audio file: {audio_file}") # stage 1: load the audio sample points # Note: this process must match the training process waveform, sr = load_audio(audio_file) - logger.info(f"load the audio sample points, shape is: {waveform.shape}") + logger.debug( + f"load the audio sample points, shape is: {waveform.shape}") # stage 2: get the audio feat # Note: Now we only support fbank feature @@ -396,9 +397,9 @@ 
class VectorExecutor(BaseExecutor): n_mels=self.config.n_mels, window_size=self.config.window_size, hop_length=self.config.hop_size) - logger.info(f"extract the audio feat, shape is: {feat.shape}") + logger.debug(f"extract the audio feat, shape is: {feat.shape}") except Exception as e: - logger.info(f"feat occurs exception {e}") + logger.debug(f"feat occurs exception {e}") sys.exit(-1) feat = paddle.to_tensor(feat).unsqueeze(0) @@ -411,11 +412,11 @@ class VectorExecutor(BaseExecutor): # stage 4: store the feat and length in the _inputs, # which will be used in other function - logger.info(f"feats shape: {feat.shape}") + logger.debug(f"feats shape: {feat.shape}") self._inputs["feats"] = feat self._inputs["lengths"] = lengths - logger.info("audio extract the feat success") + logger.debug("audio extract the feat success") def _check(self, audio_file: str, sample_rate: int): """Check if the model sample match the audio sample rate @@ -441,7 +442,7 @@ class VectorExecutor(BaseExecutor): logger.error("Please input the right audio file path") return False - logger.info("checking the aduio file format......") + logger.debug("checking the aduio file format......") try: audio, audio_sample_rate = soundfile.read( audio_file, dtype="float32", always_2d=True) @@ -458,7 +459,7 @@ class VectorExecutor(BaseExecutor): ") return False - logger.info(f"The sample rate is {audio_sample_rate}") + logger.debug(f"The sample rate is {audio_sample_rate}") if audio_sample_rate != self.sample_rate: logger.error("The sample rate of the input file is not {}.\n \ @@ -468,6 +469,6 @@ class VectorExecutor(BaseExecutor): ".format(self.sample_rate, self.sample_rate)) sys.exit(-1) else: - logger.info("The audio file format is right") + logger.debug("The audio file format is right") return True diff --git a/paddlespeech/s2t/frontend/augmentor/spec_augment.py b/paddlespeech/s2t/frontend/augmentor/spec_augment.py index e91cfdce..296129a9 100644 --- a/paddlespeech/s2t/frontend/augmentor/spec_augment.py +++ b/paddlespeech/s2t/frontend/augmentor/spec_augment.py @@ -16,7 +16,7 @@ import random import numpy as np from PIL import Image -from PIL.Image import BICUBIC +from PIL.Image import Resampling from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase from paddlespeech.s2t.utils.log import Log @@ -164,9 +164,9 @@ class SpecAugmentor(AugmentorBase): window) + 1 # 1 ... 
t - 1 left = Image.fromarray(x[:center]).resize((x.shape[1], warped), - BICUBIC) + Resampling.BICUBIC) right = Image.fromarray(x[center:]).resize((x.shape[1], t - warped), - BICUBIC) + Resampling.BICUBIC) if self.inplace: x[:warped] = left x[warped:] = right diff --git a/paddlespeech/s2t/frontend/featurizer/text_featurizer.py b/paddlespeech/s2t/frontend/featurizer/text_featurizer.py index 0c0fa5e2..982c6b8f 100644 --- a/paddlespeech/s2t/frontend/featurizer/text_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/text_featurizer.py @@ -226,10 +226,10 @@ class TextFeaturizer(): sos_id = vocab_list.index(SOS) if SOS in vocab_list else -1 space_id = vocab_list.index(SPACE) if SPACE in vocab_list else -1 - logger.info(f"BLANK id: {blank_id}") - logger.info(f"UNK id: {unk_id}") - logger.info(f"EOS id: {eos_id}") - logger.info(f"SOS id: {sos_id}") - logger.info(f"SPACE id: {space_id}") - logger.info(f"MASKCTC id: {maskctc_id}") + logger.debug(f"BLANK id: {blank_id}") + logger.debug(f"UNK id: {unk_id}") + logger.debug(f"EOS id: {eos_id}") + logger.debug(f"SOS id: {sos_id}") + logger.debug(f"SPACE id: {space_id}") + logger.debug(f"MASKCTC id: {maskctc_id}") return token2id, id2token, vocab_list, unk_id, eos_id, blank_id diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py index b4b61666..c9b6e2fb 100644 --- a/paddlespeech/s2t/models/u2/u2.py +++ b/paddlespeech/s2t/models/u2/u2.py @@ -827,7 +827,7 @@ class U2Model(U2DecodeModel): # encoder encoder_type = configs.get('encoder', 'transformer') - logger.info(f"U2 Encoder type: {encoder_type}") + logger.debug(f"U2 Encoder type: {encoder_type}") if encoder_type == 'transformer': encoder = TransformerEncoder( input_dim, global_cmvn=global_cmvn, **configs['encoder_conf']) @@ -894,7 +894,7 @@ class U2Model(U2DecodeModel): if checkpoint_path: infos = checkpoint.Checkpoint().load_parameters( model, checkpoint_path=checkpoint_path) - logger.info(f"checkpoint info: {infos}") + logger.debug(f"checkpoint info: {infos}") layer_tools.summary(model) return model diff --git a/paddlespeech/s2t/modules/loss.py b/paddlespeech/s2t/modules/loss.py index c7d9bd45..884fb70c 100644 --- a/paddlespeech/s2t/modules/loss.py +++ b/paddlespeech/s2t/modules/loss.py @@ -37,9 +37,9 @@ class CTCLoss(nn.Layer): self.loss = nn.CTCLoss(blank=blank, reduction=reduction) self.batch_average = batch_average - logger.info( + logger.debug( f"CTCLoss Loss reduction: {reduction}, div-bs: {batch_average}") - logger.info(f"CTCLoss Grad Norm Type: {grad_norm_type}") + logger.debug(f"CTCLoss Grad Norm Type: {grad_norm_type}") assert grad_norm_type in ('instance', 'batch', 'frame', None) self.norm_by_times = False @@ -70,7 +70,8 @@ class CTCLoss(nn.Layer): param = {} self._kwargs = {k: v for k, v in kwargs.items() if k in param} _notin = {k: v for k, v in kwargs.items() if k not in param} - logger.info(f"{self.loss} kwargs:{self._kwargs}, not support: {_notin}") + logger.debug( + f"{self.loss} kwargs:{self._kwargs}, not support: {_notin}") def forward(self, logits, ys_pad, hlens, ys_lens): """Compute CTC loss. 
diff --git a/paddlespeech/s2t/transform/spec_augment.py b/paddlespeech/s2t/transform/spec_augment.py index 5ce95085..fe24a416 100644 --- a/paddlespeech/s2t/transform/spec_augment.py +++ b/paddlespeech/s2t/transform/spec_augment.py @@ -17,7 +17,7 @@ import random import numpy from PIL import Image -from PIL.Image import BICUBIC +from PIL.Image import Resampling from paddlespeech.s2t.transform.functional import FuncTrans @@ -46,9 +46,10 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"): warped = random.randrange(center - window, center + window) + 1 # 1 ... t - 1 - left = Image.fromarray(x[:center]).resize((x.shape[1], warped), BICUBIC) + left = Image.fromarray(x[:center]).resize((x.shape[1], warped), + Resampling.BICUBIC) right = Image.fromarray(x[center:]).resize((x.shape[1], t - warped), - BICUBIC) + Resampling.BICUBIC) if inplace: x[:warped] = left x[warped:] = right diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py index f9a843ea..422d4f82 100644 --- a/paddlespeech/s2t/utils/tensor_utils.py +++ b/paddlespeech/s2t/utils/tensor_utils.py @@ -94,7 +94,7 @@ def pad_sequence(sequences: List[paddle.Tensor], for i, tensor in enumerate(sequences): length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor - logger.info( + logger.debug( f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}" ) if batch_first: -- GitLab
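
The net effect of this patch: paddlespeech/cli/log.py now creates the CLI logger at INFO level, and the chatty per-step messages are demoted from logger.info to logger.debug, so a default CLI run prints far less. A minimal stdlib sketch of the same mechanics (the logger name here is illustrative, not from the patch):

    import logging

    logger = logging.getLogger("demo")
    logger.addHandler(logging.StreamHandler())

    logger.setLevel(logging.INFO)   # the new default, as in paddlespeech/cli/log.py
    logger.debug("suppressed")      # DEBUG < INFO: dropped, like the demoted calls above
    logger.info("still printed")    # INFO-and-above records pass through

    logger.setLevel(logging.DEBUG)  # opting back in restores the verbose trace
    logger.debug("visible again")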
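Separately, both spec_augment modules now import Resampling from PIL.Image instead of the module-level BICUBIC constant: Pillow moved the resampling filters into the Image.Resampling enum in 9.1 and removed the old module-level constants in 10.0, so the patch as written requires Pillow >= 9.1. If support for older Pillow were wanted, a compatibility shim along these lines would cover both (a suggestion, not part of the patch):

    # Assumes only that PIL.Image.Resampling exists on Pillow >= 9.1
    # and that the module-level constant still exists on older releases.
    try:
        from PIL.Image import Resampling
        BICUBIC = Resampling.BICUBIC
    except ImportError:
        from PIL.Image import BICUBIC  # Pillow < 9.1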
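For context on the vector "score" task touched above: it embeds the enroll and test utterances and compares them with paddle.nn.CosineSimilarity(axis=0). A numpy equivalent for two 1-D embeddings (illustrative only; the eps guard mirrors paddle's default eps of 1e-8):

    import numpy as np

    def cosine_score(enroll: np.ndarray, test: np.ndarray, eps: float = 1e-8) -> float:
        """Cosine similarity of two speaker embeddings; higher suggests the same speaker."""
        denom = max(np.linalg.norm(enroll) * np.linalg.norm(test), eps)
        return float(np.dot(enroll, test) / denom)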
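The download.py hunks demote the progress and checksum messages; the flow underneath is to stream the file with requests and verify it with hashlib.md5 in 4096-byte chunks. A condensed sketch of that pattern (the function name is hypothetical; the real _download additionally retries up to DOWNLOAD_RETRY_LIMIT and renders a progress bar, and _md5check hashes the file after download rather than while streaming):

    import hashlib

    import requests

    def fetch_and_verify(url: str, dest: str, md5sum: str) -> bool:
        """Stream url to dest, returning True only if the MD5 digest matches."""
        resp = requests.get(url, stream=True)
        resp.raise_for_status()
        md5 = hashlib.md5()
        with open(dest, "wb") as f:
            for chunk in resp.iter_content(chunk_size=4096):
                f.write(chunk)
                md5.update(chunk)
        return md5.hexdigest() == md5sum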