diff --git a/deepspeech/frontend/normalizer.py b/deepspeech/frontend/normalizer.py index 4c4ba222b635fbc5c3c9611309d4e75280c1f1fa..4ae74ef6ad5d8fbc3d1e6e446807654ee1b07ded 100644 --- a/deepspeech/frontend/normalizer.py +++ b/deepspeech/frontend/normalizer.py @@ -22,9 +22,12 @@ from paddle.io import Dataset from deepspeech.frontend.audio import AudioSegment from deepspeech.frontend.utility import load_cmvn from deepspeech.frontend.utility import read_manifest +from deepspeech.utils.log import Log __all__ = ["FeatureNormalizer"] +logger = Log(__name__).getlog() + # https://github.com/PaddlePaddle/Paddle/pull/31481 class CollateFunc(object): @@ -176,8 +179,8 @@ class FeatureNormalizer(object): wav_number += batch_size if wav_number % 1000 == 0: - print('process {} wavs,{} frames'.format(wav_number, - all_number)) + logger.info('process {} wavs,{} frames'.format(wav_number, + all_number)) self.cmvn_info = { 'mean_stat': list(all_mean_stat.tolist()), diff --git a/deepspeech/utils/log.py b/deepspeech/utils/log.py index 1de59730fffd93ec1149344d1f8ba36269c36538..499b1872f3a7ab8f8a2de9c0e97acdd986a3bf8c 100644 --- a/deepspeech/utils/log.py +++ b/deepspeech/utils/log.py @@ -17,16 +17,22 @@ import os import socket import sys +FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' +DATE_FMT_STR = '%Y/%m/%d %H:%M:%S' + +logging.basicConfig( + level=logging.DEBUG, format=FORMAT_STR, datefmt=DATE_FMT_STR) + def find_log_dir(log_dir=None): """Returns the most suitable directory to put log files into. - Args: - log_dir: str|None, if specified, the logfile(s) will be created in that - directory. Otherwise if the --log_dir command-line flag is provided, - the logfile will be created in that directory. Otherwise the logfile - will be created in a standard location. - Raises: - FileNotFoundError: raised when it cannot find a log directory. + Args: + log_dir: str|None, if specified, the logfile(s) will be created in that + directory. Otherwise if the --log_dir command-line flag is provided, + the logfile will be created in that directory. Otherwise the logfile + will be created in a standard location. + Raises: + FileNotFoundError: raised when it cannot find a log directory. """ # Get a list of possible log dirs (will try to use them in order). if log_dir: @@ -45,22 +51,22 @@ def find_log_dir(log_dir=None): def find_log_dir_and_names(program_name=None, log_dir=None): """Computes the directory and filename prefix for log file. - Args: - program_name: str|None, the filename part of the path to the program that - is running without its extension. e.g: if your program is called - 'usr/bin/foobar.py' this method should probably be called with - program_name='foobar' However, this is just a convention, you can - pass in any string you want, and it will be used as part of the - log filename. If you don't pass in anything, the default behavior - is as described in the example. In python standard logging mode, - the program_name will be prepended with py_ if it is the program_name - argument is omitted. - log_dir: str|None, the desired log directory. - Returns: - (log_dir, file_prefix, symlink_prefix) - Raises: - FileNotFoundError: raised in Python 3 when it cannot find a log directory. - OSError: raised in Python 2 when it cannot find a log directory. + Args: + program_name: str|None, the filename part of the path to the program that + is running without its extension. e.g: if your program is called + 'usr/bin/foobar.py' this method should probably be called with + program_name='foobar' However, this is just a convention, you can + pass in any string you want, and it will be used as part of the + log filename. If you don't pass in anything, the default behavior + is as described in the example. In python standard logging mode, + the program_name will be prepended with py_ if it is the program_name + argument is omitted. + log_dir: str|None, the desired log directory. + Returns: + (log_dir, file_prefix, symlink_prefix) + Raises: + FileNotFoundError: raised in Python 3 when it cannot find a log directory. + OSError: raised in Python 2 when it cannot find a log directory. """ if not program_name: # Strip the extension (foobar.par becomes foobar, and @@ -123,12 +129,10 @@ class Log(): pass if not self.logger.hasHandlers(): - format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s' - formatter = logging.Formatter( - fmt=format, datefmt='%Y/%m/%d %H:%M:%S') + formatter = logging.Formatter(fmt=FORMAT_STR, datefmt=DATE_FMT_STR) fh = logging.FileHandler(Log.log_name) - fh.setFormatter(formatter) fh.setLevel(logging.DEBUG) + fh.setFormatter(formatter) self.logger.addHandler(fh) ch = logging.StreamHandler() @@ -136,9 +140,6 @@ class Log(): ch.setFormatter(formatter) self.logger.addHandler(ch) - #fh.close() - #ch.close() - # stop propagate for propagating may print # log multiple times self.logger.propagate = False diff --git a/examples/aishell/s0/local/data.sh b/examples/aishell/s0/local/data.sh index b814fc754b021bc50ee61309e6cac95b4c02294d..ef8b9198348381c26c985ef8fb65afcdc23b98f4 100755 --- a/examples/aishell/s0/local/data.sh +++ b/examples/aishell/s0/local/data.sh @@ -51,6 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --stride_ms=10.0 \ --window_ms=25.0 \ --sample_rate=16000 \ + --use_dB_normalization=False \ --num_samples=-1 \ --num_workers=16 \ --output_path="data/mean_std.json" diff --git a/examples/librispeech/s1/local/data.sh b/examples/librispeech/s1/local/data.sh index a83c3414dde1d73aacef69c6de7c2e16a8a44781..185f429ba9e4c5c2d813e74f75b9c2e2708a4cdb 100755 --- a/examples/librispeech/s1/local/data.sh +++ b/examples/librispeech/s1/local/data.sh @@ -73,6 +73,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --sample_rate=16000 \ --stride_ms=10.0 \ --window_ms=25.0 \ + --use_dB_normalization=False \ --num_workers=${num_workers} \ --output_path="data/mean_std.json" diff --git a/examples/tiny/s0/local/data.sh b/examples/tiny/s0/local/data.sh index 513795784a8279ce8161e6a98853c7e03433616b..deff91e03f4002d82b845344fdf66070eb37bc4b 100755 --- a/examples/tiny/s0/local/data.sh +++ b/examples/tiny/s0/local/data.sh @@ -57,6 +57,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --sample_rate=16000 \ --stride_ms=10.0 \ --window_ms=25.0 \ + --use_dB_normalization=False \ --num_workers=2 \ --output_path="data/mean_std.json" diff --git a/utils/avg_model.py b/utils/avg_model.py index a002c1b0dc6432a593eab559de7f6675e7a6c9b7..0794c4b8f1f9ea9d784fdb7fefebf6edd4795ee9 100644 --- a/utils/avg_model.py +++ b/utils/avg_model.py @@ -21,6 +21,8 @@ import paddle def main(args): + paddle.set_device('cpu') + val_scores = [] beat_val_scores = [] selected_epochs = [] diff --git a/utils/compute_mean_std.py b/utils/compute_mean_std.py index ddaa368453f77ceda96e42ca807496401f87a65f..8dfd3e590c547a592560ae6565d385405075f54d 100644 --- a/utils/compute_mean_std.py +++ b/utils/compute_mean_std.py @@ -25,17 +25,19 @@ parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable add_arg('num_samples', int, -1, "# of samples to for statistics.") + add_arg('specgram_type', str, 'linear', "Audio feature type. Options: linear, mfcc, fbank.", choices=['linear', 'mfcc', 'fbank']) add_arg('feat_dim', int, 13, "Audio feature dim.") -add_arg('delta_delta', bool, - False, - "Audio feature with delta delta.") -add_arg('stride_ms', float, 10.0, "stride length in ms.") -add_arg('window_ms', float, 20.0, "stride length in ms.") -add_arg('sample_rate', int, 16000, "target sample rate.") +add_arg('delta_delta', bool, False, "Audio feature with delta delta.") +add_arg('stride_ms', float, 10.0, "stride length in ms.") +add_arg('window_ms', float, 20.0, "stride length in ms.") +add_arg('sample_rate', int, 16000, "target sample rate.") +add_arg('use_dB_normalization', bool, False, "do dB normalization.") +add_arg('target_dB', int, -20, "target dB.") + add_arg('manifest_path', str, 'data/librispeech/manifest.train', "Filepath of manifest to compute normalizer's mean and stddev.") @@ -63,8 +65,8 @@ def main(): n_fft=None, max_freq=None, target_sample_rate=args.sample_rate, - use_dB_normalization=True, - target_dB=-20, + use_dB_normalization=args.use_dB_normalization, + target_dB=args.target_dB, dither=0.0) def augment_and_featurize(audio_segment):