fix cmvn

1635e000 · Hui Zhang · 2aed2752 · 1635e000 · 1635e000 · 1635e000
7 changed files
--- a/deepspeech/frontend/normalizer.py
+++ b/deepspeech/frontend/normalizer.py
@@ -22,9 +22,12 @@ from paddle.io import Dataset
 from deepspeech.frontend.audio import AudioSegment
 from deepspeech.frontend.utility import load_cmvn
 from deepspeech.frontend.utility import read_manifest
+from deepspeech.utils.log import Log

 __all__ = ["FeatureNormalizer"]

+logger = Log(__name__).getlog()
+

 # https://github.com/PaddlePaddle/Paddle/pull/31481
 class CollateFunc(object):
@@ -176,8 +179,8 @@ class FeatureNormalizer(object):
                wav_number += batch_size

                if wav_number % 1000 == 0:
-                    print('process {} wavs,{} frames'.format(wav_number,
-                                                             all_number))
+                    logger.info('process {} wavs,{} frames'.format(wav_number,
+                                                                   all_number))

        self.cmvn_info = {
            'mean_stat': list(all_mean_stat.tolist()),

--- a/deepspeech/utils/log.py
+++ b/deepspeech/utils/log.py
@@ -17,16 +17,22 @@ import os
 import socket
 import sys

+FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
+DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'
+
+logging.basicConfig(
+    level=logging.DEBUG, format=FORMAT_STR, datefmt=DATE_FMT_STR)
+

 def find_log_dir(log_dir=None):
    """Returns the most suitable directory to put log files into.
-  Args:
-    log_dir: str|None, if specified, the logfile(s) will be created in that
-        directory.  Otherwise if the --log_dir command-line flag is provided,
-        the logfile will be created in that directory.  Otherwise the logfile
-        will be created in a standard location.
-  Raises:
-    FileNotFoundError: raised when it cannot find a log directory.
+    Args:
+        log_dir: str|None, if specified, the logfile(s) will be created in that
+            directory.  Otherwise if the --log_dir command-line flag is provided,
+            the logfile will be created in that directory.  Otherwise the logfile
+            will be created in a standard location.
+    Raises:
+        FileNotFoundError: raised when it cannot find a log directory.
  """
    # Get a list of possible log dirs (will try to use them in order).
    if log_dir:
@@ -45,22 +51,22 @@ def find_log_dir(log_dir=None):

 def find_log_dir_and_names(program_name=None, log_dir=None):
    """Computes the directory and filename prefix for log file.
-  Args:
-    program_name: str|None, the filename part of the path to the program that
-        is running without its extension.  e.g: if your program is called
-        'usr/bin/foobar.py' this method should probably be called with
-        program_name='foobar' However, this is just a convention, you can
-        pass in any string you want, and it will be used as part of the
-        log filename. If you don't pass in anything, the default behavior
-        is as described in the example.  In python standard logging mode,
-        the program_name will be prepended with py_ if it is the program_name
-        argument is omitted.
-    log_dir: str|None, the desired log directory.
-  Returns:
-    (log_dir, file_prefix, symlink_prefix)
-  Raises:
-    FileNotFoundError: raised in Python 3 when it cannot find a log directory.
-    OSError: raised in Python 2 when it cannot find a log directory.
+    Args:
+        program_name: str|None, the filename part of the path to the program that
+            is running without its extension.  e.g: if your program is called
+            'usr/bin/foobar.py' this method should probably be called with
+            program_name='foobar' However, this is just a convention, you can
+            pass in any string you want, and it will be used as part of the
+            log filename. If you don't pass in anything, the default behavior
+            is as described in the example.  In python standard logging mode,
+            the program_name will be prepended with py_ if the program_name
+            argument is omitted.
+        log_dir: str|None, the desired log directory.
+    Returns:
+        (log_dir, file_prefix, symlink_prefix)
+    Raises:
+        FileNotFoundError: raised in Python 3 when it cannot find a log directory.
+        OSError: raised in Python 2 when it cannot find a log directory.
  """
    if not program_name:
        # Strip the extension (foobar.par becomes foobar, and
@@ -123,12 +129,10 @@ class Log():
            pass

        if not self.logger.hasHandlers():
-            format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
-            formatter = logging.Formatter(
-                fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
+            formatter = logging.Formatter(fmt=FORMAT_STR, datefmt=DATE_FMT_STR)
            fh = logging.FileHandler(Log.log_name)
-            fh.setFormatter(formatter)
            fh.setLevel(logging.DEBUG)
+            fh.setFormatter(formatter)
            self.logger.addHandler(fh)

            ch = logging.StreamHandler()
@@ -136,9 +140,6 @@ class Log():
            ch.setFormatter(formatter)
            self.logger.addHandler(ch)

-            #fh.close()
-            #ch.close()
-
        # stop propagate for propagating may print
        # log multiple times
        self.logger.propagate = False

--- a/examples/aishell/s0/local/data.sh
+++ b/examples/aishell/s0/local/data.sh
@@ -51,6 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    --stride_ms=10.0 \
    --window_ms=25.0 \
    --sample_rate=16000 \
+    --use_dB_normalization=False \
    --num_samples=-1 \
    --num_workers=16 \
    --output_path="data/mean_std.json"

--- a/examples/librispeech/s1/local/data.sh
+++ b/examples/librispeech/s1/local/data.sh
@@ -73,6 +73,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    --sample_rate=16000 \
    --stride_ms=10.0 \
    --window_ms=25.0 \
+    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"


--- a/examples/tiny/s0/local/data.sh
+++ b/examples/tiny/s0/local/data.sh
@@ -57,6 +57,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    --sample_rate=16000 \
    --stride_ms=10.0 \
    --window_ms=25.0 \
+    --use_dB_normalization=False \
    --num_workers=2 \
    --output_path="data/mean_std.json"
    

--- a/utils/avg_model.py
+++ b/utils/avg_model.py
@@ -21,6 +21,8 @@ import paddle


 def main(args):
+    paddle.set_device('cpu')
+
    val_scores = []
    beat_val_scores = []
    selected_epochs = []

--- a/utils/compute_mean_std.py
+++ b/utils/compute_mean_std.py
@@ -25,17 +25,19 @@ parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('num_samples',      int,    -1,    "# of samples to for statistics.")
+
 add_arg('specgram_type',    str,
        'linear',
        "Audio feature type. Options: linear, mfcc, fbank.",
        choices=['linear', 'mfcc', 'fbank'])
 add_arg('feat_dim',    int, 13, "Audio feature dim.")
-add_arg('delta_delta',    bool,
-        False,
-        "Audio feature with delta delta.")
-add_arg('stride_ms',    float, 10.0,  "stride length in ms.")
-add_arg('window_ms',    float, 20.0,  "stride length in ms.")
-add_arg('sample_rate',    int, 16000,  "target sample rate.")
+add_arg('delta_delta', bool,  False, "Audio feature with delta delta.")
+add_arg('stride_ms', float, 10.0,  "stride length in ms.")
+add_arg('window_ms', float, 20.0,  "stride length in ms.")
+add_arg('sample_rate',  int, 16000,  "target sample rate.")
+add_arg('use_dB_normalization', bool, False, "do dB normalization.")
+add_arg('target_dB',   int, -20,  "target dB.")
+
 add_arg('manifest_path',    str,
        'data/librispeech/manifest.train',
        "Filepath of manifest to compute normalizer's mean and stddev.")
@@ -63,8 +65,8 @@ def main():
        n_fft=None,
        max_freq=None,
        target_sample_rate=args.sample_rate,
-        use_dB_normalization=True,
-        target_dB=-20,
+        use_dB_normalization=args.use_dB_normalization,
+        target_dB=args.target_dB,
        dither=0.0)

    def augment_and_featurize(audio_segment):