提交 1635e000 编写于 作者: H Hui Zhang

fix cmvn

上级 2aed2752
......@@ -22,9 +22,12 @@ from paddle.io import Dataset
from deepspeech.frontend.audio import AudioSegment
from deepspeech.frontend.utility import load_cmvn
from deepspeech.frontend.utility import read_manifest
from deepspeech.utils.log import Log
__all__ = ["FeatureNormalizer"]
logger = Log(__name__).getlog()
# https://github.com/PaddlePaddle/Paddle/pull/31481
class CollateFunc(object):
......@@ -176,7 +179,7 @@ class FeatureNormalizer(object):
wav_number += batch_size
if wav_number % 1000 == 0:
print('process {} wavs,{} frames'.format(wav_number,
logger.info('process {} wavs,{} frames'.format(wav_number,
all_number))
self.cmvn_info = {
......
......@@ -17,6 +17,12 @@ import os
import socket
import sys
# Record layout and timestamp format, defined once so the root logging
# configuration below and any handler formatters can share them.
FORMAT_STR = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
DATE_FMT_STR = '%Y/%m/%d %H:%M:%S'

# Configure the root logger at import time with the shared format.
logging.basicConfig(
    format=FORMAT_STR,
    datefmt=DATE_FMT_STR,
    level=logging.DEBUG, )
def find_log_dir(log_dir=None):
"""Returns the most suitable directory to put log files into.
......@@ -123,12 +129,10 @@ class Log():
pass
if not self.logger.hasHandlers():
format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
formatter = logging.Formatter(
fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
formatter = logging.Formatter(fmt=FORMAT_STR, datefmt=DATE_FMT_STR)
fh = logging.FileHandler(Log.log_name)
fh.setFormatter(formatter)
fh.setLevel(logging.DEBUG)
fh.setFormatter(formatter)
self.logger.addHandler(fh)
ch = logging.StreamHandler()
......@@ -136,9 +140,6 @@ class Log():
ch.setFormatter(formatter)
self.logger.addHandler(ch)
#fh.close()
#ch.close()
# Disable propagation: if records also propagate to ancestor loggers,
# the same message may be printed multiple times.
self.logger.propagate = False
......
......@@ -51,6 +51,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--stride_ms=10.0 \
--window_ms=25.0 \
--sample_rate=16000 \
--use_dB_normalization=False \
--num_samples=-1 \
--num_workers=16 \
--output_path="data/mean_std.json"
......
......@@ -73,6 +73,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=25.0 \
--use_dB_normalization=False \
--num_workers=${num_workers} \
--output_path="data/mean_std.json"
......
......@@ -57,6 +57,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
--sample_rate=16000 \
--stride_ms=10.0 \
--window_ms=25.0 \
--use_dB_normalization=False \
--num_workers=2 \
--output_path="data/mean_std.json"
......
......@@ -21,6 +21,8 @@ import paddle
def main(args):
paddle.set_device('cpu')
val_scores = []
beat_val_scores = []
selected_epochs = []
......
......@@ -25,17 +25,19 @@ parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
# Sampling and feature-type options.
# The num_samples help text previously read "# of samples to for statistics."
add_arg('num_samples', int, -1, "# of samples to use for statistics.")
add_arg('specgram_type', str,
        'linear',
        "Audio feature type. Options: linear, mfcc, fbank.",
        choices=['linear', 'mfcc', 'fbank'])
add_arg('feat_dim', int, 13, "Audio feature dim.")
add_arg('delta_delta', bool,
False,
"Audio feature with delta delta.")
add_arg('delta_delta', bool, False, "Audio feature with delta delta.")
# Framing, sample-rate and dB-normalization options.
add_arg('stride_ms', float, 10.0, "stride length in ms.")
# The help text here previously said "stride length in ms.", copied from the
# stride_ms option above; this option is the window length.
add_arg('window_ms', float, 20.0, "window length in ms.")
add_arg('sample_rate', int, 16000, "target sample rate.")
add_arg('use_dB_normalization', bool, False, "do dB normalization.")
add_arg('target_dB', int, -20, "target dB.")
add_arg('manifest_path', str,
'data/librispeech/manifest.train',
"Filepath of manifest to compute normalizer's mean and stddev.")
......@@ -63,8 +65,8 @@ def main():
n_fft=None,
max_freq=None,
target_sample_rate=args.sample_rate,
use_dB_normalization=True,
target_dB=-20,
use_dB_normalization=args.use_dB_normalization,
target_dB=args.target_dB,
dither=0.0)
def augment_and_featurize(audio_segment):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册