From 724ef185966a379ceca0caa1d0b2200e42bf32f3 Mon Sep 17 00:00:00 2001
From: Yibing Liu <liuyibing01@baidu.com>
Date: Wed, 19 Jul 2017 22:40:01 +0800
Subject: [PATCH] update several scripts to support mfcc

---
 README.md   | 2 ++
 evaluate.py | 7 +++++++
 infer.py    | 7 +++++++
 tune.py     | 7 +++++++
 4 files changed, 23 insertions(+)

diff --git a/README.md b/README.md
index a92b671c..24f0b3c3 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,8 @@ python compute_mean_std.py
 python compute_mean_std.py --specgram_type mfcc
 ```
 
+and set ```specgram_type``` to ```mfcc``` in each step, including training, inference, etc.
+
 More help for arguments:
 
 ```
diff --git a/evaluate.py b/evaluate.py
index 00516dcb..19eabf4e 100644
--- a/evaluate.py
+++ b/evaluate.py
@@ -86,6 +86,12 @@ parser.add_argument(
     default=500,
     type=int,
     help="Width for beam search decoding. (default: %(default)d)")
+parser.add_argument(
+    "--specgram_type",
+    default='linear',
+    type=str,
+    help="Feature type of audio data: 'linear' (power spectrum)"
+    " or 'mfcc'. (default: %(default)s)")
 parser.add_argument(
     "--decode_manifest_path",
     default='datasets/manifest.test',
@@ -111,6 +117,7 @@ def evaluate():
         vocab_filepath=args.vocab_filepath,
         mean_std_filepath=args.mean_std_filepath,
         augmentation_config='{}',
+        specgram_type=args.specgram_type,
         num_threads=args.num_threads_data)
 
     # create network config
diff --git a/infer.py b/infer.py
index bb81feac..81752630 100644
--- a/infer.py
+++ b/infer.py
@@ -51,6 +51,12 @@ parser.add_argument(
     default=multiprocessing.cpu_count(),
     type=int,
     help="Number of cpu processes for beam search. (default: %(default)s)")
+parser.add_argument(
+    "--specgram_type",
+    default='linear',
+    type=str,
+    help="Feature type of audio data: 'linear' (power spectrum)"
+    " or 'mfcc'. (default: %(default)s)")
 parser.add_argument(
     "--mean_std_filepath",
     default='mean_std.npz',
@@ -118,6 +124,7 @@ def infer():
         vocab_filepath=args.vocab_filepath,
         mean_std_filepath=args.mean_std_filepath,
         augmentation_config='{}',
+        specgram_type=args.specgram_type,
         num_threads=args.num_threads_data)
 
     # create network config
diff --git a/tune.py b/tune.py
index 19a2d559..2fcca486 100644
--- a/tune.py
+++ b/tune.py
@@ -50,6 +50,12 @@ parser.add_argument(
     default=multiprocessing.cpu_count(),
     type=int,
     help="Number of cpu processes for beam search. (default: %(default)s)")
+parser.add_argument(
+    "--specgram_type",
+    default='linear',
+    type=str,
+    help="Feature type of audio data: 'linear' (power spectrum)"
+    " or 'mfcc'. (default: %(default)s)")
 parser.add_argument(
     "--mean_std_filepath",
     default='mean_std.npz',
@@ -133,6 +139,7 @@ def tune():
         vocab_filepath=args.vocab_filepath,
         mean_std_filepath=args.mean_std_filepath,
         augmentation_config='{}',
+        specgram_type=args.specgram_type,
         num_threads=args.num_threads_data)
 
     # create network config
-- 
GitLab