diff --git a/README.md b/README.md index a92b671cb5df385a0948df9be078808c7391e835..24f0b3c3fd3269eb4c55f99dd26e947b5158c8ab 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ python compute_mean_std.py python compute_mean_std.py --specgram_type mfcc ``` +and set ```specgram_type``` to ```mfcc``` in every step, including training, inference, etc. + More help for arguments: ``` diff --git a/evaluate.py b/evaluate.py index 00516dcbf00de146677dfc1122125346a52ebe92..19eabf4e5aff090ed2f529e3ea3cd7f10ae57cb7 100644 --- a/evaluate.py +++ b/evaluate.py @@ -86,6 +86,12 @@ parser.add_argument( default=500, type=int, help="Width for beam search decoding. (default: %(default)d)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--decode_manifest_path", default='datasets/manifest.test', @@ -111,6 +117,7 @@ def evaluate(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config diff --git a/infer.py b/infer.py index bb81feac163993848541835fe4dbbf4285727cde..817526302764b3d6044688da97ad0cc072c14144 100644 --- a/infer.py +++ b/infer.py @@ -51,6 +51,12 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu processes for beam search. (default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. 
(default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -118,6 +124,7 @@ def infer(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config diff --git a/tune.py b/tune.py index 19a2d5595118c53751a8aa59ef63bd494375af84..2fcca48628aa0aba7fd2e09a1d9ba90582492f89 100644 --- a/tune.py +++ b/tune.py @@ -50,6 +50,12 @@ parser.add_argument( default=multiprocessing.cpu_count(), type=int, help="Number of cpu processes for beam search. (default: %(default)s)") +parser.add_argument( + "--specgram_type", + default='linear', + type=str, + help="Feature type of audio data: 'linear' (power spectrum)" + " or 'mfcc'. (default: %(default)s)") parser.add_argument( "--mean_std_filepath", default='mean_std.npz', @@ -133,6 +139,7 @@ def tune(): vocab_filepath=args.vocab_filepath, mean_std_filepath=args.mean_std_filepath, augmentation_config='{}', + specgram_type=args.specgram_type, num_threads=args.num_threads_data) # create network config