From 5a318e999d9624c25127b8eb004553b57831b2fd Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 8 Sep 2017 15:20:23 +0800 Subject: [PATCH] adapt to the new folder structure of DS2 --- deep_speech_2/examples/librispeech/generate.sh | 6 +++--- deep_speech_2/examples/librispeech/run_test.sh | 8 ++++---- deep_speech_2/infer.py | 4 +++- deep_speech_2/models/model.py | 12 ++++++++---- .../{deploy => models/swig_decoders}/README.md | 0 .../{deploy => models/swig_decoders}/__init__.py | 0 .../{deploy => models/swig_decoders}/_init_paths.py | 0 .../swig_decoders}/ctc_decoders.cpp | 4 ++-- .../{deploy => models/swig_decoders}/ctc_decoders.h | 2 +- .../swig_decoders}/decoder_utils.cpp | 0 .../{deploy => models/swig_decoders}/decoder_utils.h | 0 .../{deploy => models/swig_decoders}/decoders.i | 0 .../{deploy => models/swig_decoders}/path_trie.cpp | 0 .../{deploy => models/swig_decoders}/path_trie.h | 0 .../{deploy => models/swig_decoders}/scorer.cpp | 0 .../{deploy => models/swig_decoders}/scorer.h | 0 .../{deploy => models/swig_decoders}/setup.py | 0 .../{deploy => models}/swig_decoders_wrapper.py | 4 ++-- deep_speech_2/test.py | 3 ++- 19 files changed, 25 insertions(+), 18 deletions(-) rename deep_speech_2/{deploy => models/swig_decoders}/README.md (100%) rename deep_speech_2/{deploy => models/swig_decoders}/__init__.py (100%) rename deep_speech_2/{deploy => models/swig_decoders}/_init_paths.py (100%) rename deep_speech_2/{deploy => models/swig_decoders}/ctc_decoders.cpp (98%) rename deep_speech_2/{deploy => models/swig_decoders}/ctc_decoders.h (96%) rename deep_speech_2/{deploy => models/swig_decoders}/decoder_utils.cpp (100%) rename deep_speech_2/{deploy => models/swig_decoders}/decoder_utils.h (100%) rename deep_speech_2/{deploy => models/swig_decoders}/decoders.i (100%) rename deep_speech_2/{deploy => models/swig_decoders}/path_trie.cpp (100%) rename deep_speech_2/{deploy => models/swig_decoders}/path_trie.h (100%) rename deep_speech_2/{deploy => models/swig_decoders}/scorer.cpp (100%) rename deep_speech_2/{deploy => models/swig_decoders}/scorer.h (100%) rename deep_speech_2/{deploy => models/swig_decoders}/setup.py (100%) rename deep_speech_2/{deploy => models}/swig_decoders_wrapper.py (97%) diff --git a/deep_speech_2/examples/librispeech/generate.sh b/deep_speech_2/examples/librispeech/generate.sh index a34b7bc1..752aafb6 100644 --- a/deep_speech_2/examples/librispeech/generate.sh +++ b/deep_speech_2/examples/librispeech/generate.sh @@ -12,9 +12,9 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/deep_speech_2/examples/librispeech/run_test.sh b/deep_speech_2/examples/librispeech/run_test.sh index 5a14cb68..350db8f0 100644 --- a/deep_speech_2/examples/librispeech/run_test.sh +++ b/deep_speech_2/examples/librispeech/run_test.sh @@ -3,7 +3,7 @@ pushd ../.. CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u evaluate.py \ +python -u test.py \ --batch_size=128 \ --trainer_count=8 \ --beam_size=500 \ @@ -12,9 +12,9 @@ python -u evaluate.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/deep_speech_2/infer.py b/deep_speech_2/infer.py index 1ce969ae..44ee9358 100644 --- a/deep_speech_2/infer.py +++ b/deep_speech_2/infer.py @@ -84,6 +84,8 @@ def infer(): use_gru=args.use_gru, pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decoding_method=args.decoding_method, @@ -91,7 +93,7 @@ def infer(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) diff --git a/deep_speech_2/models/model.py b/deep_speech_2/models/model.py index 93c4c41b..b239d5f3 100644 --- a/deep_speech_2/models/model.py +++ b/deep_speech_2/models/model.py @@ -8,8 +8,9 @@ import os import time import gzip import paddle.v2 as paddle -from lm.lm_scorer import LmScorer -from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from models.swig_decoders_wrapper import Scorer +from models.swig_decoders_wrapper import ctc_greedy_decoder +from models.swig_decoders_wrapper import ctc_beam_search_decoder_batch from models.network import deep_speech_v2_network @@ -199,9 +200,12 @@ class DeepSpeech2Model(object): elif decoding_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: - self._ext_scorer = LmScorer(beam_alpha, beam_beta, - language_model_path) + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path) self._loaded_lm_path = language_model_path + self._ext_scorer.set_char_map(vocab_list) + if (not self._ext_scorer.is_character_based()): + self._ext_scorer.fill_dictionary(True) else: self._ext_scorer.reset_params(beam_alpha, beam_beta) assert self._loaded_lm_path == language_model_path diff --git a/deep_speech_2/deploy/README.md b/deep_speech_2/models/swig_decoders/README.md similarity index 100% rename from deep_speech_2/deploy/README.md rename to deep_speech_2/models/swig_decoders/README.md diff --git a/deep_speech_2/deploy/__init__.py b/deep_speech_2/models/swig_decoders/__init__.py similarity index 100% rename from deep_speech_2/deploy/__init__.py rename to deep_speech_2/models/swig_decoders/__init__.py diff --git a/deep_speech_2/deploy/_init_paths.py b/deep_speech_2/models/swig_decoders/_init_paths.py similarity index 100% rename from deep_speech_2/deploy/_init_paths.py rename to deep_speech_2/models/swig_decoders/_init_paths.py diff --git a/deep_speech_2/deploy/ctc_decoders.cpp b/deep_speech_2/models/swig_decoders/ctc_decoders.cpp similarity index 98% rename from deep_speech_2/deploy/ctc_decoders.cpp rename to deep_speech_2/models/swig_decoders/ctc_decoders.cpp index cedb943e..e60e6696 100644 --- a/deep_speech_2/deploy/ctc_decoders.cpp +++ b/deep_speech_2/models/swig_decoders/ctc_decoders.cpp @@ -10,8 +10,8 @@ #include "fst/fstlib.h" #include "path_trie.h" -std::string ctc_best_path_decoder(std::vector> probs_seq, - std::vector vocabulary) { +std::string ctc_greedy_decoder(std::vector> probs_seq, + std::vector vocabulary) { // dimension check int num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { diff --git a/deep_speech_2/deploy/ctc_decoders.h b/deep_speech_2/models/swig_decoders/ctc_decoders.h similarity index 96% rename from deep_speech_2/deploy/ctc_decoders.h rename to deep_speech_2/models/swig_decoders/ctc_decoders.h index 78edefb7..a0028a32 100644 --- a/deep_speech_2/deploy/ctc_decoders.h +++ b/deep_speech_2/models/swig_decoders/ctc_decoders.h @@ -16,7 +16,7 @@ * A vector that each element is a pair of score and decoding result, * in desending order. */ -std::string ctc_best_path_decoder(std::vector> probs_seq, +std::string ctc_greedy_decoder(std::vector> probs_seq, std::vector vocabulary); /* CTC Beam Search Decoder diff --git a/deep_speech_2/deploy/decoder_utils.cpp b/deep_speech_2/models/swig_decoders/decoder_utils.cpp similarity index 100% rename from deep_speech_2/deploy/decoder_utils.cpp rename to deep_speech_2/models/swig_decoders/decoder_utils.cpp diff --git a/deep_speech_2/deploy/decoder_utils.h b/deep_speech_2/models/swig_decoders/decoder_utils.h similarity index 100% rename from deep_speech_2/deploy/decoder_utils.h rename to deep_speech_2/models/swig_decoders/decoder_utils.h diff --git a/deep_speech_2/deploy/decoders.i b/deep_speech_2/models/swig_decoders/decoders.i similarity index 100% rename from deep_speech_2/deploy/decoders.i rename to deep_speech_2/models/swig_decoders/decoders.i diff --git a/deep_speech_2/deploy/path_trie.cpp b/deep_speech_2/models/swig_decoders/path_trie.cpp similarity index 100% rename from deep_speech_2/deploy/path_trie.cpp rename to deep_speech_2/models/swig_decoders/path_trie.cpp diff --git a/deep_speech_2/deploy/path_trie.h b/deep_speech_2/models/swig_decoders/path_trie.h similarity index 100% rename from deep_speech_2/deploy/path_trie.h rename to deep_speech_2/models/swig_decoders/path_trie.h diff --git a/deep_speech_2/deploy/scorer.cpp b/deep_speech_2/models/swig_decoders/scorer.cpp similarity index 100% rename from deep_speech_2/deploy/scorer.cpp rename to deep_speech_2/models/swig_decoders/scorer.cpp diff --git a/deep_speech_2/deploy/scorer.h b/deep_speech_2/models/swig_decoders/scorer.h similarity index 100% rename from deep_speech_2/deploy/scorer.h rename to deep_speech_2/models/swig_decoders/scorer.h diff --git a/deep_speech_2/deploy/setup.py b/deep_speech_2/models/swig_decoders/setup.py similarity index 100% rename from deep_speech_2/deploy/setup.py rename to deep_speech_2/models/swig_decoders/setup.py diff --git a/deep_speech_2/deploy/swig_decoders_wrapper.py b/deep_speech_2/models/swig_decoders_wrapper.py similarity index 97% rename from deep_speech_2/deploy/swig_decoders_wrapper.py rename to deep_speech_2/models/swig_decoders_wrapper.py index b44fae0a..202440bf 100644 --- a/deep_speech_2/deploy/swig_decoders_wrapper.py +++ b/deep_speech_2/models/swig_decoders_wrapper.py @@ -23,7 +23,7 @@ class Scorer(swig_decoders.Scorer): swig_decoders.Scorer.__init__(self, alpha, beta, model_path) -def ctc_best_path_decoder(probs_seq, vocabulary): +def ctc_greedy_decoder(probs_seq, vocabulary): """Wrapper for ctc best path decoder in swig. :param probs_seq: 2-D list of probability distributions over each time @@ -35,7 +35,7 @@ def ctc_best_path_decoder(probs_seq, vocabulary): :return: Decoding result string. :rtype: basestring """ - return swig_decoders.ctc_best_path_decoder(probs_seq.tolist(), vocabulary) + return swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) def ctc_beam_search_decoder(probs_seq, diff --git a/deep_speech_2/test.py b/deep_speech_2/test.py index 747e40df..ec5d17f3 100644 --- a/deep_speech_2/test.py +++ b/deep_speech_2/test.py @@ -85,6 +85,7 @@ def evaluate(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): @@ -95,7 +96,7 @@ def evaluate(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [ -- GitLab