diff --git a/examples/librispeech/generate.sh b/examples/librispeech/generate.sh index a34b7bc1009e1ce32ca676898d72064d9022f2ab..752aafb6a31b16317240472de373743f30d7b1c6 100644 --- a/examples/librispeech/generate.sh +++ b/examples/librispeech/generate.sh @@ -12,9 +12,9 @@ python -u infer.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/examples/librispeech/run_test.sh b/examples/librispeech/run_test.sh index 5a14cb682135bac8ce6097acfd07b5f2f615c1ba..350db8f020ebce110ca415b9182674ce969d88a6 100644 --- a/examples/librispeech/run_test.sh +++ b/examples/librispeech/run_test.sh @@ -3,7 +3,7 @@ pushd ../.. CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ -python -u evaluate.py \ +python -u test.py \ --batch_size=128 \ --trainer_count=8 \ --beam_size=500 \ @@ -12,9 +12,9 @@ python -u evaluate.py \ --num_conv_layers=2 \ --num_rnn_layers=3 \ --rnn_layer_size=2048 \ ---alpha=0.36 \ ---beta=0.25 \ ---cutoff_prob=0.99 \ +--alpha=2.15 \ +--beta=0.35 \ +--cutoff_prob=1.0 \ --use_gru=False \ --use_gpu=True \ --share_rnn_weights=True \ diff --git a/infer.py b/infer.py index 1ce969ae07b649a4b2d2669683b3ae537bb8edc2..44ee93581dbfd811d904a15eba079cd988c59bdd 100644 --- a/infer.py +++ b/infer.py @@ -84,6 +84,8 @@ def infer(): use_gru=args.use_gru, pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] result_transcripts = ds2_model.infer_batch( infer_data=infer_data, decoding_method=args.decoding_method, @@ -91,7 +93,7 @@ def infer(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) diff --git a/models/model.py 
b/models/model.py index 93c4c41bf761a519f7f6e70bd5221cec9773f3f9..b239d5f39fc4b5bde9efbc567ffca71003132889 100644 --- a/models/model.py +++ b/models/model.py @@ -8,8 +8,9 @@ import os import time import gzip import paddle.v2 as paddle -from lm.lm_scorer import LmScorer -from models.decoder import ctc_greedy_decoder, ctc_beam_search_decoder +from models.swig_decoders_wrapper import Scorer +from models.swig_decoders_wrapper import ctc_greedy_decoder +from models.swig_decoders_wrapper import ctc_beam_search_decoder_batch from models.network import deep_speech_v2_network @@ -199,9 +200,12 @@ class DeepSpeech2Model(object): elif decoding_method == "ctc_beam_search": # initialize external scorer if self._ext_scorer == None: - self._ext_scorer = LmScorer(beam_alpha, beam_beta, - language_model_path) + self._ext_scorer = Scorer(beam_alpha, beam_beta, + language_model_path) self._loaded_lm_path = language_model_path + self._ext_scorer.set_char_map(vocab_list) + if (not self._ext_scorer.is_character_based()): + self._ext_scorer.fill_dictionary(True) else: self._ext_scorer.reset_params(beam_alpha, beam_beta) assert self._loaded_lm_path == language_model_path diff --git a/deploy/README.md b/models/swig_decoders/README.md similarity index 100% rename from deploy/README.md rename to models/swig_decoders/README.md diff --git a/deploy/__init__.py b/models/swig_decoders/__init__.py similarity index 100% rename from deploy/__init__.py rename to models/swig_decoders/__init__.py diff --git a/deploy/_init_paths.py b/models/swig_decoders/_init_paths.py similarity index 100% rename from deploy/_init_paths.py rename to models/swig_decoders/_init_paths.py diff --git a/deploy/ctc_decoders.cpp b/models/swig_decoders/ctc_decoders.cpp similarity index 98% rename from deploy/ctc_decoders.cpp rename to models/swig_decoders/ctc_decoders.cpp index cedb943ea27f5857cb2af6476c590a5b30232bc4..e60e669659d1a08d49620beb35491eb67731f21e 100644 --- a/deploy/ctc_decoders.cpp +++ 
b/models/swig_decoders/ctc_decoders.cpp @@ -10,8 +10,8 @@ #include "fst/fstlib.h" #include "path_trie.h" -std::string ctc_best_path_decoder(std::vector<std::vector<double>> probs_seq, - std::vector<std::string> vocabulary) { +std::string ctc_greedy_decoder(std::vector<std::vector<double>> probs_seq, + std::vector<std::string> vocabulary) { // dimension check int num_time_steps = probs_seq.size(); for (int i = 0; i < num_time_steps; i++) { diff --git a/deploy/ctc_decoders.h b/models/swig_decoders/ctc_decoders.h similarity index 96% rename from deploy/ctc_decoders.h rename to models/swig_decoders/ctc_decoders.h index 78edefb77f09be74647db2fe11c1138de024b8d7..a0028a324778a3e30017dc37720b07c386401cf2 100644 --- a/deploy/ctc_decoders.h +++ b/models/swig_decoders/ctc_decoders.h @@ -16,7 +16,7 @@ * A vector that each element is a pair of score and decoding result, * in desending order. */ -std::string ctc_best_path_decoder(std::vector<std::vector<double>> probs_seq, +std::string ctc_greedy_decoder(std::vector<std::vector<double>> probs_seq, std::vector<std::string> vocabulary); /* CTC Beam Search Decoder diff --git a/deploy/decoder_utils.cpp b/models/swig_decoders/decoder_utils.cpp similarity index 100% rename from deploy/decoder_utils.cpp rename to models/swig_decoders/decoder_utils.cpp diff --git a/deploy/decoder_utils.h b/models/swig_decoders/decoder_utils.h similarity index 100% rename from deploy/decoder_utils.h rename to models/swig_decoders/decoder_utils.h diff --git a/deploy/decoders.i b/models/swig_decoders/decoders.i similarity index 100% rename from deploy/decoders.i rename to models/swig_decoders/decoders.i diff --git a/deploy/path_trie.cpp b/models/swig_decoders/path_trie.cpp similarity index 100% rename from deploy/path_trie.cpp rename to models/swig_decoders/path_trie.cpp diff --git a/deploy/path_trie.h b/models/swig_decoders/path_trie.h similarity index 100% rename from deploy/path_trie.h rename to models/swig_decoders/path_trie.h diff --git a/deploy/scorer.cpp b/models/swig_decoders/scorer.cpp similarity index 100% rename from deploy/scorer.cpp rename to 
models/swig_decoders/scorer.cpp diff --git a/deploy/scorer.h b/models/swig_decoders/scorer.h similarity index 100% rename from deploy/scorer.h rename to models/swig_decoders/scorer.h diff --git a/deploy/setup.py b/models/swig_decoders/setup.py similarity index 100% rename from deploy/setup.py rename to models/swig_decoders/setup.py diff --git a/deploy/swig_decoders_wrapper.py b/models/swig_decoders_wrapper.py similarity index 97% rename from deploy/swig_decoders_wrapper.py rename to models/swig_decoders_wrapper.py index b44fae0aee1b39216b2f1f25510404ede7a3e0bc..202440bfba2e23e05278d41fdd8ebeedbd797f32 100644 --- a/deploy/swig_decoders_wrapper.py +++ b/models/swig_decoders_wrapper.py @@ -23,7 +23,7 @@ class Scorer(swig_decoders.Scorer): swig_decoders.Scorer.__init__(self, alpha, beta, model_path) -def ctc_best_path_decoder(probs_seq, vocabulary): +def ctc_greedy_decoder(probs_seq, vocabulary): """Wrapper for ctc best path decoder in swig. :param probs_seq: 2-D list of probability distributions over each time @@ -35,7 +35,7 @@ def ctc_best_path_decoder(probs_seq, vocabulary): :return: Decoding result string. 
:rtype: basestring """ - return swig_decoders.ctc_best_path_decoder(probs_seq.tolist(), vocabulary) + return swig_decoders.ctc_greedy_decoder(probs_seq.tolist(), vocabulary) def ctc_beam_search_decoder(probs_seq, diff --git a/test.py b/test.py index 747e40df872cd3f9e0844ed7ad82b2f9cfecf196..ec5d17f30d1dadf93ed14a238e27751d741beb5d 100644 --- a/test.py +++ b/test.py @@ -85,6 +85,7 @@ def evaluate(): pretrained_model_path=args.model_path, share_rnn_weights=args.share_rnn_weights) + vocab_list = [chars.encode("utf-8") for chars in data_generator.vocab_list] error_rate_func = cer if args.error_rate_type == 'cer' else wer error_sum, num_ins = 0.0, 0 for infer_data in batch_reader(): @@ -95,7 +96,7 @@ def evaluate(): beam_beta=args.beta, beam_size=args.beam_size, cutoff_prob=args.cutoff_prob, - vocab_list=data_generator.vocab_list, + vocab_list=vocab_list, language_model_path=args.lang_model_path, num_processes=args.num_proc_bsearch) target_transcripts = [