diff --git a/deep_speech_2/deploy.py b/deep_speech_2/deploy.py index 70a9b9efee73762d4e5635a73226a0c5f3d3d84e..091d82892bf2efd12b2b1c85b42fbd96eca3cc12 100644 --- a/deep_speech_2/deploy.py +++ b/deep_speech_2/deploy.py @@ -11,7 +11,7 @@ import paddle.v2 as paddle from data_utils.data import DataGenerator from model import deep_speech2 from deploy.swig_decoders import * -from swig_scorer import LmScorer +from swig_scorer import Scorer from error_rate import wer import utils import time @@ -19,7 +19,7 @@ import time parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--num_samples", - default=100, + default=10, type=int, help="Number of samples for inference. (default: %(default)s)") parser.add_argument( @@ -164,7 +164,7 @@ def infer(): ] # external scorer - ext_scorer = LmScorer(args.alpha, args.beta, args.language_model_path) + ext_scorer = Scorer(args.alpha, args.beta, args.language_model_path) ## decode and print time_begin = time.time() diff --git a/deep_speech_2/deploy/ctc_decoders.cpp b/deep_speech_2/deploy/ctc_decoders.cpp index 4cff6d5e544ce04583935331474776b1714f9ad2..75555c018b33c43f43d70c8299ff6b1f2e94aa8f 100644 --- a/deep_speech_2/deploy/ctc_decoders.cpp +++ b/deep_speech_2/deploy/ctc_decoders.cpp @@ -5,9 +5,11 @@ #include #include #include "ctc_decoders.h" +#include "decoder_utils.h" typedef double log_prob_type; + template bool pair_comp_first_rev(const std::pair a, const std::pair b) { @@ -81,7 +83,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob, - LmScorer *ext_scorer, + Scorer *ext_scorer, bool nproc) { // dimension check int num_time_steps = probs_seq.size(); diff --git a/deep_speech_2/deploy/ctc_decoders.h b/deep_speech_2/deploy/ctc_decoders.h index da08a2c58a62ecfec2f6854158f7bf6f40d8427b..50a6014f0abecf483acd049917a9fa6202793405 100644 --- a/deep_speech_2/deploy/ctc_decoders.h +++ b/deep_speech_2/deploy/ctc_decoders.h @@ -28,7 +28,7 @@ std::vector > std::vector vocabulary, int blank_id, double cutoff_prob=1.0, - LmScorer *ext_scorer=NULL, + Scorer *ext_scorer=NULL, bool nproc=false ); diff --git a/deep_speech_2/deploy/ctc_decoders.i b/deep_speech_2/deploy/ctc_decoders.i index c7d05238e5b0849b4db6217084ac1352fa919f82..8c9dd1643d994dfd0e3a713acd96fe27301491aa 100644 --- a/deep_speech_2/deploy/ctc_decoders.i +++ b/deep_speech_2/deploy/ctc_decoders.i @@ -19,4 +19,5 @@ namespace std{ } %import scorer.h +%import decoder_utils.h %include "ctc_decoders.h" diff --git a/deep_speech_2/deploy/decoder_setup.py b/deep_speech_2/deploy/decoder_setup.py index aed45faafc3f7ec84ec3f4949cbc413d36ef2a6d..146538f557f727ae798ab8b9322bac2b646531ca 100644 --- a/deep_speech_2/deploy/decoder_setup.py +++ b/deep_speech_2/deploy/decoder_setup.py @@ -39,8 +39,10 @@ os.system('swig -python -c++ ./ctc_decoders.i') ctc_beam_search_decoder_module = [ Extension( name='_swig_ctc_decoders', - sources=FILES + - ['scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp'], + sources=FILES + [ + 'scorer.cpp', 'ctc_decoders_wrap.cxx', 'ctc_decoders.cpp', + 'decoder_utils.cpp' + ], language='C++', include_dirs=['.', './kenlm'], libraries=LIBS, diff --git a/deep_speech_2/deploy/decoder_utils.cpp b/deep_speech_2/deploy/decoder_utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..82e4cd1467fbc759ab1d4c941dab4fe06a7dff6c --- /dev/null +++ b/deep_speech_2/deploy/decoder_utils.cpp @@ -0,0 +1,5 @@ +#include +#include +#include +#include "decoder_utils.h" + diff --git a/deep_speech_2/deploy/decoder_utils.h b/deep_speech_2/deploy/decoder_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..6d58bf1f30d8731c1add89a24953bde94762d55b --- /dev/null +++ b/deep_speech_2/deploy/decoder_utils.h @@ -0,0 +1,15 @@ +#ifndef DECODER_UTILS_H +#define DECODER_UTILS_H +#pragma once +#include + +/* +template +bool pair_comp_first_rev(const std::pair a, const std::pair b); + +template +bool pair_comp_second_rev(const std::pair a, const std::pair b); + +template T log_sum_exp(T x, T y); +*/ +#endif // DECODER_UTILS_H diff --git a/deep_speech_2/deploy/scorer.cpp b/deep_speech_2/deploy/scorer.cpp index 7a66daad9c3c0783dabf7cd72f6d68b91d621555..e9a74b989a589ed920ec69b7e5bd5cb15f4a6d11 100644 --- a/deep_speech_2/deploy/scorer.cpp +++ b/deep_speech_2/deploy/scorer.cpp @@ -7,7 +7,7 @@ using namespace lm::ngram; -LmScorer::LmScorer(float alpha, float beta, std::string lm_model_path) { +Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { this->_alpha = alpha; this->_beta = beta; @@ -18,7 +18,7 @@ LmScorer::LmScorer(float alpha, float beta, std::string lm_model_path) { this->_language_model = LoadVirtual(lm_model_path.c_str()); } -LmScorer::~LmScorer(){ +Scorer::~Scorer(){ delete (lm::base::Model *)this->_language_model; } @@ -57,7 +57,7 @@ inline void strip(std::string &str, char ch=' ') { } } -int LmScorer::word_count(std::string sentence) { +int Scorer::word_count(std::string sentence) { strip(sentence); int cnt = 1; for (int i=0; i_language_model; State state, out_state; lm::FullScoreReturn ret; @@ -84,12 +84,12 @@ double LmScorer::language_model_score(std::string sentence) { return log_prob; } -void LmScorer::reset_params(float alpha, float beta) { +void Scorer::reset_params(float alpha, float beta) { this->_alpha = alpha; this->_beta = beta; } -double LmScorer::get_score(std::string sentence, bool log) { +double Scorer::get_score(std::string sentence, bool log) { double lm_score = language_model_score(sentence); int word_cnt = word_count(sentence); diff --git a/deep_speech_2/deploy/scorer.h b/deep_speech_2/deploy/scorer.h index 90a1a84a0a06314e6457b48344ea7487af41dd11..a18e119bcf155d2380972c8560d588d0ccf43efc 100644 --- a/deep_speech_2/deploy/scorer.h +++ b/deep_speech_2/deploy/scorer.h @@ -8,10 +8,10 @@ * count and language model scoring. * Example: - * LmScorer ext_scorer(alpha, beta, "path_to_language_model.klm"); + * Scorer ext_scorer(alpha, beta, "path_to_language_model.klm"); * double score = ext_scorer.get_score("sentence_to_score"); */ -class LmScorer{ +class Scorer{ private: float _alpha; float _beta; @@ -23,9 +23,9 @@ private: double language_model_score(std::string); public: - LmScorer(){} - LmScorer(float alpha, float beta, std::string lm_model_path); - ~LmScorer(); + Scorer(){} + Scorer(float alpha, float beta, std::string lm_model_path); + ~Scorer(); // reset params alpha & beta void reset_params(float alpha, float beta); diff --git a/deep_speech_2/deploy/swig_decoders.py b/deep_speech_2/deploy/swig_decoders.py index 8e4a39252b2012d39ac500f76471335e7764f291..0247c0c9ea512089c2293f71745b66432d8d6007 100644 --- a/deep_speech_2/deploy/swig_decoders.py +++ b/deep_speech_2/deploy/swig_decoders.py @@ -4,7 +4,8 @@ from __future__ import division from __future__ import print_function import swig_ctc_decoders -import multiprocessing +#import multiprocessing +from pathos.multiprocessing import Pool def ctc_best_path_decoder(probs_seq, vocabulary): @@ -73,14 +74,37 @@ def ctc_beam_search_decoder_batch(probs_split, if not num_processes > 0: raise ValueError("Number of processes must be positive!") - pool = multiprocessing.Pool(processes=num_processes) + pool = Pool(processes=num_processes) results = [] + args_list = [] for i, probs_list in enumerate(probs_split): args = (probs_list, beam_size, vocabulary, blank_id, cutoff_prob, ext_scoring_func) + args_list.append(args) results.append(pool.apply_async(ctc_beam_search_decoder, args)) pool.close() pool.join() beam_search_results = [result.get() for result in results] + """ + len_args = len(probs_split) + beam_search_results = pool.map(ctc_beam_search_decoder, + probs_split, + [beam_size for i in xrange(len_args)], + [vocabulary for i in xrange(len_args)], + [blank_id for i in xrange(len_args)], + [cutoff_prob for i in xrange(len_args)], + [ext_scoring_func for i in xrange(len_args)] + ) + """ + ''' + processes = [mp.Process(target=ctc_beam_search_decoder, + args=(probs_list, beam_size, vocabulary, blank_id, cutoff_prob, + ext_scoring_func) for probs_list in probs_split] + for p in processes: + p.start() + for p in processes: + p.join() + beam_search_results = [] + ''' return beam_search_results