/**
 * Comparator ordering pairs by their FIRST element, largest first.
 *
 * Intended as the predicate for std::sort when a descending order on
 * `first` is wanted; `second` is ignored.
 *
 * @param a left-hand pair
 * @param b right-hand pair
 * @return true when a.first is strictly greater than b.first
 */
template <typename T1, typename T2>
bool pair_comp_first_rev(const std::pair<T1, T2> &a, const std::pair<T1, T2> &b) {
    return a.first > b.first;
}

/**
 * Comparator ordering pairs by their SECOND element, largest first.
 *
 * Intended as the predicate for std::sort when a descending order on
 * `second` is wanted; `first` is ignored.
 *
 * @param a left-hand pair
 * @param b right-hand pair
 * @return true when a.second is strictly greater than b.second
 */
template <typename T1, typename T2>
bool pair_comp_second_rev(const std::pair<T1, T2> &a, const std::pair<T1, T2> &b) {
    return a.second > b.second;
}
+*/ +std::vector > + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id, + double cutoff_prob, + Scorer *ext_scorer, + bool nproc + ) +{ + int num_time_steps = probs_seq.size(); + + // assign space ID + std::vector::iterator it = std::find(vocabulary.begin(), vocabulary.end(), " "); + int space_id = it-vocabulary.begin(); + if(space_id >= vocabulary.size()) { + std::cout<<"The character space is not in the vocabulary!"; + exit(1); + } + + // initialize + // two sets containing selected and candidate prefixes respectively + std::map prefix_set_prev, prefix_set_next; + // probability of prefixes ending with blank and non-blank + std::map probs_b_prev, probs_nb_prev; + std::map probs_b_cur, probs_nb_cur; + prefix_set_prev["\t"] = 1.0; + probs_b_prev["\t"] = 1.0; + probs_nb_prev["\t"] = 0.0; + + for (int time_step=0; time_step prob = probs_seq[time_step]; + + std::vector > prob_idx; + for (int i=0; i(i, prob[i])); + } + // pruning of vacobulary + if (cutoff_prob < 1.0) { + std::sort(prob_idx.begin(), prob_idx.end(), pair_comp_second_rev); + float cum_prob = 0.0; + int cutoff_len = 0; + for (int i=0; i= cutoff_prob) break; + } + prob_idx = std::vector >(prob_idx.begin(), prob_idx.begin()+cutoff_len); + } + // extend prefix + for (std::map::iterator it = prefix_set_prev.begin(); + it != prefix_set_prev.end(); it++) { + std::string l = it->first; + if( prefix_set_next.find(l) == prefix_set_next.end()) { + probs_b_cur[l] = probs_nb_cur[l] = 0.0; + } + + for (int index=0; index 1) { + score = ext_scorer->get_score(l.substr(1)); + } + probs_nb_cur[l_plus] += score * prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]); + } else { + probs_nb_cur[l_plus] += prob_c * ( + probs_b_prev[l] + probs_nb_prev[l]); + } + prefix_set_next[l_plus] = probs_nb_cur[l_plus]+probs_b_cur[l_plus]; + } + } + + prefix_set_next[l] = probs_b_cur[l]+probs_nb_cur[l]; + } + + probs_b_prev = probs_b_cur; + probs_nb_prev = probs_nb_cur; + std::vector > 
+ prefix_vec_next(prefix_set_next.begin(), prefix_set_next.end()); + std::sort(prefix_vec_next.begin(), prefix_vec_next.end(), pair_comp_second_rev); + int k = beam_size + (prefix_vec_next.begin(), prefix_vec_next.begin()+k); + } + + // post processing + std::vector > beam_result; + for (std::map::iterator it = prefix_set_prev.begin(); + it != prefix_set_prev.end(); it++) { + if (it->second > 0.0 && it->first.size() > 1) { + double prob = it->second; + std::string sentence = it->first.substr(1); + // scoring the last word + if (ext_scorer != NULL && sentence[sentence.size()-1] != ' ') { + prob = prob * ext_scorer->get_score(sentence); + } + double log_prob = log(it->second); + beam_result.push_back(std::pair(log_prob, it->first)); + } + } + // sort the result and return + std::sort(beam_result.begin(), beam_result.end(), pair_comp_first_rev); + return beam_result; +} diff --git a/deep_speech_2/deploy/ctc_beam_search_decoder.h b/deep_speech_2/deploy/ctc_beam_search_decoder.h new file mode 100644 index 00000000..d23252ac --- /dev/null +++ b/deep_speech_2/deploy/ctc_beam_search_decoder.h @@ -0,0 +1,19 @@ +#ifndef CTC_BEAM_SEARCH_DECODER_H_ +#define CTC_BEAM_SEARCH_DECODER_H_ + +#include +#include +#include +#include "scorer.h" + +std::vector > + ctc_beam_search_decoder(std::vector > probs_seq, + int beam_size, + std::vector vocabulary, + int blank_id=0, + double cutoff_prob=1.0, + Scorer *ext_scorer=NULL, + bool nproc=false + ); + +#endif // CTC_BEAM_SEARCH_DECODER_H_ diff --git a/deep_speech_2/deploy/ctc_beam_search_decoder.i b/deep_speech_2/deploy/ctc_beam_search_decoder.i new file mode 100644 index 00000000..09e893d3 --- /dev/null +++ b/deep_speech_2/deploy/ctc_beam_search_decoder.i @@ -0,0 +1,22 @@ +%module swig_ctc_beam_search_decoder +%{ +#include "ctc_beam_search_decoder.h" +%} + +%include "std_vector.i" +%include "std_pair.i" +%include "std_string.i" + +namespace std{ + %template(DoubleVector) std::vector; + %template(IntVector) std::vector; + 
def compile_test(header, library):
    """Report whether g++ can build a trivial program using a header/library.

    Compiles ``int main() {}`` with ``header`` force-included and ``library``
    linked. The compiler is invoked with an argument list rather than through
    a shell string, so paths containing spaces cannot break the command and
    bash is not required. The throw-away binary is written to a temporary
    directory that is removed afterwards.

    Args:
        header: header file name passed to ``g++ -include``.
        library: library name passed to ``g++ -l`` (without the ``lib``
            prefix).

    Returns:
        True if g++ exited with status 0, False otherwise (including when
        g++ cannot be started at all).
    """
    import shutil
    import subprocess
    import tempfile

    build_dir = tempfile.mkdtemp()
    try:
        command = [
            'g++', '-include', header, '-l' + library, '-x', 'c++', '-', '-o',
            os.path.join(build_dir, 'dummy')
        ]
        with open(os.devnull, 'w') as devnull:
            try:
                proc = subprocess.Popen(
                    command,
                    stdin=subprocess.PIPE,
                    stdout=devnull,
                    stderr=devnull)
            except OSError:
                # no usable g++ on this machine: same outcome as a failed build
                return False
            # the program text is fed on stdin, matching the "-" source argument
            proc.communicate(b'int main() {}')
            return proc.returncode == 0
    finally:
        shutil.rmtree(build_dir, ignore_errors=True)
decoder""", + ext_modules=ctc_beam_search_decoder_module, + py_modules=['swig_ctc_beam_search_decoder'], ) diff --git a/deep_speech_2/deploy/scorer.cpp b/deep_speech_2/deploy/scorer.cpp new file mode 100644 index 00000000..9cb68055 --- /dev/null +++ b/deep_speech_2/deploy/scorer.cpp @@ -0,0 +1,82 @@ +#include + +#include "scorer.h" +#include "lm/model.hh" +#include "util/tokenize_piece.hh" +#include "util/string_piece.hh" + +using namespace lm::ngram; + +Scorer::Scorer(float alpha, float beta, std::string lm_model_path) { + this->_alpha = alpha; + this->_beta = beta; + this->_language_model = new Model(lm_model_path.c_str()); +} + +Scorer::~Scorer(){ + delete (Model *)this->_language_model; +} + +inline void strip(std::string &str, char ch=' ') { + if (str.size() == 0) return; + int start = 0; + int end = str.size()-1; + for (int i=0; i=0; i--) { + if (str[i] == ch) { + end --; + } else { + break; + } + } + + if (start == 0 && end == str.size()-1) return; + if (start > end) { + std::string emp_str; + str = emp_str; + } else { + str = str.substr(start, end-start+1); + } +} + +int Scorer::word_count(std::string sentence) { + strip(sentence); + int cnt = 0; + for (int i=0; i 0) cnt ++; + return cnt; +} + +double Scorer::language_model_score(std::string sentence) { + Model *model = (Model *)this->_language_model; + State state, out_state; + lm::FullScoreReturn ret; + state = model->BeginSentenceState(); + + for (util::TokenIter it(sentence, ' '); it; ++it){ + lm::WordIndex vocab = model->GetVocabulary().Index(*it); + ret = model->FullScore(state, vocab, out_state); + state = out_state; + } + double score = ret.prob; + + return pow(10, score); +} + +double Scorer::get_score(std::string sentence) { + double lm_score = language_model_score(sentence); + int word_cnt = word_count(sentence); + + double final_score = pow(lm_score, _alpha) * pow(word_cnt, _beta); + return final_score; +} diff --git a/deep_speech_2/deploy/scorer.h b/deep_speech_2/deploy/scorer.h new file mode 
/**
 * External scorer combining a language model score with a word-count term.
 *
 * The language model is held behind an untyped pointer so that this header
 * does not need the model's own headers; the implementation file allocates
 * it in the three-argument constructor and deletes it in the destructor.
 *
 * NOTE(review): the class owns _language_model but declares no copy
 * constructor or copy assignment, so copying a Scorer would make two objects
 * delete the same pointer. Pass it by pointer or reference.
 */
class Scorer{
private:
    float _alpha;            // exponent applied to the language model score
    float _beta;             // exponent applied to the word count
    void *_language_model;   // owned; deleted by ~Scorer()

public:
    // Initialise every member: the destructor deletes _language_model, so it
    // must never be left holding an indeterminate value.
    Scorer() : _alpha(0.0f), _beta(0.0f), _language_model(0) {}
    // Load the language model at lm_model_path and store both exponents.
    Scorer(float alpha, float beta, std::string lm_model_path);
    ~Scorer();
    // Number of space-separated words in the sentence.
    int word_count(std::string);
    // Language model score of the sentence.
    double language_model_score(std::string);
    // language_model_score^alpha * word_count^beta.
    double get_score(std::string);
};
# Native extension holding the SWIG-generated wrapper (scorer_wrap.cxx), the
# scorer implementation, and the language-model sources collected in FILES.
ext_modules = [
    Extension(
        name='_swig_scorer',
        sources=FILES + ['scorer_wrap.cxx', 'scorer.cpp'],
        # the build tooling matches this value against lowercase names
        # ("c", "c++", "objc"); 'C++' is not recognised and the C++ linker
        # driver is then not selected
        language='c++',
        include_dirs=['.'],
        libraries=LIBS,
        extra_compile_args=ARGS)
]

# Install the extension together with the SWIG-generated Python module.
setup(
    name='swig_scorer',
    version='0.1',
    ext_modules=ext_modules,
    include_package_data=True,
    py_modules=['swig_scorer'], )