diff --git a/speechx/speechx/decoder/ctc_beam_search_opt.h b/speechx/speechx/decoder/ctc_beam_search_opt.h index d21b3abd8cee374ded993634106a997c3f0c8af9..4a4f817d4e36d7ec92210e849b75e9a6d9f3fd9e 100644 --- a/speechx/speechx/decoder/ctc_beam_search_opt.h +++ b/speechx/speechx/decoder/ctc_beam_search_opt.h @@ -37,13 +37,13 @@ struct CTCBeamSearchOptions { // u2 int first_beam_size; int second_beam_size; - CTCBeamSearchOptions() + explicit CTCBeamSearchOptions() : blank(0), dict_file("vocab.txt"), lm_path(""), + beam_size(300), alpha(1.9f), beta(5.0), - beam_size(300), cutoff_prob(0.99f), cutoff_top_n(40), num_proc_bsearch(10), diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc index 2986ea7eb7228b0f2aa63b69031b9d4247aaf006..7414d06de22695c60afa999250bfe7e8477816d7 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc @@ -31,7 +31,7 @@ using paddle::platform::TracerEventType; namespace ppspeech { -CTCPrefixBeamSearch::CTCPrefixBeamSearch(const std::string vocab_path, +CTCPrefixBeamSearch::CTCPrefixBeamSearch(const std::string& vocab_path, const CTCBeamSearchOptions& opts) : opts_(opts) { unit_table_ = std::shared_ptr( @@ -55,10 +55,7 @@ void CTCPrefixBeamSearch::Reset() { // empty hyp with Score std::vector empty; PrefixScore prefix_score; - prefix_score.b = 0.0f; // log(1) - prefix_score.nb = -kBaseFloatMax; // log(0) - prefix_score.v_b = 0.0f; // log(1) - prefix_score.v_nb = 0.0f; // log(1) + prefix_score.InitEmpty(); cur_hyps_[empty] = prefix_score; outputs_.emplace_back(empty); @@ -287,19 +284,7 @@ void CTCPrefixBeamSearch::UpdateOutputs( int s = 0; int e = 0; for (int i = 0; i < input.size(); ++i) { - // if (s < start_boundaries.size() && i == start_boundaries[s]){ - // // - // output.emplace_back(context_graph_->start_tag_id()); - // ++s; - // } - output.emplace_back(input[i]); - - // if (e < end_boundaries.size() && i == end_boundaries[e]){ - // // - // output.emplace_back(context_graph_->end_tag_id()); - // ++e; - // } } outputs_.emplace_back(output); diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h index 475b4d3575f60e0c8fad0f4ee36e015d1a4be5ad..a0c2a74ed5b19869c1a5c852855b6ca7418dfd06 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h @@ -27,7 +27,7 @@ namespace ppspeech { class ContextGraph; class CTCPrefixBeamSearch : public DecoderBase { public: - explicit CTCPrefixBeamSearch(const std::string vocab_path, + explicit CTCPrefixBeamSearch(const std::string& vocab_path, const CTCBeamSearchOptions& opts); ~CTCPrefixBeamSearch() {} @@ -77,7 +77,7 @@ class CTCPrefixBeamSearch : public DecoderBase { private: CTCBeamSearchOptions opts_; - std::shared_ptr unit_table_; + std::shared_ptr unit_table_{nullptr}; std::unordered_map, PrefixScore, PrefixScoreHash> cur_hyps_; @@ -92,7 +92,7 @@ class CTCPrefixBeamSearch : public DecoderBase { // Outputs contain the hypotheses_ and tags lik: and std::vector> outputs_; - std::shared_ptr context_graph_ = nullptr; + std::shared_ptr context_graph_{nullptr}; DISALLOW_COPY_AND_ASSIGN(CTCPrefixBeamSearch); }; diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc index d9cca14719a69304dcba4e485280444164603763..69f32686749245af0140e88f2d53742e86bd58a7 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc @@ -64,12 +64,11 @@ int main(int argc, char* argv[]) { // nnet ppspeech::ModelOptions model_opts; model_opts.model_path = FLAGS_model_path; - std::shared_ptr nnet(new ppspeech::U2Nnet(model_opts)); + std::shared_ptr nnet = std::make_shared(model_opts); // decodeable - std::shared_ptr raw_data(new ppspeech::DataCache()); - std::shared_ptr decodable( - new ppspeech::Decodable(nnet, raw_data)); + std::shared_ptr raw_data = std::make_shared(); + std::shared_ptr decodable = std::make_shared(nnet, raw_data); // decoder ppspeech::CTCBeamSearchOptions opts; diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_score.h b/speechx/speechx/decoder/ctc_prefix_beam_search_score.h index 3547b2b7bf5df5990078a80c98d71fdc697b1644..76b09e9b8c532a543d09bef5d5d9e7962bf50ee1 100644 --- a/speechx/speechx/decoder/ctc_prefix_beam_search_score.h +++ b/speechx/speechx/decoder/ctc_prefix_beam_search_score.h @@ -73,6 +73,13 @@ struct PrefixScore { int prefix_len) { CHECK(false); } + + void InitEmpty() { + b = 0.0f; // log(1) + nb = -kBaseFloatMax; // log(0) + v_b = 0.0f; // log(1) + v_nb = 0.0f; // log(1) + } }; struct PrefixScoreHash { diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.h b/speechx/speechx/decoder/ctc_tlg_decoder.h index f250ac25e26fcfb2abbf4b0331401cbd42e51021..0ff1de2a59eeb18513552934361f5deea0db4d14 100644 --- a/speechx/speechx/decoder/ctc_tlg_decoder.h +++ b/speechx/speechx/decoder/ctc_tlg_decoder.h @@ -31,8 +31,8 @@ namespace ppspeech { struct TLGDecoderOptions { kaldi::LatticeFasterDecoderConfig opts{}; // todo remove later, add into decode resource - std::string word_symbol_table{}; - std::string fst_path{}; + std::string word_symbol_table; + std::string fst_path; static TLGDecoderOptions InitFromFlags() { TLGDecoderOptions decoder_opts;