PaddlePaddle / DeepSpeech

Commit d2d53cce
add cmakelist of decoder, nnet

Author: SmileGoat
Authored: Feb 22, 2022
Parent: e9043828
Showing 17 changed files with 228 additions and 103 deletions (+228 -103)
speechx/CMakeLists.txt  +46 -6
speechx/speechx/CMakeLists.txt  +21 -0
speechx/speechx/base/common.h  +3 -0
speechx/speechx/base/macros.h  +2 -0
speechx/speechx/decoder/CMakeLists.txt  +10 -2
speechx/speechx/decoder/ctc_beam_search_decoder.cc  +39 -35
speechx/speechx/decoder/ctc_beam_search_decoder.h  +15 -14
speechx/speechx/nnet/CMakeLists.txt  +2 -0
speechx/speechx/nnet/decodable-itf.h  +2 -0
speechx/speechx/nnet/decodable.cc  +18 -10
speechx/speechx/nnet/decodable.h  +7 -4
speechx/speechx/nnet/nnet_interface.h  +2 -1
speechx/speechx/nnet/paddle_nnet.cc  +19 -16
speechx/speechx/nnet/paddle_nnet.h  +13 -15
speechx/speechx/utils/CMakeLists.txt  +4 -0
speechx/speechx/utils/file_utils.cc  +17 -0
speechx/speechx/utils/file_utils.h  +8 -0
speechx/CMakeLists.txt

@@ -39,16 +39,40 @@ FetchContent_Declare(
   GIT_TAG "20210324.1"
 )
 FetchContent_MakeAvailable(absl)
-include_directories(${absl_SOURCE_DIR}/absl)
+include_directories(${absl_SOURCE_DIR})

 # libsndfile
+#include(FetchContent)
+#FetchContent_Declare(
+# libsndfile
+# GIT_REPOSITORY "https://github.com/libsndfile/libsndfile.git"
+# GIT_TAG "1.0.31"
+#)
+#FetchContent_MakeAvailable(libsndfile)
+
+# todo boost build
+#include(FetchContent)
+#FetchContent_Declare(
+# Boost
+# URL https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.zip
+# URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a
+#)
+#FetchContent_MakeAvailable(Boost)
+#include_directories(${Boost_SOURCE_DIR})
+set(BOOST_ROOT ${fc_patch}/boost-subbuild/boost-populate-prefix/src/boost_1_75_0)
+include_directories(${fc_patch}/boost-subbuild/boost-populate-prefix/src/boost_1_75_0)
+link_directories(${fc_patch}/boost-subbuild/boost-populate-prefix/src/boost_1_75_0/stage/lib)

 include(FetchContent)
 FetchContent_Declare(
-  libsndfile
-  GIT_REPOSITORY "https://github.com/libsndfile/libsndfile.git"
-  GIT_TAG "1.0.31"
+  kenlm
+  GIT_REPOSITORY "https://github.com/kpu/kenlm.git"
+  GIT_TAG "df2d717e95183f79a90b2fa6e4307083a351ca6a"
 )
-FetchContent_MakeAvailable(libsndfile)
+FetchContent_MakeAvailable(kenlm)
+add_dependencies(kenlm Boost)
+include_directories(${kenlm_SOURCE_DIR})

 # gflags
 FetchContent_Declare(
 ...

@@ -94,6 +118,22 @@ add_dependencies(openfst gflags glog)
 link_directories(${openfst_PREFIX_DIR}/lib)
 include_directories(${openfst_PREFIX_DIR}/include)

+set(PADDLE_LIB ${fc_patch}/paddle-lib/paddle_inference)
+include_directories("${PADDLE_LIB}/paddle/include")
+set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
+include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
+#include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
+#include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
+include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
+include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/include")
+link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
+#link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
+#link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
+link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
+link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib")
+link_directories("${PADDLE_LIB}/paddle/lib")
+
 add_subdirectory(speechx)

 #openblas
 ...
speechx/speechx/CMakeLists.txt

@@ -12,14 +12,35 @@ ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
 )
 add_subdirectory(kaldi)

+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/utils
+)
+add_subdirectory(utils)
+
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/frontend
 )
 add_subdirectory(frontend)

+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/nnet
+)
+add_subdirectory(nnet)
+
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/decoder
+)
+add_subdirectory(decoder)
+
 add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc)
 target_link_libraries(mfcc-test kaldi-mfcc)

 add_executable(linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc)
 target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog)

+#add_executable(offline_decoder_main codelab/decoder_test/offline_decoder_main.cc)
+#target_link_libraries(offline_decoder_main nnet decoder gflags glog)
speechx/speechx/base/common.h

@@ -17,6 +17,7 @@
 #include <deque>
 #include <iostream>
 #include <istream>
+#include <fstream>
 #include <map>
 #include <memory>
 #include <ostream>
 ...

@@ -27,7 +28,9 @@
 #include <vector>
 #include <unordered_map>
 #include <unordered_set>
+#include <mutex>

 #include "base/log.h"
+#include "base/flags.h"
 #include "base/basic_types.h"
 #include "base/macros.h"
speechx/speechx/base/macros.h

@@ -16,8 +16,10 @@
 namespace ppspeech {

+#ifndef DISALLOW_COPY_AND_ASSIGN
 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
     TypeName(const TypeName&) = delete;    \
     void operator=(const TypeName&) = delete
+#endif

 }  // namespace pp_speech
\ No newline at end of file
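For reference, the guard added above lets any class in speechx delete its copy operations with a single line. A minimal illustration follows; the class here is hypothetical and only shows how the macro is applied (CTCBeamSearch in this commit uses it the same way, see the header below):

#include "base/macros.h"

namespace ppspeech {

// Hypothetical example class, not part of this commit.
class FeatureCache {
  public:
    FeatureCache() = default;

  private:
    // Expands to a deleted copy constructor and deleted copy assignment,
    // so instances can only be passed by pointer or reference.
    DISALLOW_COPY_AND_ASSIGN(FeatureCache);
};

}  // namespace ppspeech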
speechx/speechx/decoder/CMakeLists.txt

-aux_source_directory(. DIR_LIB_SRCS)
-add_library(decoder STATIC ${DIR_LIB_SRCS})
+project(decoder)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR/ctc_decoders})
+add_library(decoder
+    ctc_beam_search_decoder.cc
+    ctc_decoders/decoder_utils.cpp
+    ctc_decoders/path_trie.cpp
+    ctc_decoders/scorer.cpp
+)
+target_link_libraries(decoder kenlm)
\ No newline at end of file
speechx/speechx/decoder/ctc_beam_search_decoder.cc

@@ -2,33 +2,32 @@
 #include "base/basic_types.h"
 #include "decoder/ctc_decoders/decoder_utils.h"
+#include "utils/file_utils.h"

 namespace ppspeech {

 using std::vector;
 using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;

-CTCBeamSearch::CTCBeamSearch(std::shared_ptr<CTCBeamSearchOptions> opts) :
+CTCBeamSearch::CTCBeamSearch(const CTCBeamSearchOptions& opts) :
     opts_(opts),
-    vocabulary_(nullptr),
     init_ext_scorer_(nullptr),
     blank_id(-1),
     space_id(-1),
-    num_frame_decoded(0),
+    num_frame_decoded_(0),
     root(nullptr) {
     LOG(INFO) << "dict path: " << opts_.dict_file;
-    vocabulary_ = std::make_shared<vector<string>>();
-    if (!basr::ReadDictToVector(opts_.dict_file, *vocabulary_)) {
+    if (!ReadFileToVector(opts_.dict_file, &vocabulary_)) {
         LOG(INFO) << "load the dict failed";
     }
-    LOG(INFO) << "read the vocabulary success, dict size: " << vocabulary_->size();
+    LOG(INFO) << "read the vocabulary success, dict size: " << vocabulary_.size();
     LOG(INFO) << "language model path: " << opts_.lm_path;
     init_ext_scorer_ = std::make_shared<Scorer>(opts_.alpha,
                                                 opts_.beta,
                                                 opts_.lm_path,
-                                                *vocabulary_);
+                                                vocabulary_);
 }

 void CTCBeamSearch::Reset() {
 ...

@@ -39,11 +38,11 @@ void CTCBeamSearch::Reset() {
 void CTCBeamSearch::InitDecoder() {
     blank_id = 0;
-    auto it = std::find(vocabulary_->begin(), vocabulary_->end(), " ");
+    auto it = std::find(vocabulary_.begin(), vocabulary_.end(), " ");

-    space_id = it - vocabulary_->begin();
+    space_id = it - vocabulary_.begin();
     // if no space in vocabulary
-    if ((size_t)space_id >= vocabulary_->size()) {
+    if ((size_t)space_id >= vocabulary_.size()) {
         space_id = -2;
     }
 ...

@@ -63,19 +62,24 @@ void CTCBeamSearch::InitDecoder() {
     }
 }

+void CTCBeamSearch::Decode(std::shared_ptr<kaldi::DecodableInterface> decodable) {
+    return;
+}
+
 int32 CTCBeamSearch::NumFrameDecoded() {
     return num_frame_decoded_;
 }

 // todo rename, refactor
 void CTCBeamSearch::AdvanceDecode(const std::shared_ptr<kaldi::DecodableInterface>& decodable,
                                   int max_frames) {
     while (max_frames > 0) {
         vector<vector<BaseFloat>> likelihood;
         if (decodable->IsLastFrame(NumFrameDecoded() + 1)) {
             break;
         }
         likelihood.push_back(decodable->FrameLogLikelihood(NumFrameDecoded() + 1));
-        AdvanceDecoding(result);
+        AdvanceDecoding(likelihood);
         max_frames--;
     }
 }
 ...

@@ -91,32 +95,21 @@ void CTCBeamSearch::ResetPrefixes() {
 int CTCBeamSearch::DecodeLikelihoods(const vector<vector<float>>& probs,
                                      vector<string>& nbest_words) {
-    std::thread::id this_id = std::this_thread::get_id();
-    Timer timer;
-    vector<vector<double>> double_probs(probs.size(), vector<double>(probs[0].size(), 0));
-    int row = probs.size();
-    int col = probs[0].size();
-    for (int i = 0; i < row; i++) {
-        for (int j = 0; j < col; j++){
-            double_probs[i][j] = static_cast<double>(probs[i][j]);
-        }
-    }
+    kaldi::Timer timer;
     timer.Reset();
-    AdvanceDecoding(double_probs);
+    AdvanceDecoding(probs);
     LOG(INFO) << "ctc decoding elapsed time(s) " << static_cast<float>(timer.Elapsed()) / 1000.0f;
     return 0;
 }

 vector<std::pair<double, string>> CTCBeamSearch::GetNBestPath() {
-    return get_beam_search_result(prefixes, *vocabulary_, opts_.beam_size);
+    return get_beam_search_result(prefixes, vocabulary_, opts_.beam_size);
 }

 string CTCBeamSearch::GetBestPath() {
     std::vector<std::pair<double, std::string>> result;
-    result = get_beam_search_result(prefixes, *vocabulary_, opts_.beam_size);
-    return result[0]->second;
+    result = get_beam_search_result(prefixes, vocabulary_, opts_.beam_size);
+    return result[0].second;
 }

 string CTCBeamSearch::GetFinalBestPath() {
 ...

@@ -125,12 +118,22 @@ string CTCBeamSearch::GetFinalBestPath() {
     return GetBestPath();
 }

-void CTCBeamSearch::AdvanceDecoding(const vector<vector<double>>& probs_seq) {
-    size_t num_time_steps = probs_seq.size();
+void CTCBeamSearch::AdvanceDecoding(const vector<vector<BaseFloat>>& probs) {
+    size_t num_time_steps = probs.size();
     size_t beam_size = opts_.beam_size;
     double cutoff_prob = opts_.cutoff_prob;
     size_t cutoff_top_n = opts_.cutoff_top_n;

+    vector<vector<double>> probs_seq(probs.size(), vector<double>(probs[0].size(), 0));
+    int row = probs.size();
+    int col = probs[0].size();
+    for (int i = 0; i < row; i++) {
+        for (int j = 0; j < col; j++){
+            probs_seq[i][j] = static_cast<double>(probs[i][j]);
+        }
+    }
+
     for (size_t time_step = 0; time_step < num_time_steps; time_step++) {
         const auto& prob = probs_seq[time_step];
 ...

@@ -158,6 +161,7 @@ void CTCBeamSearch::AdvanceDecoding(const vector<vector<double>>& probs_seq) {
     size_t log_prob_idx_len = log_prob_idx.size();
     for (size_t index = 0; index < log_prob_idx_len; index++) {
         SearchOneChar(full_beam, log_prob_idx[index], min_cutoff);
+    }

     prefixes.clear();
 ...

@@ -177,9 +181,9 @@ void CTCBeamSearch::AdvanceDecoding(const vector<vector<double>>& probs_seq) {
     }  // for probs_seq
 }

-int CTCBeamSearch::SearchOneChar(const bool& full_beam,
-                                 const std::pair<size_t, float>& log_prob_idx,
-                                 const float& min_cutoff) {
+int32 CTCBeamSearch::SearchOneChar(const bool& full_beam,
+                                   const std::pair<size_t, BaseFloat>& log_prob_idx,
+                                   const BaseFloat& min_cutoff) {
     size_t beam_size = opts_.beam_size;
     const auto& c = log_prob_idx.first;
     const auto& log_prob_c = log_prob_idx.second;
 ...
speechx/speechx/decoder/ctc_beam_search_decoder.h

-#include "base/basic_types.h"
+#include "base/common.h"
 #include "nnet/decodable-itf.h"
+#include "util/parse-options.h"
+#include "decoder/ctc_decoders/scorer.h"
+#include "decoder/ctc_decoders/path_trie.h"

 #pragma once
 ...

@@ -38,13 +41,10 @@ struct CTCBeamSearchOptions {
 };

 class CTCBeamSearch {
 public:
-    CTCBeamSearch(std::shared_ptr<CTCBeamSearchOptions> opts);
-    ~CTCBeamSearch() {
-    }
-    bool InitDecoder();
+    explicit CTCBeamSearch(const CTCBeamSearchOptions& opts);
+    ~CTCBeamSearch() {}
+    void InitDecoder();
     void Decode(std::shared_ptr<kaldi::DecodableInterface> decodable);
     std::string GetBestPath();
     std::vector<std::pair<double, std::string>> GetNBestPath();
 ...

@@ -52,27 +52,28 @@ public:
     int NumFrameDecoded();
     int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
                           std::vector<std::string>& nbest_words);
+    void AdvanceDecode(const std::shared_ptr<kaldi::DecodableInterface>& decodable,
+                       int max_frames);
     void Reset();

 private:
     void ResetPrefixes();
     int32 SearchOneChar(const bool& full_beam,
                         const std::pair<size_t, BaseFloat>& log_prob_idx,
                         const BaseFloat& min_cutoff);
     void CalculateApproxScore();
     void LMRescore();
-    void AdvanceDecoding(const std::vector<std::vector<double>>& probs_seq);
+    void AdvanceDecoding(const std::vector<std::vector<BaseFloat>>& probs);

     CTCBeamSearchOptions opts_;
     std::shared_ptr<Scorer> init_ext_scorer_; // todo separate later
     //std::vector<DecodeResult> decoder_results_;
-    std::vector<std::vector<std::string>> vocabulary_; // todo remove later
+    std::vector<std::string> vocabulary_; // todo remove later
     size_t blank_id;
     int space_id;
     std::shared_ptr<PathTrie> root;
     std::vector<PathTrie*> prefixes;
     int num_frame_decoded_;
+    DISALLOW_COPY_AND_ASSIGN(CTCBeamSearch);
 };

 } // namespace basr
\ No newline at end of file
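Taken together, the new header reads as a small decode API: construct the decoder from options, initialize it, feed it a decodable, and ask for the best path. A minimal usage sketch against that API (the helper function, option values, and file paths are hypothetical, not from this commit):

#include <memory>
#include <string>

#include "decoder/ctc_beam_search_decoder.h"

// `decodable` can be any kaldi::DecodableInterface implementation,
// for example the Decodable class further down in this commit.
std::string DecodeSketch(const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
    ppspeech::CTCBeamSearchOptions opts;
    opts.dict_file = "vocab.txt";   // hypothetical paths
    opts.lm_path = "lm.arpa";
    ppspeech::CTCBeamSearch decoder(opts);   // ctor loads the dict and the kenlm Scorer
    decoder.InitDecoder();
    decoder.AdvanceDecode(decodable, /*max_frames=*/30);   // consume up to 30 frames
    return decoder.GetFinalBestPath();
}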
speechx/speechx/nnet/CMakeLists.txt (new file, mode 100644)

+aux_source_directory(. DIR_LIB_SRCS)
+add_library(nnet STATIC ${DIR_LIB_SRCS})
speechx/speechx/nnet/decodable-itf.h

@@ -114,6 +114,8 @@ class DecodableInterface {
     /// this is for compatibility with OpenFst).
     virtual int32 NumIndices() const = 0;

+    virtual std::vector<BaseFloat> FrameLogLikelihood(int32 frame);
+
     virtual ~DecodableInterface() {}
 };

 /// @}
 ...
speechx/speechx/nnet/decodable.cc

@@ -2,15 +2,24 @@

 namespace ppspeech {

-Decodable::Acceptlikelihood(const kaldi::Matrix<BaseFloat>& likelihood) {
-    frames_ready_ += likelihood.NumRows();
-}
+using kaldi::BaseFloat;
+using kaldi::Matrix;
+
+Decodable::Decodable(const std::shared_ptr<NnetInterface>& nnet):
+    frontend_(NULL),
+    nnet_(nnet),
+    finished_(false),
+    frames_ready_(0) {
+}

-Decodable::Init(DecodableConfig config) {
-}
+void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
+    frames_ready_ += likelihood.NumRows();
+}

-Decodable::IsLastFrame(int32 frame) const {
+//Decodable::Init(DecodableConfig config) {
+//}
+
+bool Decodable::IsLastFrame(int32 frame) const {
     CHECK_LE(frame, frames_ready_);
     return finished_ && (frame == frames_ready_ - 1);
 }
 ...

@@ -19,12 +28,11 @@ int32 Decodable::NumIndices() const {
     return 0;
 }

-void Decodable::LogLikelihood(int32 frame, int32 index) {
-    return;
+BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
+    return 0;
 }

-void Decodable::FeedFeatures(const kaldi::Matrix<kaldi::BaseFloat>& features) {
+void Decodable::FeedFeatures(const Matrix<kaldi::BaseFloat>& features) {
+    // skip frame ???
     nnet_->FeedForward(features, &nnet_cache_);
     frames_ready_ += nnet_cache_.NumRows();
     return;
 ...
speechx/speechx/nnet/decodable.h

 #include "nnet/decodable-itf.h"
 #include "base/common.h"
+#include "kaldi/matrix/kaldi-matrix.h"
+#include "frontend/feature_extractor_interface.h"
+#include "nnet/nnet_interface.h"

 namespace ppspeech {

-struct DecodableConfig;
+struct DecodableOpts;

 class Decodable : public kaldi::DecodableInterface {
   public:
-    virtual void Init(DecodableOpts config);
+    explicit Decodable(const std::shared_ptr<NnetInterface>& nnet);
+    //void Init(DecodableOpts config);
     virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);
     virtual bool IsLastFrame(int32 frame) const;
     virtual int32 NumIndices() const;
 ...
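With the new constructor, Decodable is a thin adapter: it holds a NnetInterface, FeedFeatures() runs the model and caches its output, and a decoder then reads per-frame scores back through the kaldi::DecodableInterface methods. A rough sketch of that wiring (hypothetical glue code, not part of the commit):

#include <memory>

#include "nnet/decodable.h"

// `nnet` is any NnetInterface implementation (for example the PaddleNnet below);
// `feats` would come from the frontend. Names here are illustrative only.
std::shared_ptr<ppspeech::Decodable> MakeDecodableSketch(
        const std::shared_ptr<ppspeech::NnetInterface>& nnet,
        const kaldi::Matrix<kaldi::BaseFloat>& feats) {
    auto decodable = std::make_shared<ppspeech::Decodable>(nnet);
    decodable->FeedFeatures(feats);   // runs nnet_->FeedForward and caches the posteriors
    // A decoder can now call IsLastFrame()/FrameLogLikelihood() on `decodable`.
    return decodable;
}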
speechx/speechx/nnet/nnet_interface.h

@@ -3,13 +3,14 @@
 #include "base/basic_types.h"
 #include "kaldi/base/kaldi-types.h"
+#include "kaldi/matrix/kaldi-matrix.h"

 namespace ppspeech {

 class NnetInterface {
   public:
     virtual ~NnetInterface() {}
-    virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
+    virtual void FeedForward(const kaldi::Matrix<kaldi::BaseFloat>& features,
                              kaldi::Matrix<kaldi::BaseFloat>* inferences);
     virtual void Reset();
 ...
speechx/speechx/nnet/paddle_nnet.cc

@@ -3,6 +3,11 @@

 namespace ppspeech {

+using std::vector;
+using std::string;
+using std::shared_ptr;
+using kaldi::Matrix;
+
 void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {
     std::vector<std::string> cache_names;
     cache_names = absl::StrSplit(opts.cache_names, ", ");
 ...

@@ -25,14 +30,14 @@ void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {
     }
 }

-PaddleNet::PaddleNnet(const ModelOptions& opts) {
+PaddleNnet::PaddleNnet(const ModelOptions& opts) {
     paddle_infer::Config config;
     config.SetModel(opts.model_path, opts.params_path);
     if (opts.use_gpu) {
         config.EnableUseGpu(500, 0);
     }
     config.SwitchIrOptim(opts.switch_ir_optim);
-    if (opts.enbale_fc_padding) {
+    if (opts.enable_fc_padding) {
         config.DisableFCPadding();
     }
     if (opts.enable_profile) {
 ...

@@ -42,7 +47,7 @@ PaddleNet::PaddleNnet(const ModelOptions& opts) {
     if (pool == nullptr) {
         LOG(ERROR) << "create the predictor pool failed";
     }
-    pool_usages.resize(num_thread);
+    pool_usages.resize(opts.thread_num);
     std::fill(pool_usages.begin(), pool_usages.end(), false);
     LOG(INFO) << "load paddle model success";
 ...

@@ -51,7 +56,7 @@ PaddleNet::PaddleNnet(const ModelOptions& opts) {
     LOG(INFO) << "output names: " << opts.output_names;
     vector<string> input_names_vec = absl::StrSplit(opts.input_names, ", ");
     vector<string> output_names_vec = absl::StrSplit(opts.output_names, ", ");
-    paddle_infer::Predictor* predictor = get_predictor();
+    paddle_infer::Predictor* predictor = GetPredictor();
     std::vector<std::string> model_input_names = predictor->GetInputNames();
     assert(input_names_vec.size() == model_input_names.size());
 ...

@@ -64,12 +69,12 @@ PaddleNet::PaddleNnet(const ModelOptions& opts) {
     for (size_t i = 0; i < output_names_vec.size(); i++) {
         assert(output_names_vec[i] == model_output_names[i]);
     }
-    release_predictor(predictor);
+    ReleasePredictor(predictor);
     InitCacheEncouts(opts);
 }

-paddle_infer::Predictor* PaddleNnet::get_predictor() {
+paddle_infer::Predictor* PaddleNnet::GetPredictor() {
     LOG(INFO) << "attempt to get a new predictor instance " << std::endl;
     paddle_infer::Predictor* predictor = nullptr;
     std::lock_guard<std::mutex> guard(pool_mutex);
 ...

@@ -111,19 +116,18 @@ int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
     return 0;
 }

 shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const string& name) {
     auto iter = cache_names_idx_.find(name);
     if (iter == cache_names_idx_.end()) {
         return nullptr;
     }
     assert(iter->second < cache_encouts_.size());
-    return cache_encouts_[iter->second].get();
+    return cache_encouts_[iter->second];
 }

-void PaddleNet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>* inferences) const {
+void PaddleNnet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>* inferences) {
+    paddle_infer::Predictor* predictor = GetPredictor();
     // 1. get the names of all the input tensors
     int row = features.NumRows();
     int col = features.NumCols();
 ...

@@ -144,15 +148,13 @@ void PaddleNet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>
     input_len->CopyFromCpu(audio_len.data());

     // feed the streaming cache data
     std::unique_ptr<paddle_infer::Tensor> h_box = predictor->GetInputHandle(input_names[2]);
-    share_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]));
+    shared_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]);
     h_box->Reshape(h_cache->get_shape());
     h_box->CopyFromCpu(h_cache->get_data().data());
     std::unique_ptr<paddle_infer::Tensor> c_box = predictor->GetInputHandle(input_names[3]);
-    share_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
+    shared_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
     c_box->Reshape(c_cache->get_shape());
     c_box->CopyFromCpu(c_cache->get_data().data());
-    std::thread::id this_id = std::this_thread::get_id();
-    LOG(INFO) << this_id << " start to compute the probability";
     bool success = predictor->Run();
     if (success == false) {
 ...

@@ -172,8 +174,9 @@ void PaddleNet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>
     std::vector<int> output_shape = output_tensor->shape();
     row = output_shape[1];
     col = output_shape[2];
-    inference.Resize(row, col);
-    output_tensor->CopyToCpu(inference.Data());
+    inferences->Resize(row, col);
+    output_tensor->CopyToCpu(inferences->Data());
+    ReleasePredictor(predictor);
 }

 }  // namespace ppspeech
\ No newline at end of file
speechx/speechx/nnet/paddle_nnet.h

@@ -3,8 +3,12 @@
 #include "nnet/nnet_interface.h"
 #include "base/common.h"
-#include "paddle/paddle_inference_api.h"
+#include "paddle_inference_api.h"
+#include "kaldi/matrix/kaldi-matrix.h"
+#include "kaldi/util/options-itf.h"
+#include <numeric>

 namespace ppspeech {
 ...

@@ -20,7 +24,7 @@ struct ModelOptions {
     std::string cache_shape;
     bool enable_fc_padding;
     bool enable_profile;
-    ModelDecoderOptions() :
+    ModelOptions() :
         model_path("model/final.zip"),
         params_path("model/avg_1.jit.pdmodel"),
         thread_num(2),
 ...

@@ -49,16 +53,6 @@ struct ModelOptions {
     }
 };

-void Register(kaldi::OptionsItf* opts) {
-    _model_opts.Register(opts);
-    opts->Register("subsampling-rate", &subsampling_rate,
-                   "subsampling rate for deepspeech model");
-    opts->Register("receptive-field-length", &receptive_field_length,
-                   "receptive field length for deepspeech model");
-}
-};
-
 template<typename T>
 class Tensor {
 public:
 ...

@@ -91,15 +85,19 @@ private:
 class PaddleNnet : public NnetInterface {
 public:
     PaddleNnet(const ModelOptions& opts);
-    virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
-                             kaldi::Matrix<kaldi::BaseFloat>* inferences) const;
+    virtual void FeedForward(const kaldi::Matrix<kaldi::BaseFloat>& features,
+                             kaldi::Matrix<kaldi::BaseFloat>* inferences);
     std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(const std::string& name);
     void InitCacheEncouts(const ModelOptions& opts);

 private:
+    paddle_infer::Predictor* GetPredictor();
+    int ReleasePredictor(paddle_infer::Predictor* predictor);
     std::unique_ptr<paddle_infer::services::PredictorPool> pool;
     std::vector<bool> pool_usages;
     std::mutex pool_mutex;
+    std::map<paddle_infer::Predictor*, int> predictor_to_thread_id;
     std::map<std::string, int> cache_names_idx_;
     std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
 ...
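Since FeedForward is no longer const and the predictor helpers are now the private GetPredictor()/ReleasePredictor() pair, the intended entry point appears to be just constructing the nnet and calling FeedForward; each call checks a predictor out of the pool, runs it, and releases it. A minimal sketch under that assumption, relying on the ModelOptions defaults shown above:

#include "nnet/paddle_nnet.h"

// Sketch only: uses the default ModelOptions values from the diff
// (model/final.zip, model/avg_1.jit.pdmodel, 2 threads).
void ForwardSketch(const kaldi::Matrix<kaldi::BaseFloat>& feats) {
    ppspeech::ModelOptions opts;
    ppspeech::PaddleNnet nnet(opts);   // builds the predictor pool and the caches
    kaldi::Matrix<kaldi::BaseFloat> probs;
    nnet.FeedForward(feats, &probs);   // GetPredictor() -> Run() -> ReleasePredictor()
}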
speechx/speechx/utils/CMakeLists.txt

+add_library(utils
+    file_utils.cc
+)
speechx/speechx/utils/file_utils.cc (new file, mode 100644)

+#include "utils/file_utils.h"
+
+bool ReadFileToVector(const std::string& filename,
+                      std::vector<std::string>* vocabulary) {
+    std::ifstream file_in(filename);
+    if (!file_in) {
+        std::cerr << "please input a valid file" << std::endl;
+        return false;
+    }
+
+    std::string line;
+    while (std::getline(file_in, line)) {
+        vocabulary->emplace_back(line);
+    }
+
+    return true;
+}
speechx/speechx/utils/file_utils.h (new file, mode 100644)

+#include "base/common.h"
+
+namespace ppspeech {
+
+bool ReadFileToVector(const std::string& filename,
+                      std::vector<std::string>* data);
+
+}
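This is the helper the CTCBeamSearch constructor now uses to load its vocabulary, one symbol per line. A short usage sketch (the caller and the file name are hypothetical):

#include <string>
#include <vector>

#include "utils/file_utils.h"

std::vector<std::string> LoadVocabSketch() {
    std::vector<std::string> vocab;
    // Returns false (after printing "please input a valid file") if the path cannot be opened.
    if (!ppspeech::ReadFileToVector("vocab.txt", &vocab)) {
        vocab.clear();
    }
    return vocab;
}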