Commit d64b4ac5 authored by zhangjun

ARM inference: use the Paddle Inference 2.0 API

Parent ff2aff64
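The change swaps the legacy paddle::AnalysisConfig / paddle::CreatePaddlePredictor calls for the paddle_infer API from Paddle Inference 2.0. Below is a minimal sketch of the 2.0-style predictor creation that the refactored cores in this diff follow; the MakePredictor helper and the "./model" path are illustrative assumptions, not code from this commit.

#include <memory>
#include "paddle_inference_api.h"  // Paddle Inference 2.0 C++ header

// Sketch: create a predictor the 2.0 way (Config + CreatePredictor).
std::shared_ptr<paddle_infer::Predictor> MakePredictor() {
  paddle_infer::Config config;
  config.SetModel("./model");            // illustrative model directory
  config.DisableGpu();                   // the ARM/Lite path does not use CUDA
  config.SwitchSpecifyInputNames(true);
  return paddle_infer::CreatePredictor(config);
}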
@@ -58,7 +58,7 @@ else()
endif()
if(WITH_LITE)
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
else()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference.tgz")
endif()
@@ -13,13 +13,13 @@
# limitations under the License
if (NOT CLIENT_ONLY)
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
endif()
@@ -28,8 +28,6 @@ namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
using configure::SigmoidConf;
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
@@ -57,31 +55,37 @@ class GlobalPaddleCreateMutex {
pthread_mutex_t _mut;
};
class GlobalSigmoidCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalSigmoidCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalSigmoidCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->Run(*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
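With the 2.0 API, Run() takes no arguments; data moves through the named tensor handles exposed above instead of std::vector<paddle::PaddleTensor> in/out parameters. A minimal usage sketch follows; the RunOnce helper, the input shape, and the float dtype are illustrative assumptions rather than code from this repository.

#include <vector>
#include "paddle_inference_api.h"

// Sketch: drive the handle-based interface that FluidFamilyCore now wraps.
void RunOnce(paddle_infer::Predictor* predictor) {
  std::vector<float> in_data(1 * 3 * 224 * 224, 0.f);  // assumed shape/dtype
  auto input = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  input->Reshape({1, 3, 224, 224});
  input->CopyFromCpu(in_data.data());   // host -> predictor

  predictor->Run();                     // no tensor arguments in 2.0

  auto output = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  int num = 1;
  for (int d : output->shape()) num *= d;
  std::vector<float> out_data(num);
  output->CopyToCpu(out_data.data());   // predictor -> host
}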
@@ -92,8 +96,7 @@ class FluidFamilyCore {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
paddle::PaddlePredictor* p_predictor =
(paddle::PaddlePredictor*)origin_core;
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
@@ -105,7 +108,7 @@ class FluidFamilyCore {
virtual void* get() { return _core.get(); }
protected:
std::unique_ptr<paddle::PaddlePredictor> _core;
std::shared_ptr<Predictor> _core;
};
// infer interface
@@ -119,67 +122,37 @@ class FluidArmAnalysisCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetParamsFile(data_path + "/__params__");
analysis_config.SetProgFile(data_path + "/__model__");
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
if (params.use_lite()) {
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
analysis_config.EnableXpu(100);
config.EnableXpu(100);
}
analysis_config.SwitchSpecifyInputNames(true);
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmAnalysisCore] create paddle predictor sucess, path: " << data_path;
params.dump();
return 0;
}
};
class FluidArmNativeCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.param_file = data_path + "/__params__";
native_config.prog_file = data_path + "/__model__";
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmNativeCore] create paddle predictor sucess, path: " << data_path;
params.dump();
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
@@ -194,63 +167,32 @@ class FluidArmAnalysisDirCore : public FluidFamilyCore {
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
config.SwitchIrOptim(false);
}
if (params.use_lite()) {
analysis_config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32, true);
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
analysis_config.EnableXpu(100);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "[FluidArmAnalysisDirCore] create paddle predictor sucess, path: " << data_path;
params.dump();
return 0;
}
};
class FluidArmNativeDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
config.EnableXpu(100);
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
@@ -342,214 +284,6 @@ class Parameter {
float* _params;
};
class SigmoidModel {
public:
~SigmoidModel() {}
int load(const char* sigmoid_w_file,
const char* sigmoid_b_file,
float exp_max,
float exp_min) {
AutoLock lock(GlobalSigmoidCreateMutex::instance());
if (0 != _sigmoid_w.init(2, 1, sigmoid_w_file) || 0 != _sigmoid_w.load()) {
LOG(ERROR) << "load params sigmoid_w failed.";
return -1;
}
VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
<< _sigmoid_w._params[1] << "].";
if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
LOG(ERROR) << "load params sigmoid_b failed.";
return -1;
}
VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
<< _sigmoid_b._params[1] << "].";
_exp_max_input = exp_max;
_exp_min_input = exp_min;
return 0;
}
int softmax(float x, double& o) { // NOLINT
float _y0 = x * _sigmoid_w._params[0] + _sigmoid_b._params[0];
float _y1 = x * _sigmoid_w._params[1] + _sigmoid_b._params[1];
_y0 = (_y0 > _exp_max_input)
? _exp_max_input
: ((_y0 < _exp_min_input) ? _exp_min_input : _y0);
_y1 = (_y1 > _exp_max_input)
? _exp_max_input
: ((_y1 < _exp_min_input) ? _exp_min_input : _y1);
o = 1.0f / (1.0f + exp(_y0 - _y1));
return 0;
}
public:
Parameter _sigmoid_w;
Parameter _sigmoid_b;
float _exp_max_input;
float _exp_min_input;
};
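For reference, SigmoidModel::softmax() clamps both linear scores to [_exp_min_input, _exp_max_input] and then returns the two-class softmax probability of the second score. Written out (this restates the code above, it is not new behaviour):

o = \frac{1}{1 + e^{y_0 - y_1}} = \frac{e^{y_1}}{e^{y_0} + e^{y_1}}, \qquad y_i = \mathrm{clamp}(w_i x + b_i)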
class SigmoidFluidModel {
public:
int softmax(float x, double& o) { // NOLINT
return _sigmoid_core->softmax(x, o);
} // NOLINT
std::unique_ptr<SigmoidFluidModel> Clone() {
std::unique_ptr<SigmoidFluidModel> clone_model;
clone_model.reset(new SigmoidFluidModel());
clone_model->_sigmoid_core = _sigmoid_core;
clone_model->_fluid_core = _fluid_core->Clone();
return std::move(clone_model); // NOLINT
}
public:
std::unique_ptr<paddle::PaddlePredictor> _fluid_core;
std::shared_ptr<SigmoidModel> _sigmoid_core;
};
class FluidArmWithSigmoidCore : public FluidFamilyCore {
public:
virtual ~FluidArmWithSigmoidCore() {}
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string model_path = params.get_path();
size_t pos = model_path.find_last_of("/\\");
std::string conf_path = model_path.substr(0, pos);
std::string conf_file = model_path.substr(pos);
configure::SigmoidConf conf;
if (configure::read_proto_conf(conf_path, conf_file, &conf) != 0) {
LOG(ERROR) << "failed load model path: " << model_path;
return -1;
}
_core.reset(new SigmoidFluidModel);
std::string fluid_model_data_path = conf.dnn_model_path();
predictor::InferEngineCreationParams new_params(params);
new_params.set_path(fluid_model_data_path);
int ret = load_fluid_model(new_params);
if (ret < 0) {
LOG(ERROR) << "fail to load fluid model.";
return -1;
}
const char* sigmoid_w_file = conf.sigmoid_w_file().c_str();
const char* sigmoid_b_file = conf.sigmoid_b_file().c_str();
float exp_max = conf.exp_max_input();
float exp_min = conf.exp_min_input();
_core->_sigmoid_core.reset(new SigmoidModel);
VLOG(2) << "create sigmoid core[" << _core->_sigmoid_core.get()
<< "], use count[" << _core->_sigmoid_core.use_count() << "].";
ret = _core->_sigmoid_core->load(
sigmoid_w_file, sigmoid_b_file, exp_max, exp_min);
if (ret < 0) {
LOG(ERROR) << "fail to load sigmoid model.";
return -1;
}
return 0;
}
virtual bool Run(const void* in_data, void* out_data) {
if (!_core->_fluid_core->Run(
*(std::vector<paddle::PaddleTensor>*)in_data,
(std::vector<paddle::PaddleTensor>*)out_data)) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int clone(SigmoidFluidModel* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
_core = origin_core->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
VLOG(2) << "clone sigmoid core[" << _core->_sigmoid_core.get()
<< "] use count[" << _core->_sigmoid_core.use_count() << "].";
return 0;
}
virtual SigmoidFluidModel* get() { return _core.get(); }
virtual int load_fluid_model(
const predictor::InferEngineCreationParams& params) = 0;
int softmax(float x, double& o) { // NOLINT
return _core->_sigmoid_core->softmax(x, o);
}
protected:
std::unique_ptr<SigmoidFluidModel> _core; // NOLINT
};
class FluidArmNativeDirWithSigmoidCore : public FluidArmWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::NativeConfig native_config;
native_config.model_dir = data_path;
native_config.use_gpu = false;
native_config.device = 0;
native_config.fraction_of_gpu_memory = 0;
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::NativeConfig,
paddle::PaddleEngineKind::kNative>(
native_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidArmAnalysisDirWithSigmoidCore : public FluidArmWithSigmoidCore {
public:
int load_fluid_model(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.DisableGpu();
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core->_fluid_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu
@@ -19,8 +19,6 @@ namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
DEFINE_int32(gpuid, 0, "device id to use");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
@@ -32,28 +30,6 @@ REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmAnalysisDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR_SIGMOID");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmNativeCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmNativeDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmNativeDirWithSigmoidCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_NATIVE_DIR_SIGMOID");
} // namespace fluid_arm
} // namespace paddle_serving
} // namespace baidu