Merge pull request #1080 from zhangjun/infer

Optimize C++ inference code and remove redundant code

Merge pull request #1080 from zhangjun/infer
Optimize C++ inference code and remove redundant code
adccfda1 · TeslaZhao · GitHub · 25d3b826 · cd11fd38 · adccfda1
20 changed files
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -18,7 +18,7 @@ SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle)
 SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle)
 SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/)
 SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE)
-SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE)
+SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a" CACHE FILEPATH "Paddle library." FORCE)
 message("paddle install dir: " ${PADDLE_INSTALL_DIR})
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "2.0.0")
+SET(PADDLE_VERSION "2.0.1")
 if (WITH_GPU)
    if(CUDA_VERSION EQUAL 11.0)
        set(CUDA_SUFFIX "cuda11-cudnn8-avx-mkl")
@@ -55,9 +55,9 @@ if (WITH_GPU)
    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-${CUDA_SUFFIX}")
 elseif (WITH_LITE)
    if (WITH_XPU)
-        SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
+        SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}-xpu")
    else()
-        SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
+        SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
    endif()
 else()
    if (WITH_AVX)
@@ -139,8 +139,8 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
+ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
 if (WITH_TRT)
    ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)

--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -20,7 +20,7 @@ message EngineDesc {
  required string type = 2;
  required string reloadable_meta = 3;
  required string reloadable_type = 4;
-  required string model_data_path = 5;
+  required string model_dir = 5;
  required int32 runtime_thread_num = 6;
  required int32 batch_infer_size = 7;
  required int32 enable_batch_align = 8;
@@ -41,12 +41,13 @@ message EngineDesc {
  optional SparseParamServiceType sparse_param_service_type = 11;
  optional string sparse_param_service_table_name = 12;
  optional bool enable_memory_optimization = 13;
-  optional bool static_optimization = 14;
+  optional bool enable_ir_optimization = 14;
-  optional bool force_update_static_cache = 15;
+  optional bool use_trt = 15;
-  optional bool enable_ir_optimization = 16;
+  optional bool use_lite = 16;
-  optional bool use_trt = 17;
+  optional bool use_xpu = 17;
-  optional bool use_lite = 18;
+  optional bool use_gpu = 18;
-  optional bool use_xpu = 19;
+  optional bool combined_model = 19;
+  optional bool encrypted_model = 20;
 };
 // model_toolkit conf

--- a/core/configure/tests/test_configure.cpp
+++ b/core/configure/tests/test_configure.cpp
@@ -69,8 +69,6 @@ int test_write_conf() {
  engine->set_sparse_param_service_type(EngineDesc::LOCAL);
  engine->set_sparse_param_service_table_name("local_kv");
  engine->set_enable_memory_optimization(true);
-  engine->set_static_optimization(false);
-  engine->set_force_update_static_cache(false);
  int ret = baidu::paddle_serving::configure::write_proto_conf(
      &model_toolkit_conf, output_dir, model_toolkit_conf_file);

--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -2,33 +2,25 @@ include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../../)
 include(op/CMakeLists.txt)
 include(proto/CMakeLists.txt)
 add_executable(serving ${serving_srcs})
-add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api utils)
+add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api utils)
 if (WITH_GPU)
-    add_dependencies(serving fluid_gpu_engine)
+    add_dependencies(serving paddle_inference_engine)
 endif()
 if (WITH_LITE)
-    add_dependencies(serving fluid_arm_engine)
+    add_dependencies(serving paddle_inference_engine)
 endif()
 target_include_directories(serving PUBLIC
        ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
-        )
+)
-    include_directories(${CUDNN_ROOT}/include/)
+include_directories(${CUDNN_ROOT}/include/)
-if(WITH_GPU)
-    target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-            -Wl,--no-whole-archive)
-endif()
-if(WITH_LITE)
-    target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
-            -Wl,--no-whole-archive)
-endif()
-target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
+target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
        -Wl,--no-whole-archive)
-target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
+target_link_libraries(serving paddle_inference ${paddle_depend_libs})
 target_link_libraries(serving brpc)
 target_link_libraries(serving protobuf)
 target_link_libraries(serving pdserving)

--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -12,12 +12,12 @@ set_source_files_properties(
        ${pdserving_srcs}
        PROPERTIES
        COMPILE_FLAGS  "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid)
+add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_inference)
 if (WITH_TRT)
    add_definitions(-DWITH_TRT)
 endif()
 target_link_libraries(pdserving
-        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})
+        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs})
 # install
 install(TARGETS pdserving
        RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin

--- a/core/predictor/common/utils.h
+++ b/core/predictor/common/utils.h
@@ -14,6 +14,7 @@
 #pragma once
 #include <string>
+#include <fstream>
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/common/macros.h"
@@ -148,6 +149,16 @@ class IsDerivedFrom {
  }
 };
+// Reads the entire file `filename` into `*contents` as raw bytes.
+//
+// `*contents` is always cleared first. It is left empty when the file cannot
+// be opened, when its size cannot be determined, when the file is empty, or
+// when fewer bytes than expected could be read. Callers that require data
+// must therefore treat an empty result as a failure.
+//
+// Declared `inline` (not `static`) because it is defined in a header: this
+// avoids one private copy per translation unit and unused-function warnings.
+inline void ReadBinaryFile(const std::string& filename, std::string* contents) {
+  contents->clear();
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  if (!fin.is_open()) {
+    return;
+  }
+  fin.seekg(0, std::ios::end);
+  const std::streamoff size = fin.tellg();
+  // tellg() yields -1 on failure, which must not be passed to resize();
+  // a zero-length file has nothing to read and no element 0 to point at.
+  if (size <= 0) {
+    return;
+  }
+  contents->resize(static_cast<size_t>(size));
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(*contents)[0], static_cast<std::streamsize>(contents->size()));
+  if (!fin) {
+    // Short read: do not hand back a partially filled buffer.
+    contents->clear();
+  }
+  // The stream is closed by its destructor.
+}
 }  // namespace predictor
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -16,6 +16,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
+#include <pthread.h>
 #include <string>
 #include <utility>
 #include <vector>
@@ -29,83 +30,29 @@ namespace predictor {
 using configure::ModelToolkitConf;
-class InferEngineCreationParams {
+class AutoLock {
 public:
-  InferEngineCreationParams() {
+  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
-    _path = "";
+    pthread_mutex_lock(&mutex);
-    _enable_memory_optimization = false;
-    _enable_ir_optimization = false;
-    _static_optimization = false;
-    _force_update_static_cache = false;
-    _use_trt = false;
-    _use_lite = false;
-    _use_xpu = false;
  }
+  ~AutoLock() { pthread_mutex_unlock(&_mut); }
-  void set_path(const std::string& path) { _path = path; }
+ private:
+  pthread_mutex_t& _mut;
-  void set_enable_memory_optimization(bool enable_memory_optimization) {
+};
-    _enable_memory_optimization = enable_memory_optimization;
-  }
-  void set_enable_ir_optimization(bool enable_ir_optimization) {
-    _enable_ir_optimization = enable_ir_optimization;
-  }
-  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
-  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
-  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
-  bool enable_memory_optimization() const {
-    return _enable_memory_optimization;
-  }
-  bool enable_ir_optimization() const { return _enable_ir_optimization; }
-  bool use_trt() const { return _use_trt; }
-  bool use_lite() const { return _use_lite; }
-  bool use_xpu() const { return _use_xpu; }
-  void set_static_optimization(bool static_optimization = false) {
-    _static_optimization = static_optimization;
-  }
-  void set_force_update_static_cache(bool force_update_static_cache = false) {
-    _force_update_static_cache = force_update_static_cache;
-  }
-  bool static_optimization() const { return _static_optimization; }
-  bool force_update_static_cache() const { return _force_update_static_cache; }
-  std::string get_path() const { return _path; }
+class GlobalCreateMutex {
+ public:
+  pthread_mutex_t& mutex() { return _mut; }
-  void dump() const {
+  static pthread_mutex_t& instance() {
-    LOG(INFO) << "InferEngineCreationParams: "
+    static GlobalCreateMutex gmutex;
-              << "model_path = " << _path << ", "
+    return gmutex.mutex();
-              << "enable_memory_optimization = " << _enable_memory_optimization
-              << ", "
-              << "enable_tensorrt = " << _use_trt << ", "
-              << "enable_lite = " << _use_lite << ", "
-              << "enable_xpu = " << _use_xpu << ", "
-              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
-              << "static_optimization = " << _static_optimization << ", "
-              << "force_update_static_cache = " << _force_update_static_cache;
  }
 private:
-  std::string _path;
+  GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-  bool _enable_memory_optimization;
+  pthread_mutex_t _mut;
-  bool _enable_ir_optimization;
-  bool _static_optimization;
-  bool _force_update_static_cache;
-  bool _use_trt;
-  bool _use_lite;
-  bool _use_xpu;
 };
 class InferEngine {
@@ -152,57 +99,19 @@ class ReloadableInferEngine : public InferEngine {
    uint64_t last_revision;
  };
-  virtual int load(const InferEngineCreationParams& params) = 0;
+  virtual int load(const configure::EngineDesc& conf) = 0;
  int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
    _reload_tag_file = conf.reloadable_meta();
    _reload_mode_tag = conf.reloadable_type();
-    _model_data_path = conf.model_data_path();
+    _model_data_path = conf.model_dir();
    _infer_thread_num = conf.runtime_thread_num();
    _infer_batch_size = conf.batch_infer_size();
    _infer_batch_align = conf.enable_batch_align();
-    bool enable_memory_optimization = false;
+    _conf = conf;
-    if (conf.has_enable_memory_optimization()) {
-      enable_memory_optimization = conf.enable_memory_optimization();
-    }
-    bool static_optimization = false;
-    if (conf.has_static_optimization()) {
-      static_optimization = conf.static_optimization();
-    }
-    bool force_update_static_cache = false;
-    if (conf.has_force_update_static_cache()) {
-      force_update_static_cache = conf.force_update_static_cache();
-    }
-    if (conf.has_enable_ir_optimization()) {
+    if (!check_need_reload() || load(conf) != 0) {
-      _infer_engine_params.set_enable_ir_optimization(
-          conf.enable_ir_optimization());
-    }
-    _infer_engine_params.set_path(_model_data_path);
-    if (enable_memory_optimization) {
-      _infer_engine_params.set_enable_memory_optimization(true);
-      _infer_engine_params.set_static_optimization(static_optimization);
-      _infer_engine_params.set_force_update_static_cache(
-          force_update_static_cache);
-    }
-    if (conf.has_use_trt()) {
-      _infer_engine_params.set_use_trt(conf.use_trt());
-    }
-    if (conf.has_use_lite()) {
-      _infer_engine_params.set_use_lite(conf.use_lite());
-    }
-    if (conf.has_use_xpu()) {
-      _infer_engine_params.set_use_xpu(conf.use_xpu());
-    }
-    if (!check_need_reload() || load(_infer_engine_params) != 0) {
      LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
      return -1;
    }
@@ -230,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
    if (_infer_thread_num > 0) {
      return 0;
    }
    return thrd_initialize_impl();
  }
@@ -254,13 +162,13 @@ class ReloadableInferEngine : public InferEngine {
  int reload() {
    if (check_need_reload()) {
      LOG(WARNING) << "begin reload model[" << _model_data_path << "].";
-      return load(_infer_engine_params);
+      return load(_conf);
    }
    return 0;
  }
  uint64_t version() const { return _version; }
  uint32_t thread_num() const { return _infer_thread_num; }
 private:
@@ -322,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
 protected:
  std::string _model_data_path;
-  InferEngineCreationParams _infer_engine_params;
+  configure::EngineDesc _conf;
 private:
  std::string _reload_tag_file;
@@ -361,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    return ReloadableInferEngine::proc_initialize(conf, version);
  }
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
    if (_reload_vec.empty()) {
      return 0;
    }
    for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
-      if (load_data(_reload_vec[ti], params) != 0) {
+      if (load_data(_reload_vec[ti], conf) != 0) {
        LOG(ERROR) << "Failed reload engine model: " << ti;
        return -1;
      }
    }
-    LOG(WARNING) << "Succ load engine, path: " << params.get_path();
+    LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
    return 0;
  }
  int load_data(ModelData<EngineCore>* md,
-                const InferEngineCreationParams& params) {
+                const configure::EngineDesc& conf) {
    uint32_t next_idx = (md->current_idx + 1) % 2;
    if (md->cores[next_idx]) {
      delete md->cores[next_idx];
@@ -387,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    md->cores[next_idx] = new (std::nothrow) EngineCore;
-    params.dump();
+    //params.dump();
-    if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
+    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
-      LOG(ERROR) << "Failed create model, path: " << params.get_path();
+      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
      return -1;
    }
    md->current_idx = next_idx;
@@ -400,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    // memory pool to be inited in non-serving-threads
    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
-    if (!md || load_data(md, _infer_engine_params) != 0) {
+    if (!md || load_data(md, _conf) != 0) {
      LOG(ERROR) << "Failed create thread data from "
-                 << _infer_engine_params.get_path();
+                 << _conf.model_dir();
      return -1;
    }
@@ -458,16 +366,16 @@ class CloneDBReloadableInferEngine
    return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
  }
-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
    // 加载进程级模型数据
    if (!_pd ||
-        DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
+        DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
-      LOG(ERROR) << "Failed to create common model from [" << params.get_path()
+      LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
                 << "].";
      return -1;
    }
    LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
-                 << "], path[" << params.get_path() << "].";
+                 << "], path[" << conf.model_dir() << "].";
    if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
      return 0;
@@ -483,7 +391,7 @@ class CloneDBReloadableInferEngine
      }
    }
-    LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
+    LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
    return 0;
  }
@@ -527,18 +435,18 @@ class CloneDBReloadableInferEngine
      _pd;  // 进程级EngineCore，多个线程级EngineCore共用该对象的模型数据
 };
-template <typename FluidFamilyCore>
+template <typename PaddleInferenceCore>
 #ifdef WITH_TRT
-class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
 #else
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
 #endif
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}
  std::vector<std::string> GetInputNames() {
-    FluidFamilyCore* core =
+    PaddleInferenceCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
@@ -546,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  }
  std::vector<std::string> GetOutputNames() {
-    FluidFamilyCore* core =
+    PaddleInferenceCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
@@ -556,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
      const std::string& name) {
-    FluidFamilyCore* core =
+    PaddleInferenceCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
@@ -566,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
      const std::string& name) {
-    FluidFamilyCore* core =
+    PaddleInferenceCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
    }
@@ -575,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  }
  int infer_impl() {
-    FluidFamilyCore* core =
+    PaddleInferenceCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in infer_impl()";
      return -1;

--- a/paddle_inference/CMakeLists.txt
+++ b/paddle_inference/CMakeLists.txt
@@ -13,13 +13,5 @@
 # limitations under the License
 if (NOT CLIENT_ONLY)
-    add_subdirectory(inferencer-fluid-cpu)
+    add_subdirectory(paddle)
-    if (WITH_GPU)
-        add_subdirectory(inferencer-fluid-gpu)
-    endif()
-    if (WITH_LITE)
-        add_subdirectory(inferencer-fluid-arm)
-    endif()
 endif()
--- a/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-arm/CMakeLists.txt
-FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
-add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
-target_include_directories(fluid_arm_engine PUBLIC
-        ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
-target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
-install(TARGETS fluid_arm_engine 
-        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
-        )
--- a/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
-FILE(GLOB fluid_cpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
-add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})
-target_include_directories(fluid_cpu_engine PUBLIC
-        ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_cpu_engine pdserving extern_paddle configure)
-target_link_libraries(fluid_cpu_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
-install(TARGETS fluid_cpu_engine 
-        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
-        )
--- a/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
+++ b/paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include <pthread.h>
-#include <fstream>
-#include <map>
-#include <string>
-#include <vector>
-#include "core/configure/include/configure_parser.h"
-#include "core/configure/inferencer_configure.pb.h"
-#include "core/predictor/framework/infer.h"
-#include "paddle_inference_api.h"  // NOLINT
-namespace baidu {
-namespace paddle_serving {
-namespace fluid_cpu {
-class AutoLock {
- public:
-  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
-    pthread_mutex_lock(&mutex);
-  }
-  ~AutoLock() { pthread_mutex_unlock(&_mut); }
- private:
-  pthread_mutex_t& _mut;
-};
-class GlobalPaddleCreateMutex {
- public:
-  pthread_mutex_t& mutex() { return _mut; }
-  static pthread_mutex_t& instance() {
-    static GlobalPaddleCreateMutex gmutex;
-    return gmutex.mutex();
-  }
- private:
-  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-  pthread_mutex_t _mut;
-};
-using paddle_infer::Config;
-using paddle_infer::Predictor;
-using paddle_infer::Tensor;
-using paddle_infer::CreatePredictor;
-// data interface
-class FluidFamilyCore {
- public:
-  virtual ~FluidFamilyCore() {}
-  virtual std::vector<std::string> GetInputNames() {
-    return _core->GetInputNames();
-  }
-  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
-    return _core->GetInputHandle(name);
-  }
-  virtual std::vector<std::string> GetOutputNames() {
-    return _core->GetOutputNames();
-  }
-  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
-    return _core->GetOutputHandle(name);
-  }
-  virtual bool Run() {
-    if (!_core->Run()) {
-      LOG(ERROR) << "Failed call Run with paddle predictor";
-      return false;
-    }
-    return true;
-  }
-  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
-  virtual int clone(void* origin_core) {
-    if (origin_core == NULL) {
-      LOG(ERROR) << "origin paddle Predictor is null.";
-      return -1;
-    }
-    Predictor* p_predictor = (Predictor*)origin_core;
-    _core = p_predictor->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
-      return -1;
-    }
-    return 0;
-  }
-  virtual void* get() { return _core.get(); }
- protected:
-  std::shared_ptr<Predictor> _core;
-};
-// infer interface
-class FluidCpuAnalysisCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-    Config config;
-    config.SetParamsFile(data_path + "/__params__");
-    config.SetProgFile(data_path + "/__model__");
-    config.DisableGpu();
-    config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-class FluidCpuAnalysisDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-    Config config;
-    config.SetModel(data_path);
-    config.DisableGpu();
-    config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    if (params.enable_ir_optimization()) {
-      config.SwitchIrOptim(true);
-    } else {
-      config.SwitchIrOptim(false);
-    }
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-class Parameter {
- public:
-  Parameter() : _row(0), _col(0), _params(NULL) {}
-  ~Parameter() {
-    VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
-    destroy();
-  }
-  int init(int row, int col, const char* file_name) {
-    destroy();
-    _file_name = file_name;
-    _row = row;
-    _col = col;
-    _params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
-    if (_params == NULL) {
-      LOG(ERROR) << "Load " << _file_name << " malloc error.";
-      return -1;
-    }
-    VLOG(2) << "Load parameter file[" << _file_name << "] success.";
-    return 0;
-  }
-  void destroy() {
-    _row = 0;
-    _col = 0;
-    if (_params != NULL) {
-      free(_params);
-      _params = NULL;
-    }
-  }
-  int load() {
-    if (_params == NULL || _row <= 0 || _col <= 0) {
-      LOG(ERROR) << "load parameter error [not inited].";
-      return -1;
-    }
-    FILE* fs = fopen(_file_name.c_str(), "rb");
-    if (fs == NULL) {
-      LOG(ERROR) << "load " << _file_name << " fopen error.";
-      return -1;
-    }
-    static const uint32_t MODEL_FILE_HEAD_LEN = 16;
-    char head[MODEL_FILE_HEAD_LEN] = {0};
-    if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
-      destroy();
-      LOG(ERROR) << "Load " << _file_name << " read head error.";
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      return -1;
-    }
-    uint32_t matrix_size = _row * _col;
-    if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      VLOG(2) << "load " << _file_name << " read ok.";
-      return 0;
-    } else {
-      LOG(ERROR) << "load " << _file_name << " read error.";
-      destroy();
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      return -1;
-    }
-    return 0;
-  }
- public:
-  std::string _file_name;
-  int _row;
-  int _col;
-  float* _params;
-};
-class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
- public:
-  void ReadBinaryFile(const std::string& filename, std::string* contents) {
-    std::ifstream fin(filename, std::ios::in | std::ios::binary);
-    fin.seekg(0, std::ios::end);
-    contents->clear();
-    contents->resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(contents->at(0)), contents->size());
-    fin.close();
-  }
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path note exits: "
-                 << data_path;
-      return -1;
-    }
-    std::string model_buffer, params_buffer, key_buffer;
-    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
-    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
-    ReadBinaryFile(data_path + "key", &key_buffer);
-    VLOG(2) << "prepare for encryption model";
-    auto cipher = paddle::MakeCipher("");
-    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
-    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
-    Config analysis_config;
-    // paddle::AnalysisConfig analysis_config;
-    analysis_config.SetModelBuffer(&real_model_buffer[0],
-                                   real_model_buffer.size(),
-                                   &real_params_buffer[0],
-                                   real_params_buffer.size());
-    analysis_config.DisableGpu();
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-    analysis_config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    VLOG(2) << "decrypt model file sucess";
-    _core = CreatePredictor(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-}  // namespace fluid_cpu
-}  // namespace paddle_serving
-}  // namespace baidu
--- a/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-cpu/src/fluid_cpu_engine.cpp
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h"
-#include "core/predictor/framework/factory.h"
-namespace baidu {
-namespace paddle_serving {
-namespace fluid_cpu {
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS");
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_DIR");
-#if 1
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidCpuAnalysisEncryptCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_CPU_ANALYSIS_ENCRYPT");
-#endif
-}  // namespace fluid_cpu
-}  // namespace paddle_serving
-}  // namespace baidu
--- a/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
-FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
-add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
-target_include_directories(fluid_gpu_engine PUBLIC
-        ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
-target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
-install(TARGETS fluid_gpu_engine 
-        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
-        )
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#pragma once
-#include <pthread.h>
-#include <fstream>
-#include <map>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
-#include "core/configure/include/configure_parser.h"
-#include "core/configure/inferencer_configure.pb.h"
-#include "core/predictor/framework/infer.h"
-#include "paddle_inference_api.h"  // NOLINT
-DECLARE_int32(gpuid);
-namespace baidu {
-namespace paddle_serving {
-namespace fluid_gpu {
-using configure::SigmoidConf;
-class AutoLock {
- public:
-  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
-    pthread_mutex_lock(&mutex);
-  }
-  ~AutoLock() { pthread_mutex_unlock(&_mut); }
- private:
-  pthread_mutex_t& _mut;
-};
-class GlobalPaddleCreateMutex {
- public:
-  pthread_mutex_t& mutex() { return _mut; }
-  static pthread_mutex_t& instance() {
-    static GlobalPaddleCreateMutex gmutex;
-    return gmutex.mutex();
-  }
- private:
-  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-  pthread_mutex_t _mut;
-};
-using paddle_infer::Config;
-using paddle_infer::Predictor;
-using paddle_infer::Tensor;
-using paddle_infer::CreatePredictor;
-// data interface
-class FluidFamilyCore {
- public:
-  virtual ~FluidFamilyCore() {}
-  virtual std::vector<std::string> GetInputNames() {
-    return _core->GetInputNames();
-  }
-  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
-    return _core->GetInputHandle(name);
-  }
-  virtual std::vector<std::string> GetOutputNames() {
-    return _core->GetOutputNames();
-  }
-  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
-    return _core->GetOutputHandle(name);
-  }
-  virtual bool Run() {
-    if (!_core->Run()) {
-      LOG(ERROR) << "Failed call Run with paddle predictor";
-      return false;
-    }
-    return true;
-  }
-  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
-  virtual int clone(void* origin_core) {
-    if (origin_core == NULL) {
-      LOG(ERROR) << "origin paddle Predictor is null.";
-      return -1;
-    }
-    Predictor* p_predictor = (Predictor*)origin_core;
-    _core = p_predictor->Clone();
-    if (_core.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
-      return -1;
-    }
-    return 0;
-  }
-  virtual void* get() { return _core.get(); }
- protected:
-  std::shared_ptr<Predictor> _core;
-};
-// infer interface
-class FluidGpuAnalysisCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-    Config config;
-    config.SetParamsFile(data_path + "/__params__");
-    config.SetProgFile(data_path + "/__model__");
-    config.EnableUseGpu(100, FLAGS_gpuid);
-    config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-class FluidGpuAnalysisDirCore : public FluidFamilyCore {
- public:
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
-      return -1;
-    }
-    Config config;
-    config.SetModel(data_path);
-    config.EnableUseGpu(1500, FLAGS_gpuid);
-    config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    int max_batch = 32;
-    int min_subgraph_size = 3;
-    if (params.use_trt()) {
-      config.EnableTensorRtEngine(1 << 20,
-                                  max_batch,
-                                  min_subgraph_size,
-                                  Config::Precision::kFloat32,
-                                  false,
-                                  false);
-      LOG(INFO) << "create TensorRT predictor";
-    } else {
-      if (params.enable_memory_optimization()) {
-        config.EnableMemoryOptim();
-      }
-      if (params.enable_ir_optimization()) {
-        config.SwitchIrOptim(true);
-      } else {
-        config.SwitchIrOptim(false);
-      }
-    }
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    _core = CreatePredictor(config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-class Parameter {
- public:
-  Parameter() : _row(0), _col(0), _params(NULL) {}
-  ~Parameter() {
-    LOG(INFO) << "before destroy Parameter, file_name[" << _file_name << "]";
-    destroy();
-  }
-  int init(int row, int col, const char* file_name) {
-    destroy();
-    _file_name = file_name;
-    _row = row;
-    _col = col;
-    _params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
-    if (_params == NULL) {
-      LOG(ERROR) << "Load " << _file_name << " malloc error.";
-      return -1;
-    }
-    VLOG(2) << "Load parameter file[" << _file_name << "] success.";
-    return 0;
-  }
-  void destroy() {
-    _row = 0;
-    _col = 0;
-    if (_params != NULL) {
-      free(_params);
-      _params = NULL;
-    }
-  }
-  int load() {
-    if (_params == NULL || _row <= 0 || _col <= 0) {
-      LOG(ERROR) << "load parameter error [not inited].";
-      return -1;
-    }
-    FILE* fs = fopen(_file_name.c_str(), "rb");
-    if (fs == NULL) {
-      LOG(ERROR) << "load " << _file_name << " fopen error.";
-      return -1;
-    }
-    static const uint32_t MODEL_FILE_HEAD_LEN = 16;
-    char head[MODEL_FILE_HEAD_LEN] = {0};
-    if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
-      destroy();
-      LOG(ERROR) << "Load " << _file_name << " read head error.";
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      return -1;
-    }
-    uint32_t matrix_size = _row * _col;
-    if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      LOG(INFO) << "load " << _file_name << " read ok.";
-      return 0;
-    } else {
-      LOG(ERROR) << "load " << _file_name << " read error.";
-      destroy();
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      return -1;
-    }
-    return 0;
-  }
- public:
-  std::string _file_name;
-  int _row;
-  int _col;
-  float* _params;
-};
-class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
- public:
-  void ReadBinaryFile(const std::string& filename, std::string* contents) {
-    std::ifstream fin(filename, std::ios::in | std::ios::binary);
-    fin.seekg(0, std::ios::end);
-    contents->clear();
-    contents->resize(fin.tellg());
-    fin.seekg(0, std::ios::beg);
-    fin.read(&(contents->at(0)), contents->size());
-    fin.close();
-  }
-  int create(const predictor::InferEngineCreationParams& params) {
-    std::string data_path = params.get_path();
-    if (access(data_path.c_str(), F_OK) == -1) {
-      LOG(ERROR) << "create paddle predictor failed, path note exits: "
-                 << data_path;
-      return -1;
-    }
-    std::string model_buffer, params_buffer, key_buffer;
-    ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
-    ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
-    ReadBinaryFile(data_path + "key", &key_buffer);
-    VLOG(2) << "prepare for encryption model";
-    auto cipher = paddle::MakeCipher("");
-    std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
-    std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
-    Config analysis_config;
-    analysis_config.SetModelBuffer(&real_model_buffer[0],
-                                   real_model_buffer.size(),
-                                   &real_params_buffer[0],
-                                   real_params_buffer.size());
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
-    analysis_config.SetCpuMathLibraryNumThreads(1);
-    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim();
-    }
-    analysis_config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
-    VLOG(2) << "decrypt model file sucess";
-    _core = CreatePredictor(analysis_config);
-    if (NULL == _core.get()) {
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-}  // namespace fluid_gpu
-}  // namespace paddle_serving
-}  // namespace baidu
--- a/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
+++ b/paddle_inference/inferencer-fluid-gpu/src/fluid_gpu_engine.cpp
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include "paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h"
-#include "core/predictor/framework/factory.h"
-DEFINE_int32(gpuid, 0, "GPU device id to use");
-namespace baidu {
-namespace paddle_serving {
-namespace fluid_gpu {
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuAnalysisCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_ANALYSIS");
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuAnalysisDirCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_ANALYSIS_DIR");
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
-        FluidGpuAnalysisEncryptCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_GPU_ANALYSIS_ENCRPT")
-}  // namespace fluid_gpu
-}  // namespace paddle_serving
-}  // namespace baidu
--- a/paddle_inference/paddle/CMakeLists.txt
+++ b/paddle_inference/paddle/CMakeLists.txt
+# Unified Paddle inference engine library, built from every .cpp under src/.
+file(GLOB paddle_inference_engine_srcs
+        ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
+add_library(paddle_inference_engine
+        ${paddle_inference_engine_srcs})
+target_include_directories(paddle_inference_engine
+        PUBLIC ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
+# Build-order edges: these targets must be built before this one.
+add_dependencies(paddle_inference_engine
+        pdserving
+        extern_paddle
+        configure)
+target_link_libraries(paddle_inference_engine
+        pdserving
+        paddle_inference
+        -lpthread
+        -lcrypto
+        -lm
+        -lrt
+        -lssl
+        -ldl
+        -lz)
+# The install rule covers the ARCHIVE artifact only.
+install(TARGETS paddle_inference_engine
+        ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib)
--- a/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
+++ b/paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -17,275 +17,174 @@
 #include <pthread.h>
 #include <fstream>
 #include <map>
+#include <memory>
 #include <string>
 #include <vector>
 #include "core/configure/include/configure_parser.h"
 #include "core/configure/inferencer_configure.pb.h"
+#include "core/predictor/common/utils.h"
 #include "core/predictor/framework/infer.h"
 #include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
-namespace fluid_arm {
+namespace inference {
-class AutoLock {
- public:
-  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
-    pthread_mutex_lock(&mutex);
-  }
-  ~AutoLock() { pthread_mutex_unlock(&_mut); }
- private:
-  pthread_mutex_t& _mut;
-};
-class GlobalPaddleCreateMutex {
- public:
-  pthread_mutex_t& mutex() { return _mut; }
-  static pthread_mutex_t& instance() {
-    static GlobalPaddleCreateMutex gmutex;
-    return gmutex.mutex();
-  }
- private:
-  GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
-  pthread_mutex_t _mut;
-};
 using paddle_infer::Config;
+using paddle_infer::PrecisionType;
 using paddle_infer::Predictor;
 using paddle_infer::Tensor;
-using paddle_infer::PrecisionType;
 using paddle_infer::CreatePredictor;
-// data interface
+DECLARE_int32(gpuid);
-class FluidFamilyCore {
+static const int max_batch = 32;
+static const int min_subgraph_size = 3;
+// Engine Base
+// Abstract base for engines that hold a paddle_infer::Predictor. The
+// input/output accessors and Run() simply forward to the held predictor;
+// derived classes implement create() to build it from an EngineDesc.
+class PaddleEngineBase {
 public:
-  virtual ~FluidFamilyCore() {}
+  virtual ~PaddleEngineBase() {}
  virtual std::vector<std::string> GetInputNames() {
-    return _core->GetInputNames();
+    return _predictor->GetInputNames();
  }
  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
-    return _core->GetInputHandle(name);
+    return _predictor->GetInputHandle(name);
  }
  virtual std::vector<std::string> GetOutputNames() {
-    return _core->GetOutputNames();
+    return _predictor->GetOutputNames();
  }
  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
-    return _core->GetOutputHandle(name);
+    return _predictor->GetOutputHandle(name);
  }
+  // Runs the held predictor; logs and returns false when the predictor's
+  // Run() reports failure, true otherwise.
  virtual bool Run() {
-    if (!_core->Run()) {
+    if (!_predictor->Run()) {
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }
-  virtual int create(const predictor::InferEngineCreationParams& params) = 0;
+  // Builds the predictor from the engine configuration; implemented by
+  // derived classes.
+  virtual int create(const configure::EngineDesc& conf) = 0;
-  virtual int clone(void* origin_core) {
+  // Replaces the held predictor with a Clone() of the Predictor passed in as
+  // an opaque pointer. Returns 0 on success, -1 when the pointer is NULL or
+  // the clone comes back NULL.
+  virtual int clone(void* predictor) {
-    if (origin_core == NULL) {
+    if (predictor == NULL) {
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
-    Predictor* p_predictor = (Predictor*)origin_core;
+    Predictor* prep = static_cast<Predictor*>(predictor);
-    _core = p_predictor->Clone();
+    _predictor = prep->Clone();
-    if (_core.get() == NULL) {
+    if (_predictor.get() == NULL) {
-      LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
+      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
      return -1;
    }
    return 0;
  }
-  virtual void* get() { return _core.get(); }
+  // Exposes the raw Predictor pointer as void*.
+  virtual void* get() { return _predictor.get(); }
 protected:
-  std::shared_ptr<Predictor> _core;
+  std::shared_ptr<Predictor> _predictor;
 };
-// infer interface
+// Paddle Inference Engine
+// Single engine that configures a paddle_infer::Config from an EngineDesc
+// (model layout, GPU/TensorRT/Lite/XPU switches, IR and memory optimization,
+// optional encrypted model) and creates the predictor from it.
-class FluidArmAnalysisCore : public FluidFamilyCore {
+class PaddleInferenceEngine : public PaddleEngineBase {
 public:
-  int create(const predictor::InferEngineCreationParams& params) {
+  // Creates the predictor described by engine_conf.
+  // Returns 0 on success, -1 when model_dir does not exist or
+  // CreatePredictor yields NULL.
+  int create(const configure::EngineDesc& engine_conf) {
-    std::string data_path = params.get_path();
+    std::string model_path = engine_conf.model_dir();
-    if (access(data_path.c_str(), F_OK) == -1) {
+    if (access(model_path.c_str(), F_OK) == -1) {
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
-                 << data_path;
+                 << model_path;
      return -1;
    }
    Config config;
-    config.SetParamsFile(data_path + "/__params__");
+    // todo, auto config(zhangjun)
-    config.SetProgFile(data_path + "/__model__");
+    // An unset combined_model falls back to the combined
+    // __params__/__model__ layout.
+    if (engine_conf.has_combined_model()) {
-    config.DisableGpu();
+      if (!engine_conf.combined_model()) {
-    config.SetCpuMathLibraryNumThreads(1);
+        config.SetModel(model_path);
+      } else {
-    if (params.use_lite()) {
+        config.SetParamsFile(model_path + "/__params__");
-      config.EnableLiteEngine(PrecisionType::kFloat32, true);
+        config.SetProgFile(model_path + "/__model__");
-    }
+      }
-    if (params.use_xpu()) {
-      config.EnableXpu(2 * 1024 * 1024);
-    }
-    if (params.enable_memory_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    if (params.enable_ir_optimization()) {
-      config.SwitchIrOptim(true);
    } else {
-      config.SwitchIrOptim(false);
+      config.SetParamsFile(model_path + "/__params__");
+      config.SetProgFile(model_path + "/__model__");
    }
    config.SwitchSpecifyInputNames(true);
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    config.SetCpuMathLibraryNumThreads(1);
-    _core = CreatePredictor(config);
+    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
-    if (NULL == _core.get()) {
+      // 2000MB GPU memory
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
+      config.EnableUseGpu(2000, FLAGS_gpuid);
-      return -1;
    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
+    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
-    return 0;
+      // GPU is switched on here when it was not already requested above.
+      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
-  }
+        config.EnableUseGpu(2000, FLAGS_gpuid);
-};
+      }
+      config.EnableTensorRtEngine(1 << 20,
-class FluidArmAnalysisDirCore : public FluidFamilyCore {
+                                  max_batch,
- public:
+                                  min_subgraph_size,
-  int create(const predictor::InferEngineCreationParams& params) {
+                                  Config::Precision::kFloat32,
-    std::string data_path = params.get_path();
+                                  false,
-    if (access(data_path.c_str(), F_OK) == -1) {
+                                  false);
-      LOG(ERROR) << "create paddle predictor failed, path not exits: "
+      LOG(INFO) << "create TensorRT predictor";
-                 << data_path;
-      return -1;
    }
-    Config config;
+    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
-    config.SetModel(data_path);
-    config.DisableGpu();
-    config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
-    if (params.use_lite()) {
      config.EnableLiteEngine(PrecisionType::kFloat32, true);
    }
-    if (params.use_xpu()) {
+    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
+      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
    }
+    // IR optimization stays on unless the config explicitly turns it off.
+    if (engine_conf.has_enable_ir_optimization() &&
-    if (params.enable_memory_optimization()) {
+        !engine_conf.enable_ir_optimization()) {
-      config.EnableMemoryOptim();
-    }
-    if (params.enable_ir_optimization()) {
-      config.SwitchIrOptim(true);
-    } else {
      config.SwitchIrOptim(false);
+    } else {
+      config.SwitchIrOptim(true);
    }
-    AutoLock lock(GlobalPaddleCreateMutex::instance());
+    if (engine_conf.has_enable_memory_optimization() &&
-    _core = CreatePredictor(config);
+        engine_conf.enable_memory_optimization()) {
-    if (NULL == _core.get()) {
+      config.EnableMemoryOptim();
-      LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
-      return -1;
-    }
-    VLOG(2) << "create paddle predictor sucess, path: " << data_path;
-    return 0;
-  }
-};
-class Parameter {
- public:
-  Parameter() : _row(0), _col(0), _params(NULL) {}
-  ~Parameter() {
-    VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
-    destroy();
-  }
-  int init(int row, int col, const char* file_name) {
-    destroy();
-    _file_name = file_name;
-    _row = row;
-    _col = col;
-    _params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
-    if (_params == NULL) {
-      LOG(ERROR) << "Load " << _file_name << " malloc error.";
-      return -1;
    }
-    VLOG(2) << "Load parameter file[" << _file_name << "] success.";
-    return 0;
-  }
-  void destroy() {
+    if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
-    _row = 0;
+      // decrypt model
+      // Join with '/' exactly like the __model__/__params__ paths above, so a
+      // model_dir without a trailing slash still resolves to files inside it.
-    _col = 0;
+      std::string model_buffer, params_buffer, key_buffer;
-    if (_params != NULL) {
+      predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
-      free(_params);
+      predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
-      _params = NULL;
+      predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
-    }
-  }
-  int load() {
+      auto cipher = paddle::MakeCipher("");
-    if (_params == NULL || _row <= 0 || _col <= 0) {
+      std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
-      LOG(ERROR) << "load parameter error [not inited].";
+      std::string real_params_buffer =
-      return -1;
+          cipher->Decrypt(params_buffer, key_buffer);
+      config.SetModelBuffer(&real_model_buffer[0],
+                            real_model_buffer.size(),
+                            &real_params_buffer[0],
+                            real_params_buffer.size());
    }
-    FILE* fs = fopen(_file_name.c_str(), "rb");
+    // Predictor creation is serialized through the global create mutex.
+    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
-    if (fs == NULL) {
+    _predictor = CreatePredictor(config);
-      LOG(ERROR) << "load " << _file_name << " fopen error.";
+    if (NULL == _predictor.get()) {
-      return -1;
+      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
-    }
-    static const uint32_t MODEL_FILE_HEAD_LEN = 16;
-    char head[MODEL_FILE_HEAD_LEN] = {0};
-    if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
-      destroy();
-      LOG(ERROR) << "Load " << _file_name << " read head error.";
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
      return -1;
    }
-    uint32_t matrix_size = _row * _col;
+    VLOG(2) << "create paddle predictor sucess, path: " << model_path;
-    if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      VLOG(2) << "load " << _file_name << " read ok.";
-      return 0;
-    } else {
-      LOG(ERROR) << "load " << _file_name << " read error.";
-      destroy();
-      if (fs != NULL) {
-        fclose(fs);
-        fs = NULL;
-      }
-      return -1;
-    }
    return 0;
  }
- public:
-  std::string _file_name;
-  int _row;
-  int _col;
-  float* _params;
 };
-}  // namespace fluid_arm
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
+++ b/paddle_inference/inferencer-fluid-arm/src/fluid_arm_engine.cpp
-// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,24 +12,20 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
+#include "paddle_inference/paddle/include/paddle_engine.h"
 #include "core/predictor/framework/factory.h"
 namespace baidu {
 namespace paddle_serving {
-namespace fluid_arm {
+namespace inference {
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+// Defines the gpuid flag (default 0) that the engine header declares and
+// passes to Config::EnableUseGpu.
+DEFINE_int32(gpuid, 0, "GPU device id to use");
-    ::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_ARM_ANALYSIS");
+// The single "PADDLE_INFER" entry takes the place of the per-device
+// FLUID_*_ANALYSIS* registrations removed by this change.
+// NOTE(review): presumably the macro registers the engine type with the
+// InferEngine factory under that name - confirm in factory.h.
 REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<
+    ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
-        FluidArmAnalysisDirCore>,
    ::baidu::paddle_serving::predictor::InferEngine,
-    "FLUID_ARM_ANALYSIS_DIR");
+    "PADDLE_INFER");
-}  // namespace fluid_arm
+}  // namespace inference
 }  // namespace paddle_serving
 }  // namespace baidu
--- a/tools/cpp_examples/demo-serving/CMakeLists.txt
+++ b/tools/cpp_examples/demo-serving/CMakeLists.txt
@@ -41,24 +41,24 @@ include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
 include(op/CMakeLists.txt)
 include(proto/CMakeLists.txt)
 add_executable(serving ${serving_srcs})
-add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid
+# paddle_inference_engine replaces both fluid_cpu_engine and fluid_gpu_engine,
+# so it is depended on and whole-archived exactly once. The former WITH_GPU
+# branches would only repeat the same target and put the archive on the link
+# line twice.
+add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference
        opencv_imgcodecs cube-api)
-if (WITH_GPU)
-    add_dependencies(serving fluid_gpu_engine)
-endif()
 target_include_directories(serving PUBLIC
        ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
        )
-if(WITH_GPU)
-    target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-            -Wl,--no-whole-archive)
-endif()
-target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
+target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
        -Wl,--no-whole-archive)
-target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
+target_link_libraries(serving paddle_inference ${paddle_depend_libs})
 target_link_libraries(serving opencv_imgcodecs
        ${opencv_depend_libs})

--- a/tools/cpp_examples/elastic-ctr/serving/CMakeLists.txt
+++ b/tools/cpp_examples/elastic-ctr/serving/CMakeLists.txt
@@ -18,16 +18,16 @@ include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
 include(op/CMakeLists.txt)
 include(proto/CMakeLists.txt)
 add_executable(elastic_serving ${serving_srcs})
-add_dependencies(elastic_serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api)
+# Build-order dependencies only; linking is done by the
+# target_link_libraries() calls below.
+add_dependencies(elastic_serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api)
 target_include_directories(elastic_serving PUBLIC
        ${CMAKE_CURRENT_BINARY_DIR}/../../predictor
        )
-target_link_libraries(elastic_serving -Wl,--whole-archive fluid_cpu_engine
+# The engine archive is linked between --whole-archive/--no-whole-archive.
+# NOTE(review): presumably so its static factory registrations are not
+# dropped by the linker - confirm.
+target_link_libraries(elastic_serving -Wl,--whole-archive paddle_inference_engine
        -Wl,--no-whole-archive)
-target_link_libraries(elastic_serving paddle_fluid ${paddle_depend_libs})
+target_link_libraries(elastic_serving paddle_inference ${paddle_depend_libs})
 target_link_libraries(elastic_serving pdserving)
 target_link_libraries(elastic_serving cube-api)