Commit f9d9463f authored by guru4elephant

add pybind for client

Parent bf98fb59
@@ -73,6 +73,8 @@ include(external/gflags)
include(external/glog)
include(external/snappy)
include(external/gtest)
include(external/pybind11)
include(external/python)
include(generic)
include(flags)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(PYBIND_SOURCE_DIR ${THIRD_PARTY_PATH}/pybind)
include_directories(${PYBIND_SOURCE_DIR}/src/extern_pybind/include)
ExternalProject_Add(
extern_pybind
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/pybind/pybind11.git"
GIT_TAG "v2.2.4"
PREFIX ${PYBIND_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/pybind_dummy.c)
file(WRITE ${dummyfile} "const char * dummy_pybind = \"${dummyfile}\";")
add_library(pybind STATIC ${dummyfile})
else()
add_library(pybind INTERFACE)
endif()
add_dependencies(pybind extern_pybind)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FIND_PACKAGE(PythonInterp ${PY_VERSION} REQUIRED)
FIND_PACKAGE(PythonLibs ${PY_VERSION} REQUIRED)
if(WIN32)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"from distutils import sysconfig as s;import sys;import struct;
print(sys.prefix);
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
ERROR_VARIABLE _PYTHON_ERROR_VALUE)
if(NOT _PYTHON_SUCCESS MATCHES 0)
set(PYTHONLIBS_FOUND FALSE)
return()
endif()
# Convert the process output into a list
string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES})
string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
# Make sure all directory separators are '/'
string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
set(PYTHON_LIBRARY
"${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
# when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the
# original python installation. They may be found relative to PYTHON_INCLUDE_DIR.
if(NOT EXISTS "${PYTHON_LIBRARY}")
get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY)
set(PYTHON_LIBRARY
"${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib")
endif()
# raise an error if the python libs are still not found.
if(NOT EXISTS "${PYTHON_LIBRARY}")
message(FATAL_ERROR "Python libraries not found")
endif()
SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}")
endif(WIN32)
# FIXME: may need to locate a static library instead; detect SHARED/STATIC via FIND_PACKAGE.
ADD_LIBRARY(python SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET python PROPERTY IMPORTED_LOCATION ${PYTHON_LIBRARIES})
SET(py_env "")
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
@@ -30,6 +30,11 @@ target_link_libraries(load_general_model -Wl,--whole-archive sdk-cpp -Wl,--no-wh
-lpthread -lcrypto -lm -lrt -lssl -ldl
-lz)
add_library(paddle_serving_client SHARED src/general_model.cpp src/pybind_general_model.cpp)
add_dependencies(paddle_serving_client pybind)
target_link_libraries(paddle_serving_client brpc configure protobuf leveldb -lcrypto
-lssl -lz -lrt)
add_executable(echo ${CMAKE_CURRENT_LIST_DIR}/src/echo.cpp)
target_link_libraries(echo -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive
-lpthread -lcrypto -lm -lrt -lssl -ldl
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstring>
#include <fstream>
#include "general_model.h"
#include "sdk-cpp/builtin_format.pb.h"
#include "sdk-cpp/load_general_model_service.pb.h"
#include "sdk-cpp/include/common.h"
#include "sdk-cpp/include/predictor_sdk.h"
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
namespace baidu {
namespace paddle_serving {
namespace general_model {
void PredictorClient::connect(const std::vector<std::string> & ep_list) {
_eplist = ep_list;
}
FetchedMap PredictorClient::predict(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name) {
Request req;
std::vector<Tensor *> tensor_vec;
FeedInst * inst = req.add_insts();
for (auto & name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto & name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
int vec_idx = 0;
for (auto & name : float_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor * tensor = tensor_vec[idx];
for (int j = 0; j < _shape[idx].size(); ++j) {
tensor->add_shape(_shape[idx][j]);
}
tensor->set_elem_type(1);  // 1 = float32
tensor->mutable_data()->Reserve(
float_feed[vec_idx].size() * sizeof(float));
void * dst_ptr = tensor->mutable_data()->mutable_data();
memcpy(dst_ptr, float_feed[vec_idx].data(),
float_feed[vec_idx].size() * sizeof(float));
vec_idx++;
}
vec_idx = 0;
for (auto & name : int_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor * tensor = tensor_vec[idx];
for (int j = 0; j < _shape[idx].size(); ++j) {
tensor->add_shape(_shape[idx][j]);
}
tensor->set_elem_type(0);  // 0 = int64
tensor->mutable_data()->Reserve(
int_feed[vec_idx].size() * sizeof(int64_t));
void * dst_ptr = tensor->mutable_data()->mutable_data();
memcpy(dst_ptr, int_feed[vec_idx].data(),
int_feed[vec_idx].size() * sizeof(int64_t));
vec_idx++;
}
FetchedMap result;
Response res;
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed to call predictor with general model";
return result;
}
if (res.insts_size() == 0) {
LOG(ERROR) << "no fetch instance in the response";
return result;
}
FetchInst * inst = res.mutable_insts(0);
for (auto & name : fetch_name) {
int idx = _fetch_name_to_idx[name];
result[name].resize(inst->tensor_array(idx).data_size() / sizeof(float));
memcpy(result[name].data(),
inst->mutable_tensor_array(idx)->mutable_data(),
inst->tensor_array(idx).data_size());
}
return result;
}
FetchedMap PredictorClient::predict_with_profile(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name) {
FetchedMap res;
return res;
}
} // namespace general_model
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <string>
#include <vector>
#include <map>
#include "sdk-cpp/builtin_format.pb.h"
#include "sdk-cpp/general_model_service.pb.h"
#include "sdk-cpp/include/common.h"
#include "sdk-cpp/include/predictor_sdk.h"
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
// Given input data, pack it into protobuf messages and send the request.
namespace baidu {
namespace paddle_serving {
namespace general_model {
typedef std::map<std::string, std::vector<float>> FetchedMap;
class PredictorClient {
public:
PredictorClient() {}
~PredictorClient() {}
void init(const std::string & client_conf);
void connect(const std::vector<std::string> & ep_list);
FetchedMap predict(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name);
FetchedMap predict_with_profile(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name);
private:
PredictorApi _api;
Predictor * _predictor;
std::vector<std::string> _eplist;
std::map<std::string, int> _feed_name_to_idx;
std::map<std::string, int> _fetch_name_to_idx;
std::vector<std::vector<int> > _shape;
};
} // namespace general_model
} // namespace paddle_serving
} // namespace baidu
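For orientation, a minimal usage sketch of this client API from C++; the conf path, endpoint, and feed/fetch names below are hypothetical placeholders, and error handling is omitted:

#include "general_model.h"

int main() {
  baidu::paddle_serving::general_model::PredictorClient client;
  client.init("./conf/client.conf");    // hypothetical client conf path
  client.connect({"127.0.0.1:9292"});   // hypothetical endpoint
  std::vector<std::vector<float>> float_feed = {{0.1f, 0.2f}};
  std::vector<std::string> float_feed_name = {"words"};  // must match the conf
  std::vector<std::vector<int64_t>> int_feed;
  std::vector<std::string> int_feed_name;
  std::vector<std::string> fetch_name = {"prob"};        // hypothetical fetch var
  baidu::paddle_serving::general_model::FetchedMap result = client.predict(
      float_feed, float_feed_name, int_feed, int_feed_name, fetch_name);
  return 0;
}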
/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */
#include <pybind11/pybind11.h>
#include "general_model.h"
#include <pybind11/stl.h>
namespace py = pybind11;
namespace baidu {
namespace paddle_serving {
namespace general_model {
PYBIND11_MODULE(paddle_serving_client, m) {
m.doc() = R"pddoc(this is a practice
)pddoc";
py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
.def(py::init())
.def("init",
[](PredictorClient &self, const std::string & conf) {
self.init(conf);
})
.def("connect",
[](PredictorClient &self, const std::vector<std::string> & ep_list) {
self.connect(ep_list);
})
.def("predict",
[](PredictorClient &self,
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name) {
return self.predict(float_feed, float_feed_name,
int_feed, int_feed_name, fetch_name);
});
}
} // namespace general_model
} // namespace paddle_serving
} // namespace baidu
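Once compiled into the paddle_serving_client module, the bindings can also be smoke-tested from C++ via pybind11's embedded interpreter; a sketch assuming the built module is importable from sys.path (conf path and endpoint are placeholders):

#include <pybind11/embed.h>

namespace py = pybind11;

int main() {
  py::scoped_interpreter guard{};  // starts the interpreter for this scope
  py::module m = py::module::import("paddle_serving_client");
  py::object client = m.attr("PredictorClient")();
  client.attr("init")("./conf/client.conf");  // hypothetical conf path
  py::list endpoints;
  endpoints.append("127.0.0.1:9292");         // hypothetical endpoint
  client.attr("connect")(endpoints);
  return 0;
}

From Python, the equivalent is simply importing paddle_serving_client and calling the same methods on PredictorClient.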
@@ -12,6 +12,17 @@ engines {
force_update_static_cache: false
}
engines {
name: "general_model"
type: "FLUID_CPU_ANALYSIS_DIR"
reloadable_meta: "./data/model/paddle/fluid_time_file"
reloadable_type: "timestamp_ne"
model_data_path: "./data/model/paddle/fluid/text_classification"
runtime_thread_num: 0
batch_infer_size: 0
enable_batch_align: 0
}
engines {
name: "text_classification_bow"
type: "FLUID_CPU_ANALYSIS_DIR"
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "demo-serving/op/general_model_op.h"
#include <algorithm>
#include <iostream>
#include <mutex>  // for std::once_flag
#include <sstream>
#include "predictor/framework/infer.h"
#include "predictor/framework/memory.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::FetchInst;
static std::once_flag g_proto_init_flag;
int GeneralModelOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message());
TensorVector *in = butil::get_object<TensorVector>();
if (!in) {
LOG(ERROR) << "Failed get tls input object";
return -1;
}
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int> elem_type;
std::vector<int> elem_size;
std::vector<int> capacity;
if (batch_size > 0) {
int var_num = req->insts(0).tensor_array_size();
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
for (int i = 0; i < var_num; ++i) {
// a fresh PaddleTensor per variable, so shape/lod do not accumulate across variables
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0;
k < req->insts(0).tensor_array(i).shape_size();
++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
}
in->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if ((*in)[i].lod.size() > 0) {
for (int j = 0; j < batch_size; ++j) {
const Tensor & tensor = req->insts(j).tensor_array(i);
int data_len = tensor.data_size() / elem_size[i];
int cur_len = (*in)[i].lod[0].back();
(*in)[i].lod[0].push_back(cur_len + data_len);
}
(*in)[i].data.Resize((*in)[i].lod[0].back());
} else {
(*in)[i].data.Resize(batch_size * capacity[i]);
}
}
for (int i = 0; i < var_num; ++i) {
char * dst_ptr = static_cast<char *>((*in)[i].data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
memcpy(dst_ptr + offset,
(void *)(req->insts(j).tensor_array(i).data().data()),
req->insts(j).tensor_array(i).data_size() * elem_size[i]);
if ((*in)[i].lod.size() > 0) {
// advance by this instance's length, not by the cumulative lod value
offset += ((*in)[i].lod[0][j + 1] - (*in)[i].lod[0][j]) * elem_size[i];
} else {
offset += capacity[i] * elem_size[i];
}
}
}
TensorVector *out = butil::get_object<TensorVector>();
if (!out) {
LOG(ERROR) << "Failed get tls output object";
return -1;
}
if (predictor::InferManager::instance().infer(
GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: "
<< GENERAL_MODEL_NAME;
return -1;
}
Response * res = mutable_data<Response>();
// we assume the dtype of all fetch variables is float32
for (int i = 0; i < batch_size; ++i) {
FetchInst * fetch_inst = res->add_insts();
for (int j = 0; j < out->size(); ++j) {
Tensor * tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
if ((*out)[j].lod.size() > 0) {
tensor->add_shape(-1);
tensor->mutable_data()->Reserve(
(*out)[j].lod[0].back() * sizeof(float));
} else {
int cap = 1;
for (int k = 1; k < (*out)[j].shape.size(); ++k) {
cap *= (*out)[j].shape[k];
tensor->add_shape((*out)[j].shape[k]);
}
tensor->mutable_data()->Reserve(cap * sizeof(float));
}
}
}
for (int i = 0; i < out->size(); ++i) {
if ((*out)[i].lod.size() > 0) {
for (int j = 0; j < batch_size; ++j) {
Tensor * tensor = res->mutable_insts(j)->mutable_tensor_array(i);
void * dst_ptr = tensor->mutable_data()->mutable_data();
// fetch variables are float32, so copy with sizeof(float) rather than
// the input-side elem_size[i]
memcpy(dst_ptr,
static_cast<char *>((*out)[i].data.data()) +
(*out)[i].lod[0][j] * sizeof(float),
((*out)[i].lod[0][j + 1] - (*out)[i].lod[0][j]) * sizeof(float));
}
} else {
// per-instance capacity of this output, mirroring the Reserve loop above
int cap = 1;
for (int k = 1; k < (*out)[i].shape.size(); ++k) {
cap *= (*out)[i].shape[k];
}
for (int j = 0; j < batch_size; ++j) {
Tensor * tensor = res->mutable_insts(j)->mutable_tensor_array(i);
void * dst_ptr = tensor->mutable_data()->mutable_data();
memcpy(dst_ptr,
static_cast<char *>((*out)[i].data.data()) +
j * cap * sizeof(float),
cap * sizeof(float));
}
}
}
for (size_t i = 0; i < in->size(); ++i) {
(*in)[i].shape.clear();
}
in->clear();
butil::return_object<TensorVector>(in);
for (size_t i = 0; i < out->size(); ++i) {
(*out)[i].shape.clear();
}
out->clear();
butil::return_object<TensorVector>(out);
}
return 0;
}
DEFINE_OP(GeneralModelOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
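To make the LoD bookkeeping in inference() concrete, here is a tiny self-contained illustration (instance lengths invented for the example) of how cumulative lod offsets map instances into the flat data buffer:

#include <cstdio>
#include <vector>

int main() {
  // two instances with variable lengths 3 and 5 -> lod[0] = {0, 3, 8}
  std::vector<size_t> lod = {0};
  for (size_t len : {3, 5}) lod.push_back(lod.back() + len);
  // instance j occupies elements [lod[j], lod[j+1]) of the flat buffer,
  // i.e. bytes [lod[j] * elem_size, lod[j+1] * elem_size)
  for (size_t j = 0; j + 1 < lod.size(); ++j) {
    std::printf("instance %zu -> [%zu, %zu)\n", j, lod[j], lod[j + 1]);
  }
  return 0;
}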
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include "demo-serving/general_model_service.pb.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
static const char* GENERAL_MODEL_NAME = "general_model";
class GeneralModelOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralModelOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
@@ -9,6 +9,7 @@ LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/ctr_prediction.proto
${CMAKE_CURRENT_LIST_DIR}/bert_service.proto
${CMAKE_CURRENT_LIST_DIR}/load_general_model_service.proto
${CMAKE_CURRENT_LIST_DIR}/general_model_service.proto
)
PROTOBUF_GENERATE_SERVING_CPP(TRUE PROTO_SRCS PROTO_HDRS ${protofiles})
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated bytes data = 1;
optional int32 elem_type = 2;
repeated int32 shape = 3;
};
message FeedInst {
repeated Tensor tensor_array = 1;
};
message FetchInst {
repeated Tensor tensor_array = 1;
};
message Request {
repeated FeedInst insts = 1;
};
message Response {
repeated FetchInst insts = 1;
};
service GeneralModelService {
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
option (pds.options).generate_impl = true;
};
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated bytes data = 1;
optional int32 elem_type = 2;
repeated int32 shape = 3;
};
message FeedInst {
repeated Tensor tensor_array = 1;
};
message FetchInst {
repeated Tensor tensor_array = 1;
};
message Request {
repeated FeedInst insts = 1;
};
message Response {
repeated FetchInst insts = 1;
};
service GeneralModelService {
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
option (pds.options).generate_stub = true;
};
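The two proto copies above describe the same wire format and differ only in the pds.options flag: generate_impl produces the server-side service implementation, while generate_stub produces the client stub. As a sketch of the format itself, one plausible way to pack a single float32 feed into a Request with the generated classes (using add_data to append one bytes blob; field names come from the messages above, the helper is hypothetical):

#include <string>
#include <vector>
#include "general_model_service.pb.h"

using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Tensor;

Request make_request(const std::vector<float> & values) {
  Request req;
  FeedInst * inst = req.add_insts();
  Tensor * tensor = inst->add_tensor_array();
  tensor->set_elem_type(1);  // convention in this commit: 1 = float32, 0 = int64
  tensor->add_shape(1);      // batch dimension
  tensor->add_shape(static_cast<int>(values.size()));
  tensor->add_data(std::string(
      reinterpret_cast<const char *>(values.data()),
      values.size() * sizeof(float)));
  return req;
}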