reduce memory copy

8970af96 · MRXLT · 4c0f6820 · 8970af96 · 8970af96 · 8970af96
4 changed files
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -59,6 +59,18 @@ class PredictorRes {
  std::map<std::string, std::vector<std::vector<float>>> _float_map;
 };
+class PredictorResBatch {  // Holds one PredictorRes per batch instance.
+ public:
+  PredictorResBatch() {}
+  ~PredictorResBatch() {}
+ public:
+  const PredictorRes& at(const int index) { return _predictres_vector[index]; }  // unchecked operator[] access; index is not validated here
+ public:
+  std::vector<PredictorRes> _predictres_vector;  // resized to the batch size by PredictorClient::batch_predict
+};
 class PredictorClient {
 public:
  PredictorClient() {}
@@ -91,6 +103,15 @@ class PredictorClient {
      const std::vector<std::string>& int_feed_name,
      const std::vector<std::string>& fetch_name);
+  int batch_predict(  // Out-parameter overload: per-instance results are written into predict_res.
+      const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
+      const std::vector<std::string>& float_feed_name,
+      const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
+      const std::vector<std::string>& int_feed_name,
+      const std::vector<std::string>& fetch_name,  // names of the tensors to copy out of the response
+      PredictorResBatch& predict_res,  // NOLINT
+      const int& pid);  // NOTE(review): definition names the out-param predict_res_batch and ends with `return 0`; the vector-returning overload declared below appears to lose its definition in this change - confirm
  std::vector<PredictorRes> batch_predict(
      const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
      const std::vector<std::string>& float_feed_name,

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -264,22 +264,20 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
  return 0;
 }
-std::vector<PredictorRes> PredictorClient::batch_predict(
+int PredictorClient::batch_predict(
    const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
    const std::vector<std::string> &float_feed_name,
    const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
    const std::vector<std::string> &int_feed_name,
    const std::vector<std::string> &fetch_name,
+    PredictorResBatch &predict_res_batch,
    const int &pid) {
  int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
-  std::vector<std::vector<std::vector<float>>> fetch_result_batch;
-  std::vector<PredictorRes> predict_res_batch;
  Timer timeline;
  int64_t preprocess_start = timeline.TimeStampUS();
-  predict_res_batch.resize(batch_size);
+  predict_res_batch._predictres_vector.resize(batch_size);
  int fetch_name_num = fetch_name.size();
  _api.thrd_clear();
@@ -370,8 +368,8 @@ std::vector<PredictorRes> PredictorClient::batch_predict(
    postprocess_start = client_infer_end;
    for (int bi = 0; bi < batch_size; bi++) {
-      predict_res_batch[bi]._int64_map.clear();
+      predict_res_batch._predictres_vector[bi]._int64_map.clear();
-      predict_res_batch[bi]._float_map.clear();
+      predict_res_batch._predictres_vector[bi]._float_map.clear();
      for (auto &name : fetch_name) {
        int idx = _fetch_name_to_idx[name];
@@ -379,24 +377,26 @@ std::vector<PredictorRes> PredictorClient::batch_predict(
        if (_fetch_name_to_type[name] == 0) {
          int len = res.insts(bi).tensor_array(idx).int64_data_size();
          VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-          predict_res_batch[bi]._int64_map[name].resize(1);
+          predict_res_batch._predictres_vector[bi]._int64_map[name].resize(1);
-          predict_res_batch[bi]._int64_map[name][0].resize(len);
+          predict_res_batch._predictres_vector[bi]._int64_map[name]
+                                                             [0].resize(len);
          VLOG(2) << "fetch name " << name << " index " << idx << " first data "
                  << res.insts(bi).tensor_array(idx).int64_data(0);
          for (int i = 0; i < len; ++i) {
-            predict_res_batch[bi]._int64_map[name][0][i] =
+            predict_res_batch._predictres_vector[bi]._int64_map[name][0][i] =
                res.insts(bi).tensor_array(idx).int64_data(i);
          }
        } else if (_fetch_name_to_type[name] == 1) {
          int len = res.insts(bi).tensor_array(idx).float_data_size();
          VLOG(2) << "fetch tensor : " << name
                  << " type: float32 len : " << len;
-          predict_res_batch[bi]._float_map[name].resize(1);
+          predict_res_batch._predictres_vector[bi]._float_map[name].resize(1);
-          predict_res_batch[bi]._float_map[name][0].resize(len);
+          predict_res_batch._predictres_vector[bi]._float_map[name]
+                                                             [0].resize(len);
          VLOG(2) << "fetch name " << name << " index " << idx << " first data "
                  << res.insts(bi).tensor_array(idx).float_data(0);
          for (int i = 0; i < len; ++i) {
-            predict_res_batch[bi]._float_map[name][0][i] =
+            predict_res_batch._predictres_vector[bi]._float_map[name][0][i] =
                res.insts(bi).tensor_array(idx).float_data(i);
          }
        }
@@ -427,7 +427,7 @@ std::vector<PredictorRes> PredictorClient::batch_predict(
    fprintf(stderr, "%s\n", oss.str().c_str());
  }
-  return predict_res_batch;
+  return 0;
 }
 }  // namespace general_model

--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -41,6 +41,12 @@ PYBIND11_MODULE(serving_client, m) {
           },
           py::return_value_policy::reference);
+  // Expose PredictorResBatch to Python so a caller can own one batch result
+  // object and hand it to batch_predict as an out-parameter.
+  //
+  // `at` needs the explicit `-> const PredictorRes &` return type: without it
+  // the lambda's deduced return type is a by-value PredictorRes, so every
+  // call copies the element (both result maps) and pybind11 wraps that
+  // temporary rather than referencing the element stored in the batch.
+  // The returned Python object borrows from the batch: it is only valid while
+  // the batch is alive and its vector has not been resized.
+  py::class_<PredictorResBatch>(m, "PredictorResBatch", py::buffer_protocol())
+      .def(py::init())
+      .def("at",
+           [](PredictorResBatch &self, int index) -> const PredictorRes & {
+             return self.at(index);
+           },
+           py::return_value_policy::reference);
  py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
      .def(py::init())
      .def("init_gflags",
@@ -91,12 +97,14 @@ PYBIND11_MODULE(serving_client, m) {
                  &int_feed_batch,
              const std::vector<std::string> &int_feed_name,
              const std::vector<std::string> &fetch_name,
+              PredictorResBatch &predict_res_batch,
              const int &pid) {
             return self.batch_predict(float_feed_batch,
                                       float_feed_name,
                                       int_feed_batch,
                                       int_feed_name,
                                       fetch_name,
+                                       predict_res_batch,
                                       pid);
           });
 }

--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -89,6 +89,7 @@ class Client(object):
    def load_client_config(self, path):
        from .serving_client import PredictorClient
        from .serving_client import PredictorRes
+        from .serving_client import PredictorResBatch
        model_conf = m_config.GeneralModelConfig()
        f = open(path, 'r')
        model_conf = google.protobuf.text_format.Merge(
@@ -99,6 +100,7 @@ class Client(object):
        # get feed shapes, feed types
        # map feed names to index
        self.result_handle_ = PredictorRes()
+        self.result_batch_handle_ = PredictorResBatch()
        self.client_handle_ = PredictorClient()
        self.client_handle_.init(path)
        read_env_flags = ["profile_client", "profile_server"]
@@ -180,6 +182,7 @@ class Client(object):
        float_feed_names = []
        fetch_names = []
        counter = 0
+        batch_size = len(feed_batch)
        for feed in feed_batch:
            int_slot = []
            float_slot = []
@@ -202,19 +205,21 @@ class Client(object):
            if key in self.fetch_names_:
                fetch_names.append(key)
-        result_batch = self.client_handle_.batch_predict(
+        result_batch = self.result_batch_handle_
+        res = self.client_handle_.batch_predict(
            float_slot_batch, float_feed_names, int_slot_batch, int_feed_names,
-            fetch_names, self.pid)
+            fetch_names, result_batch, self.pid)
        result_map_batch = []
-        for result in result_batch:
+        for index in range(batch_size):
+            result = result_batch.at(index)
            result_map = {}
            for i, name in enumerate(fetch_names):
                if self.fetch_names_to_type_[name] == int_type:
                    result_map[name] = result.get_int64_by_name(name)[0]
                elif self.fetch_names_to_type_[name] == float_type:
                    result_map[name] = result.get_float_by_name(name)[0]
-            result_map_batch.appenf(result_map)
+            result_map_batch.append(result_map)
        return result_map_batch