From ee3f32d73caec907c892877a666596b5b34cd4e2 Mon Sep 17 00:00:00 2001
From: dongdaxiang
Date: Tue, 14 Apr 2020 21:30:41 +0800
Subject: [PATCH] Make fetch variables' shapes consistent with the inference
 output; support numpy array input for the predict interface
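
Fetch results now carry an explicit shape (and lod for lod_tensor
outputs), so the Python client can rebuild each fetched variable as a
numpy array whose shape matches the server-side inference output, and
feed values may be passed directly as numpy arrays.

A rough usage sketch against the new Python client; the feed/fetch
aliases ("words", "prediction") and the config path are illustrative
placeholders, not part of this patch:

    import numpy as np
    from paddle_serving_client import Client

    client = Client()
    client.load_client_config("serving_client_conf.prototxt")
    client.connect(["127.0.0.1:9292"])

    # numpy input: its shape is forwarded to the server, so callers no
    # longer have to flatten the feed into a plain Python list
    words = np.array([[1, 2, 3, 4]], dtype="int64")
    fetch_map = client.predict(feed={"words": words}, fetch=["prediction"])

    # each fetched value is a numpy array shaped like the inference
    # output; lod_tensor outputs additionally expose "<name>.lod" with
    # the offsets needed to split the flat batch back into samples
    print(fetch_map["prediction"].shape)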
---
 core/general-client/include/general_model.h   |  33 ++-
 core/general-client/src/general_model.cpp     | 254 ++++--------------
 .../src/pybind_general_model.cpp              |  31 +--
 .../general-server/op/general_response_op.cpp |  93 +++----
 .../proto/general_model_service.proto         |   1 +
 .../sdk-cpp/proto/general_model_service.proto |   1 +
 python/paddle_serving_client/__init__.py      |  49 ++--
 7 files changed, 140 insertions(+), 322 deletions(-)

diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index ca0b27b5..703593f8 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -45,13 +45,17 @@ class PredictorRes {
   ~PredictorRes() {}
 
  public:
-  const std::vector<std::vector<int64_t>>& get_int64_by_name(
-      const std::string& name) {
-    return _int64_map[name];
+  const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
+    return _int64_value_map[name];
   }
-  const std::vector<std::vector<float>>& get_float_by_name(
-      const std::string& name) {
-    return _float_map[name];
+  const std::vector<float>& get_float_by_name(const std::string& name) {
+    return _float_value_map[name];
+  }
+  const std::vector<int>& get_shape(const std::string& name) {
+    return _shape_map[name];
+  }
+  const std::vector<int>& get_lod(const std::string& name) {
+    return _lod_map[name];
   }
   void set_variant_tag(const std::string& variant_tag) {
     _variant_tag = variant_tag;
@@ -59,8 +63,10 @@ class PredictorRes {
   const std::string& variant_tag() { return _variant_tag; }
 
  public:
-  std::map<std::string, std::vector<std::vector<int64_t>>> _int64_map;
-  std::map<std::string, std::vector<std::vector<float>>> _float_map;
+  std::map<std::string, std::vector<int64_t>> _int64_value_map;
+  std::map<std::string, std::vector<float>> _float_value_map;
+  std::map<std::string, std::vector<int>> _shape_map;
+  std::map<std::string, std::vector<int>> _lod_map;
 
 private:
  std::string _variant_tag;
@@ -81,17 +87,8 @@ class PredictorClient {
 
   int create_predictor_by_desc(const std::string& sdk_desc);
 
   int create_predictor();
-  int destroy_predictor();
 
-  int predict(const std::vector<std::vector<float>>& float_feed,
-              const std::vector<std::string>& float_feed_name,
-              const std::vector<std::vector<int>>& float_shape,
-              const std::vector<std::vector<int64_t>>& int_feed,
-              const std::vector<std::string>& int_feed_name,
-              const std::vector<std::vector<int>>& int_shape,
-              const std::vector<std::string>& fetch_name,
-              PredictorRes& predict_res,  // NOLINT
-              const int& pid);
+  int destroy_predictor();
 
   int batch_predict(
       const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 947953f9..c12bb79a 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -132,154 +132,6 @@ int PredictorClient::create_predictor() {
   _api.thrd_initialize();
 }
 
-int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
-                             const std::vector<std::string> &float_feed_name,
-                             const std::vector<std::vector<int>> &float_shape,
-                             const std::vector<std::vector<int64_t>> &int_feed,
-                             const std::vector<std::string> &int_feed_name,
-                             const std::vector<std::vector<int>> &int_shape,
-                             const std::vector<std::string> &fetch_name,
-                             PredictorRes &predict_res,
-                             const int &pid) {  // NOLINT
-  predict_res._int64_map.clear();
-  predict_res._float_map.clear();
-  Timer timeline;
-  int64_t preprocess_start = timeline.TimeStampUS();
-  _api.thrd_clear();
-  std::string variant_tag;
-  _predictor = _api.fetch_predictor("general_model", &variant_tag);
-  predict_res.set_variant_tag(variant_tag);
-
-  Request req;
-  for (auto &name : fetch_name) {
-    req.add_fetch_var_names(name);
-  }
-
-  std::vector<Tensor *> tensor_vec;
-  FeedInst *inst = req.add_insts();
-  for (auto &name : float_feed_name) {
-    tensor_vec.push_back(inst->add_tensor_array());
-  }
-
-  for (auto &name : int_feed_name) {
-    tensor_vec.push_back(inst->add_tensor_array());
-  }
-
-  int vec_idx = 0;
-  for (int i = 0; i < float_feed_name.size(); ++i) {
-    int idx = _feed_name_to_idx[float_feed_name[i]];
-    Tensor *tensor = tensor_vec[idx];
-    if (float_shape.size() == 0) {
-      for (int j = 0; j < _shape[idx].size(); ++j) {
-        tensor->add_shape(_shape[idx][j]);
-      }
-    } else {
-      for (int j = 0; j < float_shape[i].size(); ++j) {
-        tensor->add_shape(float_shape[i][j]);
-      }
-    }
-    tensor->set_elem_type(1);
-    for (int j = 0; j < float_feed[vec_idx].size(); ++j) {
-      tensor->add_float_data(float_feed[vec_idx][j]);
-    }
-    vec_idx++;
-  }
-
-  VLOG(2) << "feed float feed var done.";
-  vec_idx = 0;
-
-  for (int i = 0; i < int_feed_name.size(); ++i) {
-    int idx = _feed_name_to_idx[int_feed_name[i]];
-    Tensor *tensor = tensor_vec[idx];
-    if (int_shape.size() == 0) {
-      for (int j = 0; j < int_shape[i].size(); ++j) {
-        tensor->add_shape(int_shape[i][j]);
-      }
-    } else {
-      for (int j = 0; j < _shape[idx].size(); ++j) {
-        tensor->add_shape(_shape[idx][j]);
-      }
-    }
-    tensor->set_elem_type(0);
-    for (int j = 0; j < int_feed[vec_idx].size(); ++j) {
-      tensor->add_int64_data(int_feed[vec_idx][j]);
-    }
-    vec_idx++;
-  }
-
-  int64_t preprocess_end = timeline.TimeStampUS();
-  int64_t client_infer_start = timeline.TimeStampUS();
-  Response res;
-
-  int64_t client_infer_end = 0;
-  int64_t postprocess_start = 0;
-  int64_t postprocess_end = 0;
-
-  if (FLAGS_profile_client) {
-    if (FLAGS_profile_server) {
-      req.set_profile_server(true);
-    }
-  }
-
-  res.Clear();
-  if (_predictor->inference(&req, &res) != 0) {
-    LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
-    return -1;
-  } else {
-    VLOG(2) << "predict done.";
-    client_infer_end = timeline.TimeStampUS();
-    postprocess_start = client_infer_end;
-    for (auto &name : fetch_name) {
-      int idx = _fetch_name_to_idx[name];
-      VLOG(2) << "fetch name: " << name;
-      if (_fetch_name_to_type[name] == 0) {
-        int len = res.insts(0).tensor_array(idx).int64_data_size();
-        VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-        predict_res._int64_map[name].resize(1);
-        predict_res._int64_map[name][0].resize(len);
-        for (int i = 0; i < len; ++i) {
-          predict_res._int64_map[name][0][i] =
-              res.insts(0).tensor_array(idx).int64_data(i);
-        }
-      } else if (_fetch_name_to_type[name] == 1) {
-        int len = res.insts(0).tensor_array(idx).float_data_size();
-        VLOG(2) << "fetch tensor : " << name << " type: float32 len : " << len;
-        predict_res._float_map[name].resize(1);
-        predict_res._float_map[name][0].resize(len);
-        for (int i = 0; i < len; ++i) {
-          predict_res._float_map[name][0][i] =
-              res.insts(0).tensor_array(idx).float_data(i);
-        }
-      }
-      postprocess_end = timeline.TimeStampUS();
-    }
-  }
-
-  if (FLAGS_profile_client) {
-    std::ostringstream oss;
-    oss << "PROFILE\t"
-        << "pid:" << pid << "\t"
-        << "prepro_0:" << preprocess_start << " "
-        << "prepro_1:" << preprocess_end << " "
-        << "client_infer_0:" << client_infer_start << " "
-        << "client_infer_1:" << client_infer_end << " ";
-
-    if (FLAGS_profile_server) {
-      int op_num = res.profile_time_size() / 2;
-      for (int i = 0; i < op_num; ++i) {
-        oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
-        oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
-      }
-    }
-
-    oss << "postpro_0:" << postprocess_start << " ";
-    oss << "postpro_1:" << postprocess_end;
-
-    fprintf(stderr, "%s\n", oss.str().c_str());
-  }
-  return 0;
-}
-
 int PredictorClient::batch_predict(
     const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
     const std::vector<std::string> &float_feed_name,
@@ -292,8 +144,10 @@ int PredictorClient::batch_predict(
     const int &pid) {
   int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
 
-  predict_res_batch._int64_map.clear();
-  predict_res_batch._float_map.clear();
+  predict_res_batch._int64_value_map.clear();
+  predict_res_batch._float_value_map.clear();
+  predict_res_batch._shape_map.clear();
+  predict_res_batch._lod_map.clear();
 
   Timer timeline;
   int64_t preprocess_start = timeline.TimeStampUS();
@@ -310,7 +164,7 @@ int PredictorClient::batch_predict(
   for (auto &name : fetch_name) {
     req.add_fetch_var_names(name);
   }
-  //
+
   for (int bi = 0; bi < batch_size; bi++) {
     VLOG(2) << "prepare batch " << bi;
     std::vector<Tensor *> tensor_vec;
@@ -325,20 +179,14 @@ int PredictorClient::batch_predict(
       tensor_vec.push_back(inst->add_tensor_array());
     }
 
     VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
            << "prepared";
     int vec_idx = 0;
-    for (int i = 0; i < float_feed_name.size(); ++i) {
-      int idx = _feed_name_to_idx[float_feed_name[i]];
+    for (auto &name : float_feed_name) {
+      int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
-      if (float_shape.size() == float_feed_name.size()) {
-        for (int j = 0; j < float_shape[i].size(); ++j) {
-          tensor->add_shape(float_shape[i][j]);
-        }
-      } else {
-        for (int j = 0; j < _shape[idx].size(); ++j) {
-          tensor->add_shape(_shape[idx][j]);
-        }
+      for (int j = 0; j < float_shape[vec_idx].size(); ++j) {
+        tensor->add_shape(float_shape[vec_idx][j]);
       }
       tensor->set_elem_type(1);
       for (int j = 0; j < float_feed[vec_idx].size(); ++j) {
         tensor->add_float_data(float_feed[vec_idx][j]);
@@ -351,20 +199,14 @@ int PredictorClient::batch_predict(
            << "float feed value prepared";
 
     vec_idx = 0;
-    for (int i = 0; i < int_feed_name.size(); ++i) {
-      int idx = _feed_name_to_idx[int_feed_name[i]];
+    for (auto &name : int_feed_name) {
+      int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
-      if (int_shape.size() == int_feed_name.size()) {
-        for (int j = 0; j < int_shape[i].size(); ++j) {
-          tensor->add_shape(int_shape[i][j]);
-        }
-      } else {
-        for (int j = 0; j < _shape[idx].size(); ++j) {
-          tensor->add_shape(_shape[idx][j]);
-        }
+      for (int j = 0; j < int_shape[vec_idx].size(); ++j) {
+        tensor->add_shape(int_shape[vec_idx][j]);
       }
       tensor->set_elem_type(0);
-      VLOG(3) << "feed var name " << float_feed_name[i] << " index " << vec_idx
+      VLOG(3) << "feed var name " << name << " index " << vec_idx
              << "first data " << int_feed[vec_idx][0];
       for (int j = 0; j < int_feed[vec_idx].size(); ++j) {
         tensor->add_int64_data(int_feed[vec_idx][j]);
@@ -395,43 +237,47 @@ int PredictorClient::batch_predict(
   res.Clear();
   if (_predictor->inference(&req, &res) != 0) {
     LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
     return -1;
   } else {
     client_infer_end = timeline.TimeStampUS();
     postprocess_start = client_infer_end;
+
     for (auto &name : fetch_name) {
-      predict_res_batch._int64_map[name].resize(batch_size);
-      predict_res_batch._float_map[name].resize(batch_size);
+      int idx = _fetch_name_to_idx[name];
+      int shape_size = res.insts(0).tensor_array(idx).shape_size();
+      predict_res_batch._shape_map[name].resize(shape_size);
+      for (int i = 0; i < shape_size; ++i) {
+        predict_res_batch._shape_map[name][i] =
+            res.insts(0).tensor_array(idx).shape(i);
+      }
+      int lod_size = res.insts(0).tensor_array(idx).lod_size();
+      if (lod_size > 0) {
+        predict_res_batch._lod_map[name].resize(lod_size);
+        for (int i = 0; i < lod_size; ++i) {
+          predict_res_batch._lod_map[name][i] =
+              res.insts(0).tensor_array(idx).lod(i);
+        }
+      }
     }
-    VLOG(2) << "response batch size " << res.insts_size();
-    VLOG(2) << "response var nmae " << res.insts(0).tensor_array_size();
-    for (int bi = 0; bi < batch_size; bi++) {
-      int idx = 0;
-      for (auto &name : fetch_name) {
-        int len = res.insts(bi).tensor_array(idx).data_size();
-        if (_fetch_name_to_type[name] == 0) {
-          int len = res.insts(bi).tensor_array(idx).int64_data_size();
-          VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-          predict_res_batch._int64_map[name][bi].resize(len);
-          VLOG(2) << "fetch name " << name << " index " << idx << " first data "
-                  << res.insts(bi).tensor_array(idx).int64_data(0);
-          for (int i = 0; i < len; ++i) {
-            predict_res_batch._int64_map[name][bi][i] =
-                res.insts(bi).tensor_array(idx).int64_data(i);
-          }
-        } else if (_fetch_name_to_type[name] == 1) {
-          int len = res.insts(bi).tensor_array(idx).float_data_size();
-          VLOG(2) << "fetch tensor : " << name
-                  << " type: float32 len : " << len;
-          predict_res_batch._float_map[name][bi].resize(len);
-          VLOG(2) << "fetch name " << name << " index " << idx << " first data "
-                  << res.insts(bi).tensor_array(idx).float_data(0);
-          for (int i = 0; i < len; ++i) {
-            predict_res_batch._float_map[name][bi][i] =
-                res.insts(bi).tensor_array(idx).float_data(i);
-          }
-        }
-        idx += 1;
-      }
-    }
+
+    for (auto &name : fetch_name) {
+      int idx = _fetch_name_to_idx[name];
+      if (_fetch_name_to_type[name] == 0) {
+        int size = res.insts(0).tensor_array(idx).int64_data_size();
+        predict_res_batch._int64_value_map[name].resize(size);
+        for (int i = 0; i < size; ++i) {
+          predict_res_batch._int64_value_map[name][i] =
+              res.insts(0).tensor_array(idx).int64_data(i);
+        }
+      } else {
+        int size = res.insts(0).tensor_array(idx).float_data_size();
+        predict_res_batch._float_value_map[name].resize(size);
+        for (int i = 0; i < size; ++i) {
+          predict_res_batch._float_value_map[name][i] =
+              res.insts(0).tensor_array(idx).float_data(i);
+        }
+      }
+    }
     postprocess_end = timeline.TimeStampUS();
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index fa95c742..14349bfb 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -40,6 +40,16 @@ PYBIND11_MODULE(serving_client, m) {
              return self.get_float_by_name(name);
            },
            py::return_value_policy::reference)
+      .def("get_shape",
+           [](PredictorRes &self, std::string &name) {
+             return self.get_shape(name);
+           },
+           py::return_value_policy::reference)
+      .def("get_lod",
+           [](PredictorRes &self, std::string &name) {
+             return self.get_lod(name);
+           },
+           py::return_value_policy::reference)
       .def("variant_tag",
            [](PredictorRes &self) { return self.variant_tag(); });
 
@@ -67,27 +77,6 @@ PYBIND11_MODULE(serving_client, m) {
            [](PredictorClient &self) { self.create_predictor(); })
       .def("destroy_predictor",
            [](PredictorClient &self) { self.destroy_predictor(); })
-      .def("predict",
-           [](PredictorClient &self,
-              const std::vector<std::vector<float>> &float_feed,
-              const std::vector<std::string> &float_feed_name,
-              const std::vector<std::vector<int>> &float_shape,
-              const std::vector<std::vector<int64_t>> &int_feed,
-              const std::vector<std::string> &int_feed_name,
-              const std::vector<std::vector<int>> &int_shape,
-              const std::vector<std::string> &fetch_name,
-              PredictorRes &predict_res,
-              const int &pid) {
-             return self.predict(float_feed,
-                                 float_feed_name,
-                                 float_shape,
-                                 int_feed,
-                                 int_feed_name,
-                                 int_shape,
-                                 fetch_name,
-                                 predict_res,
-                                 pid);
-           })
       .def("batch_predict",
            [](PredictorClient &self,
               const std::vector<std::vector<std::vector<float>>>
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index c5248227..c4d4c253 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -73,22 +73,21 @@ int GeneralResponseOp::inference() {
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
-
-  for (int i = 0; i < batch_size; ++i) {
-    FetchInst *fetch_inst = res->add_insts();
-    for (auto &idx : fetch_index) {
-      Tensor *tensor = fetch_inst->add_tensor_array();
-      // currently only response float tensor or lod_tensor
-      tensor->set_elem_type(1);
-      if (model_config->_is_lod_fetch[idx]) {
-        VLOG(2) << "out[" << idx << " is lod_tensor";
-        tensor->add_shape(-1);
-      } else {
-        VLOG(2) << "out[" << idx << "] is tensor";
-        for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-          VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
-          tensor->add_shape(in->at(idx).shape[k]);
-        }
+  FetchInst *fetch_inst = res->add_insts();
+  for (auto &idx : fetch_index) {
+    Tensor *tensor = fetch_inst->add_tensor_array();
+    tensor->set_elem_type(1);
+    if (model_config->_is_lod_fetch[idx]) {
+      VLOG(2) << "out[" << idx << "] is lod_tensor";
+      for (int k = 0; k < in->at(idx).shape.size(); ++k) {
+        VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+        tensor->add_shape(in->at(idx).shape[k]);
+      }
+    } else {
+      VLOG(2) << "out[" << idx << "] is tensor";
+      for (int k = 0; k < in->at(idx).shape.size(); ++k) {
+        VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+        tensor->add_shape(in->at(idx).shape[k]);
       }
     }
   }
@@ -96,62 +95,42 @@ int GeneralResponseOp::inference() {
   int var_idx = 0;
   for (auto &idx : fetch_index) {
     int cap = 1;
-    for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+    for (int j = 0; j < in->at(idx).shape.size(); ++j) {
       cap *= in->at(idx).shape[j];
     }
     if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
       int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
       if (model_config->_is_lod_fetch[idx]) {
-        for (int j = 0; j < batch_size; ++j) {
-          for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-               k++) {
-            FetchInst *fetch_p = res->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[k]);
-          }
+        FetchInst *fetch_p = res->mutable_insts(0);
+        for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_lod(
+              in->at(idx).lod[0][j]);
+        }
+        for (int j = 0; j < cap; ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
         }
       } else {
-        int var_size = in->at(idx).shape[0];
-        if (var_size == batch_size) {
-          for (int j = 0; j < batch_size; ++j) {
-            for (int k = j * cap; k < (j + 1) * cap; ++k) {
-              FetchInst *fetch_p = res->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
-                  data_ptr[k]);
-            }
-          }
-        } else {
-          for (int j = 0; j < batch_size; ++j) {
-            FetchInst *fetch_p = res->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[0]);
-          }
+        FetchInst *fetch_p = res->mutable_insts(0);
+        for (int j = 0; j < cap; ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
         }
       }
       var_idx++;
     } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
       float *data_ptr = static_cast<float *>(in->at(idx).data.data());
       if (model_config->_is_lod_fetch[idx]) {
-        for (int j = 0; j < batch_size; ++j) {
-          for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-               k++) {
-            FetchInst *fetch_p = res->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[k]);
-          }
+        FetchInst *fetch_p = res->mutable_insts(0);
+        for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_lod(
+              in->at(idx).lod[0][j]);
+        }
+        for (int j = 0; j < cap; ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
         }
       } else {
-        int var_size = in->at(idx).shape[0];
-        if (var_size == batch_size) {
-          for (int j = 0; j < batch_size; ++j) {
-            for (int k = j * cap; k < (j + 1) * cap; ++k) {
-              FetchInst *fetch_p = res->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
-                  data_ptr[k]);
-            }
-          }
-        } else {
-          for (int j = 0; j < batch_size; ++j) {
-            FetchInst *fetch_p = res->mutable_insts(j);
-            fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[0]);
-          }
+        FetchInst *fetch_p = res->mutable_insts(0);
+        for (int j = 0; j < cap; ++j) {
+          fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
         }
       }
       var_idx++;
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 09e2854d..ad1128c3 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -26,6 +26,7 @@ message Tensor {
   repeated float float_data = 4;
   optional int32 elem_type = 5;
   repeated int32 shape = 6;
+  repeated int32 lod = 7;  // only for fetch tensor currently
 };
 
 message FeedInst { repeated Tensor tensor_array = 1; };
 
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index 827bb880..39c09f09 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -26,6 +26,7 @@ message Tensor {
   repeated float float_data = 4;
   optional int32 elem_type = 5;
   repeated int32 shape = 6;
+  repeated int32 lod = 7;  // only for fetch tensor currently
 };
 
 message FeedInst { repeated Tensor tensor_array = 1; };
 
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 198d7c05..765c368a 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -121,6 +121,7 @@ class Client(object):
         self.fetch_names_to_idx_ = {}
         self.lod_tensor_set = set()
         self.feed_tensor_len = {}
+
         for i, var in enumerate(model_conf.feed_var):
             self.feed_names_to_idx_[var.alias_name] = i
             self.feed_types_[var.alias_name] = var.feed_type
@@ -133,11 +134,11 @@ class Client(object):
             for dim in self.feed_shapes_[var.alias_name]:
                 counter *= dim
             self.feed_tensor_len[var.alias_name] = counter
-
         for i, var in enumerate(model_conf.fetch_var):
             self.fetch_names_to_idx_[var.alias_name] = i
             self.fetch_names_to_type_[var.alias_name] = var.fetch_type
-
+            if var.is_lod_tensor:
+                self.lod_tensor_set.add(var.alias_name)
         return
 
     def add_variant(self, tag, cluster, variant_weight):
@@ -164,7 +165,6 @@ class Client(object):
                     "parameter endpoints({}) will not take effect, because you use the add_variant function.".
                    format(endpoints))
         sdk_desc = self.predictor_sdk_.gen_desc()
-        print(sdk_desc)
         self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
         ))
 
@@ -228,23 +228,25 @@ class Client(object):
             for key in feed_i:
                 if key not in self.feed_names_:
                     raise ValueError("Wrong feed name: {}.".format(key))
-                self.shape_check(feed_i, key)
+                if not isinstance(feed_i[key], np.ndarray):
+                    self.shape_check(feed_i, key)
                 if self.feed_types_[key] == int_type:
                     if i == 0:
                         int_feed_names.append(key)
                         if isinstance(feed_i[key], np.ndarray):
-                            int_shape.append(feed_i[key].shape)
+                            int_shape.append(list(feed_i[key].shape))
                     if isinstance(feed_i[key], np.ndarray):
-                        int_slot.append(feed_i[key].tolist())
+                        int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
                     else:
                         int_slot.append(feed_i[key])
                 elif self.feed_types_[key] == float_type:
                     if i == 0:
                         float_feed_names.append(key)
                         if isinstance(feed_i[key], np.ndarray):
-                            float_shape.append(feed_i[key].shape)
+                            float_shape.append(list(feed_i[key].shape))
                     if isinstance(feed_i[key], np.ndarray):
-                        float_slot.append(feed_i[key].tolist())
+                        float_slot.append(
+                            np.reshape(feed_i[key], (-1)).tolist())
                     else:
                         float_slot.append(feed_i[key])
             int_slot_batch.append(int_slot)
@@ -252,31 +254,34 @@ class Client(object):
 
         result_batch = self.result_handle_
         res = self.client_handle_.batch_predict(
-            float_slot_batch, float_feed_names, int_slot_batch, int_feed_names,
-            fetch_names, result_batch, self.pid)
+            float_slot_batch, float_feed_names, float_shape, int_slot_batch,
+            int_feed_names, int_shape, fetch_names, result_batch, self.pid)
 
         if res == -1:
             return None
 
-        result_map_batch = []
         result_map = {}
+        # fetch results are numpy arrays shaped like the inference output
         for i, name in enumerate(fetch_names):
             if self.fetch_names_to_type_[name] == int_type:
                 result_map[name] = result_batch.get_int64_by_name(name)
+                shape = result_batch.get_shape(name)
+                result_map[name] = np.array(result_map[name])
+                result_map[name].shape = shape
+                if name in self.lod_tensor_set:
+                    result_map["{}.lod".format(name)] = result_batch.get_lod(
+                        name)
             elif self.fetch_names_to_type_[name] == float_type:
                 result_map[name] = result_batch.get_float_by_name(name)
-        for i in range(batch_size):
-            single_result = {}
-            for key in result_map:
-                single_result[key] = result_map[key][i]
-            result_map_batch.append(single_result)
-
-        if batch_size == 1:
-            return [result_map_batch[0], self.result_handle_.variant_tag()
-                    ] if need_variant_tag else result_map_batch[0]
-        else:
-            return [result_map_batch, self.result_handle_.variant_tag()
-                    ] if need_variant_tag else result_map_batch
+                shape = result_batch.get_shape(name)
+                result_map[name] = np.array(result_map[name])
+                result_map[name].shape = shape
+                if name in self.lod_tensor_set:
+                    result_map["{}.lod".format(name)] = result_batch.get_lod(
+                        name)
+
+        return result_map
 
     def release(self):
         self.client_handle_.destroy_predictor()
--
GitLab