diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 21d3108a6e5ee8f20de477c7afa42eede536bfab..2ab3d1758e3bd8fa0ce54a9404630753a446a3d1 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -56,13 +56,17 @@ class ModelRes {
                       std::make_move_iterator(std::end(res._float_map)));
   }
   ~ModelRes() {}
-  const std::vector<std::vector<int64_t>>& get_int64_by_name(
-      const std::string& name) {
-    return _int64_map[name];
+  const std::vector<int64_t>& get_int64_by_name(const std::string& name) {
+    return _int64_value_map[name];
   }
-  const std::vector<std::vector<float>>& get_float_by_name(
-      const std::string& name) {
-    return _float_map[name];
+  const std::vector<float>& get_float_by_name(const std::string& name) {
+    return _float_value_map[name];
+  }
+  const std::vector<int>& get_shape(const std::string& name) {
+    return _shape_map[name];
+  }
+  const std::vector<int>& get_lod(const std::string& name) {
+    return _lod_map[name];
   }
   void set_engine_name(const std::string& engine_name) {
     _engine_name = engine_name;
@@ -81,8 +85,10 @@ class ModelRes {
 
  public:
   std::string _engine_name;
-  std::map<std::string, std::vector<std::vector<int64_t>>> _int64_map;
-  std::map<std::string, std::vector<std::vector<float>>> _float_map;
+  std::map<std::string, std::vector<int64_t>> _int64_value_map;
+  std::map<std::string, std::vector<float>> _float_value_map;
+  std::map<std::string, std::vector<int>> _shape_map;
+  std::map<std::string, std::vector<int>> _lod_map;
 };
 
 class PredictorRes {
@@ -95,14 +101,22 @@ class PredictorRes {
     _models.clear();
     _engine_names.clear();
   }
-  const std::vector<std::vector<int64_t>>& get_int64_by_name(
-      const int model_idx, const std::string& name) {
+  const std::vector<int64_t>& get_int64_by_name(const int model_idx,
+                                                const std::string& name) {
     return _models[model_idx].get_int64_by_name(name);
   }
-  const std::vector<std::vector<float>>& get_float_by_name(
-      const int model_idx, const std::string& name) {
+  const std::vector<float>& get_float_by_name(const int model_idx,
+                                              const std::string& name) {
     return _models[model_idx].get_float_by_name(name);
   }
+  const std::vector<int>& get_shape(const int model_idx,
+                                    const std::string& name) {
+    return _models[model_idx].get_shape(name);
+  }
+  const std::vector<int>& get_lod(const int model_idx,
+                                  const std::string& name) {
+    return _models[model_idx].get_lod(name);
+  }
   void add_model_res(ModelRes&& res) {
     _engine_names.push_back(res.engine_name());
     _models.emplace_back(std::move(res));
@@ -134,21 +148,16 @@ class PredictorClient {
   int create_predictor_by_desc(const std::string& sdk_desc);
 
   int create_predictor();
-  int destroy_predictor();
 
-  int predict(const std::vector<std::vector<float>>& float_feed,
-              const std::vector<std::string>& float_feed_name,
-              const std::vector<std::vector<int64_t>>& int_feed,
-              const std::vector<std::string>& int_feed_name,
-              const std::vector<std::string>& fetch_name,
-              PredictorRes& predict_res,  // NOLINT
-              const int& pid);
+  int destroy_predictor();
 
   int batch_predict(
       const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
       const std::vector<std::string>& float_feed_name,
+      const std::vector<std::vector<int>>& float_shape,
       const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
       const std::vector<std::string>& int_feed_name,
+      const std::vector<std::vector<int>>& int_shape,
       const std::vector<std::string>& fetch_name,
       PredictorRes& predict_res_batch,  // NOLINT
       const int& pid);
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 8f5c5a1c22c6a13958e5273a2133f267bfa3073f..e78cdf722cf72c3fc4f97c48a5cb02f8ad823671 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -135,154 +135,13 @@ int PredictorClient::create_predictor() {
   return 0;
 }
 
-int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
-                             const std::vector<std::string> &float_feed_name,
-                             const std::vector<std::vector<int64_t>> &int_feed,
-                             const std::vector<std::string> &int_feed_name,
-                             const std::vector<std::string> &fetch_name,
-                             PredictorRes &predict_res,
-                             const int &pid) {  // NOLINT
-  predict_res.clear();
-  Timer timeline;
-  int64_t preprocess_start = timeline.TimeStampUS();
-  _api.thrd_clear();
-  std::string variant_tag;
-  _predictor = _api.fetch_predictor("general_model", &variant_tag);
-  predict_res.set_variant_tag(variant_tag);
-
-  Request req;
-  for (auto &name : fetch_name) {
-    req.add_fetch_var_names(name);
-  }
-
-  std::vector<Tensor *> tensor_vec;
-  FeedInst *inst = req.add_insts();
-  for (auto &name : float_feed_name) {
-    tensor_vec.push_back(inst->add_tensor_array());
-  }
-
-  for (auto &name : int_feed_name) {
-    tensor_vec.push_back(inst->add_tensor_array());
-  }
-
-  int vec_idx = 0;
-  for (auto &name : float_feed_name) {
-    int idx = _feed_name_to_idx[name];
-    Tensor *tensor = tensor_vec[idx];
-    for (uint32_t j = 0; j < _shape[idx].size(); ++j) {
-      tensor->add_shape(_shape[idx][j]);
-    }
-    tensor->set_elem_type(1);
-    for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) {
-      tensor->add_float_data(float_feed[vec_idx][j]);
-    }
-    vec_idx++;
-  }
-
-  VLOG(2) << "feed float feed var done.";
-  vec_idx = 0;
-
-  for (auto &name : int_feed_name) {
-    int idx = _feed_name_to_idx[name];
-    Tensor *tensor = tensor_vec[idx];
-    for (uint32_t j = 0; j < _shape[idx].size(); ++j) {
-      tensor->add_shape(_shape[idx][j]);
-    }
-    tensor->set_elem_type(0);
-    for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
-      tensor->add_int64_data(int_feed[vec_idx][j]);
-    }
-    vec_idx++;
-  }
-
-  int64_t preprocess_end = timeline.TimeStampUS();
-  int64_t client_infer_start = timeline.TimeStampUS();
-  Response res;
-
-  int64_t client_infer_end = 0;
-  int64_t postprocess_start = 0;
-  int64_t postprocess_end = 0;
-
-  if (FLAGS_profile_client) {
-    if (FLAGS_profile_server) {
-      req.set_profile_server(true);
-    }
-  }
-
-  res.Clear();
-  if (_predictor->inference(&req, &res) != 0) {
-    LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
-    return -1;
-  } else {
-    VLOG(2) << "predict done.";
-    client_infer_end = timeline.TimeStampUS();
-    postprocess_start = client_infer_end;
-    // multi-model output
-    uint32_t model_num = res.outputs_size();
-    // predict_res._models.resize(model_num);
-    for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
-      VLOG(2) << "process model output index: " << m_idx;
-      auto output = res.outputs(m_idx);
-      ModelRes model;
-      model.set_engine_name(output.engine_name());
-      for (auto &name : fetch_name) {
-        int idx = _fetch_name_to_idx[name];
-        VLOG(2) << "fetch name: " << name;
-        if (_fetch_name_to_type[name] == 0) {
-          int len = output.insts(0).tensor_array(idx).int64_data_size();
-          VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
-          model._int64_map[name].resize(1);
-          model._int64_map[name][0].resize(len);
-          for (int i = 0; i < len; ++i) {
-            model._int64_map[name][0][i] =
-                output.insts(0).tensor_array(idx).int64_data(i);
-          }
-        } else if (_fetch_name_to_type[name] == 1) {
-          int len = output.insts(0).tensor_array(idx).float_data_size();
-          VLOG(2) << "fetch tensor : " << name
-                  << " type: float32 len : " << len;
-          model._float_map[name].resize(1);
-          model._float_map[name][0].resize(len);
-          for (int i = 0; i < len; ++i) {
-            model._float_map[name][0][i] =
-                output.insts(0).tensor_array(idx).float_data(i);
-          }
-        }
-      }
-      predict_res.add_model_res(std::move(model));
-    }
-    postprocess_end = timeline.TimeStampUS();
-  }
-
-  if (FLAGS_profile_client) {
-    std::ostringstream oss;
-    oss << "PROFILE\t"
-        << "pid:" << pid << "\t"
-        << "prepro_0:" << preprocess_start << " "
-        << "prepro_1:" << preprocess_end << " "
-        << "client_infer_0:" << client_infer_start << " "
-        << "client_infer_1:" << client_infer_end << " ";
-    if (FLAGS_profile_server) {
-      int op_num = res.profile_time_size() / 2;
-      for (int i = 0; i < op_num; ++i) {
-        oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
-        oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
-      }
-    }
-
-    oss << "postpro_0:" << postprocess_start << " ";
-    oss << "postpro_1:" << postprocess_end;
-
-    fprintf(stderr, "%s\n", oss.str().c_str());
-  }
-  return 0;
-}
-
 int PredictorClient::batch_predict(
     const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
     const std::vector<std::string> &float_feed_name,
+    const std::vector<std::vector<int>> &float_shape,
     const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
     const std::vector<std::string> &int_feed_name,
+    const std::vector<std::vector<int>> &int_shape,
     const std::vector<std::string> &fetch_name,
     PredictorRes &predict_res_batch,
     const int &pid) {
@@ -320,14 +179,14 @@ int PredictorClient::batch_predict(
       tensor_vec.push_back(inst->add_tensor_array());
     }
 
-    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
+    VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
             << "prepared";
     int vec_idx = 0;
     for (auto &name : float_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
-      for (uint32_t j = 0; j < _shape[idx].size(); ++j) {
-        tensor->add_shape(_shape[idx][j]);
+      for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
+        tensor->add_shape(float_shape[vec_idx][j]);
       }
       tensor->set_elem_type(1);
       for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) {
@@ -343,8 +202,8 @@ int PredictorClient::batch_predict(
     for (auto &name : int_feed_name) {
       int idx = _feed_name_to_idx[name];
       Tensor *tensor = tensor_vec[idx];
-      for (uint32_t j = 0; j < _shape[idx].size(); ++j) {
-        tensor->add_shape(_shape[idx][j]);
+      for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
+        tensor->add_shape(int_shape[vec_idx][j]);
       }
       tensor->set_elem_type(0);
       VLOG(3) << "feed var name " << name << " index " << vec_idx
@@ -384,48 +243,47 @@ int PredictorClient::batch_predict(
     postprocess_start = client_infer_end;
 
     uint32_t model_num = res.outputs_size();
-    // predict_res_batch._models.resize(model_num);
     for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
       VLOG(2) << "process model output index: " << m_idx;
       auto output = res.outputs(m_idx);
       ModelRes model;
       model.set_engine_name(output.engine_name());
+
       for (auto &name : fetch_name) {
-        model._int64_map[name].resize(batch_size);
-        model._float_map[name].resize(batch_size);
+        int idx = _fetch_name_to_idx[name];
+        int shape_size = output.insts(0).tensor_array(idx).shape_size();
+        model._shape_map[name].resize(shape_size);
+        for (int i = 0; i < shape_size; ++i) {
+          model._shape_map[name][i] =
+              output.insts(0).tensor_array(idx).shape(i);
+        }
+        int lod_size = output.insts(0).tensor_array(idx).lod_size();
+        if (lod_size > 0) {
+          model._lod_map[name].resize(lod_size);
+          for (int i = 0; i < lod_size; ++i) {
+            model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
+          }
+        }
       }
-      VLOG(2) << "response batch size " << output.insts_size();
-      VLOG(2) << "response var nmae " << output.insts(0).tensor_array_size();
-      for (int bi = 0; bi < batch_size; bi++) {
-        int idx = 0;
-        for (auto &name : fetch_name) {
-          int len = output.insts(bi).tensor_array(idx).data_size();
-          if (_fetch_name_to_type[name] == 0) {
-            int len = output.insts(bi).tensor_array(idx).int64_data_size();
-            VLOG(2) << "fetch tensor : " << name
-                    << " type: int64 len : " << len;
-            model._int64_map[name][bi].resize(len);
-            VLOG(2) << "fetch name " << name << " index " << idx
-                    << " first data "
-                    << output.insts(bi).tensor_array(idx).int64_data(0);
-            for (int i = 0; i < len; ++i) {
-              model._int64_map[name][bi][i] =
-                  output.insts(bi).tensor_array(idx).int64_data(i);
-            }
-          } else if (_fetch_name_to_type[name] == 1) {
-            int len = output.insts(bi).tensor_array(idx).float_data_size();
-            VLOG(2) << "fetch tensor : " << name
-                    << " type: float32 len : " << len;
-            model._float_map[name][bi].resize(len);
-            VLOG(2) << "fetch name " << name << " index " << idx
-                    << " first data "
-                    << output.insts(bi).tensor_array(idx).float_data(0);
-            for (int i = 0; i < len; ++i) {
-              model._float_map[name][bi][i] =
-                  output.insts(bi).tensor_array(idx).float_data(i);
-            }
+
+      for (auto &name : fetch_name) {
+        int idx = _fetch_name_to_idx[name];
+        if (_fetch_name_to_type[name] == 0) {
+          model._int64_value_map[name].resize(
+              output.insts(0).tensor_array(idx).int64_data_size());
+          int size = output.insts(0).tensor_array(idx).int64_data_size();
+          for (int i = 0; i < size; ++i) {
+            model._int64_value_map[name][i] =
+                output.insts(0).tensor_array(idx).int64_data(i);
+          }
+        } else {
+          model._float_value_map[name].resize(
+              output.insts(0).tensor_array(idx).float_data_size());
+          int size = output.insts(0).tensor_array(idx).float_data_size();
+          for (int i = 0; i < size; ++i) {
+            model._float_value_map[name][i] =
+                output.insts(0).tensor_array(idx).float_data(i);
           }
-          idx += 1;
         }
       }
       predict_res_batch.add_model_res(std::move(model));
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index abb43dad5e9136906923950d56554f7471ed99e8..066a2cfbe7af64807d4be1982a8822f93a6c32ec 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -40,6 +40,16 @@ PYBIND11_MODULE(serving_client, m) {
              return self.get_float_by_name(model_idx, name);
            },
            py::return_value_policy::reference)
+      .def("get_shape",
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             return self.get_shape(model_idx, name);
+           },
+           py::return_value_policy::reference)
+      .def("get_lod",
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             return self.get_lod(model_idx, name);
+           },
+           py::return_value_policy::reference)
       .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
       .def("get_engine_names",
            [](PredictorRes &self) { return self.get_engine_names(); });
@@ -68,42 +78,31 @@ PYBIND11_MODULE(serving_client, m) {
            [](PredictorClient &self) { self.create_predictor(); })
       .def("destroy_predictor",
            [](PredictorClient &self) { self.destroy_predictor(); })
-      .def("predict",
-           [](PredictorClient &self,
-              const std::vector<std::vector<float>> &float_feed,
-              const std::vector<std::string> &float_feed_name,
-              const std::vector<std::vector<int64_t>> &int_feed,
-              const std::vector<std::string> &int_feed_name,
-              const std::vector<std::string> &fetch_name,
-              PredictorRes &predict_res,
-              const int &pid) {
-             return self.predict(float_feed,
-                                 float_feed_name,
-                                 int_feed,
-                                 int_feed_name,
-                                 fetch_name,
-                                 predict_res,
-                                 pid);
-           })
+
       .def("batch_predict",
            [](PredictorClient &self,
               const std::vector<std::vector<std::vector<float>>>
                   &float_feed_batch,
               const std::vector<std::string> &float_feed_name,
+              const std::vector<std::vector<int>> &float_shape,
               const std::vector<std::vector<std::vector<int64_t>>>
                   &int_feed_batch,
               const std::vector<std::string> &int_feed_name,
+              const std::vector<std::vector<int>> &int_shape,
               const std::vector<std::string> &fetch_name,
               PredictorRes &predict_res_batch,
               const int &pid) {
              return self.batch_predict(float_feed_batch,
                                        float_feed_name,
+                                       float_shape,
                                        int_feed_batch,
                                        int_feed_name,
+                                       int_shape,
                                        fetch_name,
                                        predict_res_batch,
                                        pid);
-           });
+           },
+           py::call_guard<py::gil_scoped_release>());
 }
 
 }  // namespace general_model
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 6992227cd51128d53253c97aee784af31a593dfc..5622970cc44e852864215c5bba14464362d99312 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -79,27 +79,25 @@ int GeneralResponseOp::inference() {
     }
 
     const TensorVector *in = &input_blob->tensor_vector;
-    int batch_size = input_blob->GetBatchSize();
-    VLOG(2) << "input batch size: " << batch_size;
 
     ModelOutput *output = res->add_outputs();
-    output->set_engine_name(
-        pre_name);  // To get the order of model return values
-    for (int i = 0; i < batch_size; ++i) {
-      FetchInst *fetch_inst = output->add_insts();
-      for (auto &idx : fetch_index) {
-        Tensor *tensor = fetch_inst->add_tensor_array();
-        // currently only response float tensor or lod_tensor
-        tensor->set_elem_type(1);
-        if (model_config->_is_lod_fetch[idx]) {
-          VLOG(2) << "out[" << idx << " is lod_tensor";
-          tensor->add_shape(-1);
-        } else {
-          VLOG(2) << "out[" << idx << "] is tensor";
-          for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-            VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
-            tensor->add_shape(in->at(idx).shape[k]);
-          }
+    // To get the order of model return values
+    output->set_engine_name(pre_name);
+    FetchInst *fetch_inst = output->add_insts();
+    for (auto &idx : fetch_index) {
+      Tensor *tensor = fetch_inst->add_tensor_array();
+      tensor->set_elem_type(1);
+      if (model_config->_is_lod_fetch[idx]) {
+        VLOG(2) << "out[" << idx << "] is lod_tensor";
+        for (int k = 0; k < in->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+          tensor->add_shape(in->at(idx).shape[k]);
+        }
+      } else {
+        VLOG(2) << "out[" << idx << "] is tensor";
+        for (int k = 0; k < in->at(idx).shape.size(); ++k) {
+          VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+          tensor->add_shape(in->at(idx).shape[k]);
         }
       }
     }
@@ -107,66 +105,42 @@ int GeneralResponseOp::inference() {
     int var_idx = 0;
     for (auto &idx : fetch_index) {
       int cap = 1;
-      for (int j = 1; j < in->at(idx).shape.size(); ++j) {
+      for (int j = 0; j < in->at(idx).shape.size(); ++j) {
         cap *= in->at(idx).shape[j];
       }
       if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
         int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
         if (model_config->_is_lod_fetch[idx]) {
-          for (int j = 0; j < batch_size; ++j) {
-            for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-                 k++) {
-              FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
-                  data_ptr[k]);
-            }
+          FetchInst *fetch_p = output->mutable_insts(0);
+          for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+            fetch_p->mutable_tensor_array(var_idx)->add_lod(
+                in->at(idx).lod[0][j]);
+          }
+          for (int j = 0; j < cap; ++j) {
+            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
           }
         } else {
-          int var_size = in->at(idx).shape[0];
-          if (var_size == batch_size) {
-            for (int j = 0; j < batch_size; ++j) {
-              for (int k = j * cap; k < (j + 1) * cap; ++k) {
-                FetchInst *fetch_p = output->mutable_insts(j);
-                fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
-                    data_ptr[k]);
-              }
-            }
-          } else {
-            for (int j = 0; j < batch_size; ++j) {
-              FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
-                  data_ptr[0]);
-            }
+          FetchInst *fetch_p = output->mutable_insts(0);
+          for (int j = 0; j < cap; ++j) {  // int64 values belong in int64_data
+            fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
           }
         }
         var_idx++;
       } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
         float *data_ptr = static_cast<float *>(in->at(idx).data.data());
         if (model_config->_is_lod_fetch[idx]) {
-          for (int j = 0; j < batch_size; ++j) {
-            for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
-                 k++) {
-              FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
-                  data_ptr[k]);
-            }
+          FetchInst *fetch_p = output->mutable_insts(0);
+          for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+            fetch_p->mutable_tensor_array(var_idx)->add_lod(
+                in->at(idx).lod[0][j]);
+          }
+          for (int j = 0; j < cap; ++j) {
+            fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
           }
         } else {
-          int var_size = in->at(idx).shape[0];
-          if (var_size == batch_size) {
-            for (int j = 0; j < batch_size; ++j) {
-              for (int k = j * cap; k < (j + 1) * cap; ++k) {
-                FetchInst *fetch_p = output->mutable_insts(j);
-                fetch_p->mutable_tensor_array(var_idx)->add_float_data(
-                    data_ptr[k]);
-              }
-            }
-          } else {
-            for (int j = 0; j < batch_size; ++j) {
-              FetchInst *fetch_p = output->mutable_insts(j);
-              fetch_p->mutable_tensor_array(var_idx)->add_float_data(
-                  data_ptr[0]);
-            }
+          FetchInst *fetch_p = output->mutable_insts(0);
+          for (int j = 0; j < cap; ++j) {
+            fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
           }
         }
         var_idx++;
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 9ddd029982c3b2116a1e782a87939f56da60aa1b..8581ecb2a2e10deced910a20ce26c2beaca956fa 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -26,6 +26,7 @@ message Tensor {
   repeated float float_data = 4;
   optional int32 elem_type = 5;
   repeated int32 shape = 6;
+  repeated int32 lod = 7; // level-0 LoD offsets; currently filled in only for fetch (response) tensors
 };
 
 message FeedInst { repeated Tensor tensor_array = 1; };
diff --git a/core/predictor/Dockerfile b/core/predictor/Dockerfile
deleted file mode 100644
index 71b824c158f5231e41cfe885516c902fedfc521d..0000000000000000000000000000000000000000
--- a/core/predictor/Dockerfile
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM registry.baidu.com/public/centos6u3-online:gcc482
-MAINTAINER predictor@baidu.com
-LABEL Description="paddle serving docker image"
-USER root
-RUN echo "Enjoy your paddle serving journey!"
-ADD conf /home/work/paddle-serving/conf
-ADD data /home/work/paddle-serving/data
-ADD bin /home/work/paddle-serving/bin
-RUN wget ftp://st01-rdqa-dev055-wanlijin01.epc.baidu.com/home/users/wanlijin01/workspace/baidu/paddle-serving/predictor/data.tar.gz -O /tmp/data.tar.gz \
-    && tar -C /home/work/paddle-serving -xvzf /tmp/data.tar.gz \
-    && rm /tmp/data.tar.gz \
-    && cd /home/work/paddle-serving/ \
-    && chmod a+x bin/pdserving  \
-    && chmod a+x bin/start.sh \
-    && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/workflow.conf \
-    && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/resource.conf \
-    && sed -i 's/\.\/log/\/home\/work\/paddle-serving\/log/g' conf/log.conf \
-    && sed -i 's/\.\/data/\/home\/work\/paddle-serving\/data/g' conf/model_toolkit.conf \
-    && mkdir -p /home/work/paddle-serving/log
-CMD sh /home/work/paddle-serving/bin/start.sh -c "trap : TERM INT; sleep infinity & wait"
diff --git a/core/predictor/Dockerfile.gpu b/core/predictor/Dockerfile.gpu
deleted file mode 100644
index f0922dc83d65b6139730ad818c9fc781d40df994..0000000000000000000000000000000000000000
--- a/core/predictor/Dockerfile.gpu
+++ /dev/null
@@ -1,20 +0,0 @@
-FROM registry.baidu.com/paddlecloud/paddlecloud-runenv-centos6u3-bce:paddlecloud-fluid-gcc482-cuda8.0_cudnn5_bce
-MAINTAINER predictor@baidu.com
-LABEL Description="paddle serving docker image"
-USER root
-RUN echo "Enjoy your paddle serving journey!"
-ADD conf /home/work/paddle-serving/conf
-ADD data /home/work/paddle-serving/data
-ADD bin /home/work/paddle-serving/bin
-RUN wget ftp://st01-rdqa-dev055-wanlijin01.epc.baidu.com/home/users/wanlijin01/workspace/baidu/paddle-serving/predictor/data.tar.gz -O /tmp/data.tar.gz \
-    && tar -C /home/work/paddle-serving -xvzf /tmp/data.tar.gz \
-    && rm /tmp/data.tar.gz \
-    && cd /home/work/paddle-serving/ \
-    && chmod a+x bin/pdserving  \
-    && chmod a+x bin/start.sh \
-    && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/workflow.conf \
-    && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/resource.conf \
-    && sed -i 's/\.\/log/\/home\/work\/paddle-serving\/log/g' conf/log.conf \
-    && sed -i 's/\.\/data/\/home\/work\/paddle-serving\/data/g' conf/model_toolkit.conf \
-    && mkdir -p /home/work/paddle-serving/log
-CMD sh /home/work/paddle-serving/bin/start.sh -c "trap : TERM INT; sleep infinity & wait"
diff --git a/core/predictor/build.sh b/core/predictor/build.sh
deleted file mode 100755
index 781af834c233b879f96463d924c8facd185422f3..0000000000000000000000000000000000000000
--- a/core/predictor/build.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-function install_pdserving_lib(){
-    ret=1
-    local pdserving_lib_mode=$1
-    case $pdserving_lib_mode in
-        local)
-            local pdserving_local_path=$2
-            if [ ! -d $pdserving_local_path ]; then
-                echo "[WARN failed to find local path]"
-                return ret
-            fi
-            lib_name=`basename $pdserving_local_path`
-            if [ -d ${CITOOLS}/$lib_name ]; then
-                rm -rf ${CITOOLS}/$lib_name
-            fi
-            cp -rf $pdserving_local_path ${CITOOLS}/
-            source ${CITOOLS}/$lib_name/predictor_build_lib.sh
-        ;;
-        ftp)
-            local wgetOptions="--tries=3 --retry-connrefused -r -l0 -nv --limit-rate=50m -nH --cut-dirs=5"
-            pdserving_lib_ftp_path="ftp://tc-orp-app2.tc.baidu.com:/home/heqing/scmbak/common_lib/pdserving_cts/pdserving_lib"
-            lib_name=`basename $pdserving_lib_ftp_path`
-            if [ -d ${CITOOLS}/$lib_name ]; then
-                rm -rf ${CITOOLS}/$lib_name
-            fi
-            echo "wget cmd is :$wgetOptions  $pdserving_lib_ftp_path"
-            echo "lib_name is :${lib_name}"
-            wget $wgetOptions$cur_dirs  $pdserving_lib_ftp_path
-            mv ${lib_name} ${CITOOLS}/
-            source ${CITOOLS}/${lib_name}/predictor_build_lib.sh
-        ;;
-        *)
-            ret=0
-            echo "todo"
-        ;;
-    esac
-    return $ret
-}
-
-CUR_PATH=$(pwd)
-WORK_PATH=$(pwd)
-WORK_ROOT=${WORK_PATH%%/baidu/*}
-#co citools
-CITOOLS="${WORK_ROOT}/baidu/fengchao-qa/citools"
-if [ -d ${CITOOLS} ];then
-    rm -rf ${CITOOLS}
-fi
-git clone --depth 1 ssh://git@icode.baidu.com:8235/baidu/fengchao-qa/citools $CITOOLS >/dev/null
-[[ $? != 0 ]] && exit 1
-source  $CITOOLS/lib/localbuild_lib.sh
-#source����·�����ܸı䣬��Ҫ���¸�ֵ
-CITOOLS="${WORK_ROOT}/baidu/fengchao-qa/citools"
-
-#install_pdserving_lib
-pdserving_lib_mode="ftp"
-install_pdserving_lib ${pdserving_lib_mode}    #����ģʽ:�����local����Ҫָ��������pdserving_lib��·��
-#source ${CITOOLS}/pdserving_lib/predictor_build_lib.sh
-
-COVMODULEID=8652
-TYPE=framework
-#ִ�б�ģ�鹹����ʼ��
-predictor_build_init
-WORKROOT=$WORK_ROOT
-#ִ�й�������
-predictor_build_do $@
-
-exit 0
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index cd8f59f64de9add013fd3bb6e45321fff250d4e7..51c0335a9db896e1260e83915de81e51451a904b 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -26,6 +26,7 @@ message Tensor {
   repeated float float_data = 4;
   optional int32 elem_type = 5;
   repeated int32 shape = 6;
+  repeated int32 lod = 7; // level-0 LoD offsets; currently filled in only for fetch (response) tensors
 };
 
 message FeedInst { repeated Tensor tensor_array = 1; };
diff --git a/doc/DESIGN.md b/doc/DESIGN.md
index 5d00d02171dccf07bfdafb9cdd85222a92c20113..4d4055a7936fb1791ebe15a4c41c10a00a78c1f8 100644
--- a/doc/DESIGN.md
+++ b/doc/DESIGN.md
@@ -260,6 +260,7 @@ class Op {
 
 ```
 
+
 ### 5.4 Interfaces related to framework
 
 Service
diff --git a/doc/SAVE_CN.md b/doc/SAVE_CN.md
index 0e2ecd5b71b860e887027564940e9e64522e097f..43b62c2ac623b386505356194ac136ea305fe683 100644
--- a/doc/SAVE_CN.md
+++ b/doc/SAVE_CN.md
@@ -2,7 +2,7 @@
 
 (简体中文|[English](./SAVE.md))
 
-- 目前，Paddle服务提供了一个save_model接口供用户访问，该接口与Paddle的`save_inference_model`类似。
+- 目前，Paddle Serving提供了一个save_model接口供用户访问，该接口与Paddle的`save_inference_model`类似。
 
 ``` python
 import paddle_serving_client.io as serving_io
diff --git a/doc/DOCKER.md b/doc/deprecated/DOCKER.md
similarity index 100%
rename from doc/DOCKER.md
rename to doc/deprecated/DOCKER.md
diff --git a/doc/DOCKER_CN.md b/doc/deprecated/DOCKER_CN.md
similarity index 100%
rename from doc/DOCKER_CN.md
rename to doc/deprecated/DOCKER_CN.md
diff --git a/doc/serving_logo.png b/doc/serving_logo.png
deleted file mode 100644
index 2510fd62ecc2bf2954a4a2ff7491f565f1528ebf..0000000000000000000000000000000000000000
Binary files a/doc/serving_logo.png and /dev/null differ
diff --git a/python/examples/criteo_ctr/test_client.py b/python/examples/criteo_ctr/test_client.py
index d53c5541c36f4eb52618e3498eda571dd2bcab53..2beac850228291c49d56c1180365fdd8e627ffc0 100644
--- a/python/examples/criteo_ctr/test_client.py
+++ b/python/examples/criteo_ctr/test_client.py
@@ -51,6 +51,5 @@ for ei in range(1000):
     for i in range(1, 27):
         feed_dict["sparse_{}".format(i - 1)] = data[0][i]
     fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
-    #print(fetch_map)
 end = time.time()
 print(end - start)
diff --git a/python/examples/criteo_ctr_with_cube/test_client.py b/python/examples/criteo_ctr_with_cube/test_client.py
index de205ebc68af02e8dd978da51a4c43bef0cec0d4..ca752b763e067b6a73e28c1d2ab9f58b9b98ba5d 100755
--- a/python/examples/criteo_ctr_with_cube/test_client.py
+++ b/python/examples/criteo_ctr_with_cube/test_client.py
@@ -40,7 +40,7 @@ for ei in range(10000):
     for i in range(1, 27):
         feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i]
     fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
-    prob_list.append(fetch_map['prob'][1])
+    prob_list.append(fetch_map['prob'][0][1])
     label_list.append(data[0][-1][0])
 
 print(auc(label_list, prob_list))
diff --git a/python/examples/fit_a_line/test_numpy_input_client.py b/python/examples/fit_a_line/test_numpy_input_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..8557ed09fe0118d35e1cb169cec4e93442cf927a
--- /dev/null
+++ b/python/examples/fit_a_line/test_numpy_input_client.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from paddle_serving_client import Client
+import numpy as np
+import sys
+
+client = Client()
+client.load_client_config(sys.argv[1])
+client.connect(["127.0.0.1:9393"])
+
+import paddle
+test_reader = paddle.batch(
+    paddle.reader.shuffle(
+        paddle.dataset.uci_housing.test(), buf_size=500),
+    batch_size=1)
+
+for data in test_reader():
+    fetch_map = client.predict(
+        feed={"x": np.array(data[0][0])}, fetch=["price"])
+    print("{} {}".format(fetch_map["price"][0][0], data[0][1][0]))
diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py
index 548a40e4931e7f0a2ea4a4e9d3c05f40e7d34426..9de7a45b0646167c43ea7d3b98b0f3782112f6f0 100644
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -31,4 +31,4 @@ for line in sys.stdin:
     feed = {"words": word_ids}
     fetch = ["acc", "cost", "prediction"]
     fetch_map = client.predict(feed=feed, fetch=fetch)
-    print("{} {}".format(fetch_map["prediction"][1], label[0]))
+    print("{} {}".format(fetch_map["prediction"][0][1], label[0]))
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 03542de7481c24b0b21b72635e541b77f60d6d16..52e0467c7115258fd188ea2fc8c6036a6d903499 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -18,6 +18,8 @@ import os
 from .proto import sdk_configure_pb2 as sdk
 from .proto import general_model_config_pb2 as m_config
 import google.protobuf.text_format
+import numpy as np
+import time
 import sys
 
 int_type = 0
@@ -119,6 +121,7 @@ class Client(object):
         self.fetch_names_to_idx_ = {}
         self.lod_tensor_set = set()
         self.feed_tensor_len = {}
+
         for i, var in enumerate(model_conf.feed_var):
             self.feed_names_to_idx_[var.alias_name] = i
             self.feed_types_[var.alias_name] = var.feed_type
@@ -131,11 +134,11 @@ class Client(object):
                 for dim in self.feed_shapes_[var.alias_name]:
                     counter *= dim
                 self.feed_tensor_len[var.alias_name] = counter
-
         for i, var in enumerate(model_conf.fetch_var):
             self.fetch_names_to_idx_[var.alias_name] = i
             self.fetch_names_to_type_[var.alias_name] = var.fetch_type
-
+            if var.is_lod_tensor:
+                self.lod_tensor_set.add(var.alias_name)
         return
 
     def add_variant(self, tag, cluster, variant_weight):
@@ -162,7 +165,6 @@ class Client(object):
                     "parameter endpoints({}) will not take effect, because you use the add_variant function.".
                     format(endpoints))
         sdk_desc = self.predictor_sdk_.gen_desc()
-        print(sdk_desc)
         self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
         ))
 
@@ -203,6 +205,8 @@ class Client(object):
         float_slot_batch = []
         int_feed_names = []
         float_feed_names = []
+        int_shape = []
+        float_shape = []
         fetch_names = []
         counter = 0
         batch_size = len(feed_batch)
@@ -219,64 +223,85 @@ class Client(object):
         for i, feed_i in enumerate(feed_batch):
             int_slot = []
             float_slot = []
+            # int_shape/float_shape are filled once, from the first sample
+            # (i == 0), so they must not be reset for each sample in the batch.
             for key in feed_i:
                 if key not in self.feed_names_:
                     raise ValueError("Wrong feed name: {}.".format(key))
-                self.shape_check(feed_i, key)
+                if not isinstance(feed_i[key], np.ndarray):
+                    self.shape_check(feed_i, key)
                 if self.feed_types_[key] == int_type:
                     if i == 0:
                         int_feed_names.append(key)
-                    int_slot.append(feed_i[key])
+                        if isinstance(feed_i[key], np.ndarray):
+                            int_shape.append(list(feed_i[key].shape))
+                        else:
+                            int_shape.append(self.feed_shapes_[key])
+                    if isinstance(feed_i[key], np.ndarray):
+                        int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
+                    else:
+                        int_slot.append(feed_i[key])
                 elif self.feed_types_[key] == float_type:
                     if i == 0:
                         float_feed_names.append(key)
-                    float_slot.append(feed_i[key])
-            if len(int_slot) + len(float_slot) == 0:
-                raise ValueError("No feed data for predict.")
+                        if isinstance(feed_i[key], np.ndarray):
+                            float_shape.append(list(feed_i[key].shape))
+                        else:
+                            float_shape.append(self.feed_shapes_[key])
+                    if isinstance(feed_i[key], np.ndarray):
+                        float_slot.append(
+                            np.reshape(feed_i[key], (-1)).tolist())
+                    else:
+                        float_slot.append(feed_i[key])
             int_slot_batch.append(int_slot)
             float_slot_batch.append(float_slot)
 
         result_batch = self.result_handle_
         res = self.client_handle_.batch_predict(
-            float_slot_batch, float_feed_names, int_slot_batch, int_feed_names,
-            fetch_names, result_batch, self.pid)
+            float_slot_batch, float_feed_names, float_shape, int_slot_batch,
+            int_feed_names, int_shape, fetch_names, result_batch, self.pid)
 
         if res == -1:
             return None
 
-        multi_result_map_batch = []
+        multi_result_map = []
         model_engine_names = result_batch.get_engine_names()
         for mi, engine_name in enumerate(model_engine_names):
-            result_map_batch = []
             result_map = {}
+            # each fetched tensor is stored in result_map as a numpy array
             for i, name in enumerate(fetch_names):
                 if self.fetch_names_to_type_[name] == int_type:
                     result_map[name] = result_batch.get_int64_by_name(mi, name)
+                    shape = result_batch.get_shape(mi, name)
+                    result_map[name] = np.array(result_map[name])
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        result_map["{}.lod".format(
+                            name)] = result_batch.get_lod(mi, name)
                 elif self.fetch_names_to_type_[name] == float_type:
                     result_map[name] = result_batch.get_float_by_name(mi, name)
-            for i in range(batch_size):
-                single_result = {}
-                for key in result_map:
-                    single_result[key] = result_map[key][i]
-                result_map_batch.append(single_result)
-            multi_result_map_batch.append(result_map_batch)
+                    shape = result_batch.get_shape(mi, name)
+                    result_map[name] = np.array(result_map[name])
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        result_map["{}.lod".format(
+                            name)] = result_batch.get_lod(mi, name)
+            multi_result_map.append(result_map)
 
         ret = None
         if len(model_engine_names) == 1:
-            if batch_size == 1:
-                ret = multi_result_map_batch[0][0]
-            else:
-                ret = multi_result_map_batch[0]
+            # If only one model result is returned, the format of ret is result_map
+            ret = multi_result_map[0]
         else:
-            ret = {}
-            if batch_size == 1:
-                for mi, result_map_batch in enumerate(multi_result_map_batch):
-                    ret[model_engine_names[mi]] = result_map_batch[0]
-            else:
-                for mi, result_map_batch in enumerate(multi_result_map_batch):
-                    ret[model_engine_names[mi]] = result_map_batch
-        return [ret,
-                self.result_handle_.variant_tag()] if need_variant_tag else ret
+            # If multiple model results are returned, the format of ret is {engine_name: result_map}
+            ret = {
+                engine_name: multi_result_map[mi]
+                for mi, engine_name in enumerate(model_engine_names)
+            }
+
+        return ret if not need_variant_tag else [
+            ret, self.result_handle_.variant_tag()
+        ]
 
     def release(self):
         self.client_handle_.destroy_predictor()
diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py
index c1a86eaecc899c987bd346f8a747fb486d4789ee..ca43426c2a82a0c8be296c8410361acbf498fc5c 100755
--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -67,11 +67,15 @@ class WebService(object):
                     feed_batch=feed, fetch=fetch)
                 fetch_map_batch = self.postprocess(
                     feed=request.json, fetch=fetch, fetch_map=fetch_map_batch)
+                for key in fetch_map_batch:
+                    fetch_map_batch[key] = fetch_map_batch[key].tolist()
                 result = {"result": fetch_map_batch}
             elif isinstance(feed, dict):
                 if "fetch" in feed:
                     del feed["fetch"]
                 fetch_map = self.client_service.predict(feed=feed, fetch=fetch)
+                for key in fetch_map:
+                    fetch_map[key] = fetch_map[key][0].tolist()
                 result = self.postprocess(
                     feed=request.json, fetch=fetch, fetch_map=fetch_map)
         except ValueError:
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
old mode 100755
new mode 100644
index 25e3a315dd5b848c77b9533a974d7707e5b67991..37425acd6b7209fbdcf38a52c5a78e0c15b4cf61
--- a/python/paddle_serving_server_gpu/web_service.py
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -104,22 +104,13 @@ class WebService(object):
             abort(400)
         if "fetch" not in request.json:
             abort(400)
-        try:
-            feed, fetch = self.preprocess(request.json, request.json["fetch"])
-            if isinstance(feed, list):
-                fetch_map_batch = self.client.predict(
-                    feed_batch=feed, fetch=fetch)
-                fetch_map_batch = self.postprocess(
-                    feed=request.json, fetch=fetch, fetch_map=fetch_map_batch)
-                result = {"result": fetch_map_batch}
-            elif isinstance(feed, dict):
-                if "fetch" in feed:
-                    del feed["fetch"]
-                fetch_map = self.client.predict(feed=feed, fetch=fetch)
-                result = self.postprocess(
-                    feed=request.json, fetch=fetch, fetch_map=fetch_map)
-        except ValueError:
-            result = {"result": "Request Value Error"}
+        feed, fetch = self.preprocess(request.json, request.json["fetch"])
+        fetch_map_batch = self.client.predict(feed=feed, fetch=fetch)
+        fetch_map_batch = self.postprocess(
+            feed=request.json, fetch=fetch, fetch_map=fetch_map_batch)
+        for key in fetch_map_batch:
+            fetch_map_batch[key] = fetch_map_batch[key].tolist()
+        result = {"result": fetch_map_batch}
         return result
 
     def run_server(self):
diff --git a/python/requirements.txt b/python/requirements.txt
index 5359d565e8f612822e1a0c61ee27018daa4b0e1b..d445216b3112ea3d5791045b43a6a3147865522f 100644
--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -1,3 +1 @@
-protobuf>=3.1.0
-six
-paddlepaddle-gpu
+numpy>=1.12, <=1.16.4 ; python_version<"3.5"
diff --git a/python/setup.py.client.in b/python/setup.py.client.in
index 381fb2a8853cc4d5494e3eac520ab183db6eab09..58061f7c887be23223f554d383c98bd75fb4828b 100644
--- a/python/setup.py.client.in
+++ b/python/setup.py.client.in
@@ -53,7 +53,7 @@ if '${PACK}' == 'ON':
 
 
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'protobuf >= 3.1.0'
+    'six >= 1.10.0', 'protobuf >= 3.1.0', 'numpy >= 1.12'
 ]
 
 if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"):
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index b613701110ee7f2ddb123bf611e2174b18d69346..ccd4c2f608e12a01c2a711ff503f99bc754bae2e 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -18,6 +18,7 @@ function init() {
     export PYTHONROOT=/usr
     cd Serving
     export SERVING_WORKDIR=$PWD
+    $PYTHONROOT/bin/python -m pip install -r python/requirements.txt
 }
 
 function check_cmd() {