diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 9d0cc8b66cfe2e3fe2f4d012c7920f518d32ef5a..7e04ae11f2106bc8e03fb9045976abc2460e1864 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -17,18 +17,17 @@
 #include <sys/types.h>
 #include <unistd.h>

+#include <pybind11/numpy.h>
 #include <fstream>
 #include <map>
 #include <memory>
 #include <string>
 #include <utility>  // move
 #include <vector>
-
 #include "core/sdk-cpp/builtin_format.pb.h"
 #include "core/sdk-cpp/general_model_service.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
-
 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;

@@ -36,6 +35,7 @@
 DECLARE_bool(profile_client);
 DECLARE_bool(profile_server);
 // given some input data, pack into pb, and send request
+namespace py = pybind11;
 namespace baidu {
 namespace paddle_serving {
 namespace general_model {
@@ -178,6 +178,17 @@ class PredictorClient {
      PredictorRes& predict_res_batch,  // NOLINT
      const int& pid);

+  int numpy_predict(
+      const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
+      const std::vector<std::string>& float_feed_name,
+      const std::vector<std::vector<int>>& float_shape,
+      const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
+      const std::vector<std::string>& int_feed_name,
+      const std::vector<std::vector<int>>& int_shape,
+      const std::vector<std::string>& fetch_name,
+      PredictorRes& predict_res_batch,  // NOLINT
+      const int& pid);
+
  private:
   PredictorApi _api;
   Predictor* _predictor;
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 92dca7eeba53c2fa23020526faa83a19a38633b6..5d4f732fc19b605cb2e130c61a2e3cc0b2edc13a 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -30,6 +30,7 @@
 using baidu::paddle_serving::predictor::general_model::FeedInst;
 using baidu::paddle_serving::predictor::general_model::FetchInst;

 std::once_flag gflags_init_flag;
+namespace py = pybind11;
 namespace baidu {
 namespace paddle_serving {
@@ -332,6 +333,284 @@ int PredictorClient::batch_predict(
   return 0;
 }

+int PredictorClient::numpy_predict(
+    const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
+    const std::vector<std::string> &float_feed_name,
+    const std::vector<std::vector<int>> &float_shape,
+    const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
+    const std::vector<std::string> &int_feed_name,
+    const std::vector<std::vector<int>> &int_shape,
+    const std::vector<std::string> &fetch_name,
+    PredictorRes &predict_res_batch,
+    const int &pid) {
+  int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
+
+  predict_res_batch.clear();
+  Timer timeline;
+  int64_t preprocess_start = timeline.TimeStampUS();
+
+  int fetch_name_num = fetch_name.size();
+
+  _api.thrd_initialize();
+  std::string variant_tag;
+  _predictor = _api.fetch_predictor("general_model", &variant_tag);
+  predict_res_batch.set_variant_tag(variant_tag);
+  VLOG(2) << "fetch general model predictor done.";
+  VLOG(2) << "float feed name size: " << float_feed_name.size();
+  VLOG(2) << "int feed name size: " << int_feed_name.size();
+  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+  Request req;
+  for (auto &name : fetch_name) {
+    req.add_fetch_var_names(name);
+  }
+
+  for (int bi = 0; bi < batch_size; bi++) {
+    VLOG(2) << "prepare batch " << bi;
+    std::vector<Tensor *> tensor_vec;
+    FeedInst *inst = req.add_insts();
+    std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
+    std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
+    for (auto &name : float_feed_name) {
+      tensor_vec.push_back(inst->add_tensor_array());
+    }
+
+    for (auto &name : int_feed_name) {
+      tensor_vec.push_back(inst->add_tensor_array());
+    }
+
VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name " + << "prepared"; + + int vec_idx = 0; + VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape " + << float_shape.size(); + for (auto &name : float_feed_name) { + int idx = _feed_name_to_idx[name]; + Tensor *tensor = tensor_vec[idx]; + VLOG(2) << "prepare float feed " << name << " shape size " + << float_shape[vec_idx].size(); + for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) { + tensor->add_shape(float_shape[vec_idx][j]); + } + tensor->set_elem_type(1); + const int float_shape_size = float_shape[vec_idx].size(); + switch (float_shape_size) { + case 4: { + auto float_array = float_feed[vec_idx].unchecked<4>(); + for (ssize_t i = 0; i < float_array.shape(0); i++) { + for (ssize_t j = 0; j < float_array.shape(1); j++) { + for (ssize_t k = 0; k < float_array.shape(2); k++) { + for (ssize_t l = 0; l < float_array.shape(3); l++) { + tensor->add_float_data(float_array(i, j, k, l)); + } + } + } + } + break; + } + case 3: { + auto float_array = float_feed[vec_idx].unchecked<3>(); + for (ssize_t i = 0; i < float_array.shape(0); i++) { + for (ssize_t j = 0; j < float_array.shape(1); j++) { + for (ssize_t k = 0; k < float_array.shape(2); k++) { + tensor->add_float_data(float_array(i, j, k)); + } + } + } + break; + } + case 2: { + auto float_array = float_feed[vec_idx].unchecked<2>(); + for (ssize_t i = 0; i < float_array.shape(0); i++) { + for (ssize_t j = 0; j < float_array.shape(1); j++) { + tensor->add_float_data(float_array(i, j)); + } + } + break; + } + case 1: { + auto float_array = float_feed[vec_idx].unchecked<1>(); + for (ssize_t i = 0; i < float_array.shape(0); i++) { + tensor->add_float_data(float_array(i)); + } + break; + } + } + vec_idx++; + } + + VLOG(2) << "batch [" << bi << "] " + << "float feed value prepared"; + + vec_idx = 0; + for (auto &name : int_feed_name) { + int idx = _feed_name_to_idx[name]; + Tensor *tensor = tensor_vec[idx]; + VLOG(2) << "prepare int feed " << name << " shape size " + << int_shape[vec_idx].size(); + for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { + tensor->add_shape(int_shape[vec_idx][j]); + } + tensor->set_elem_type(0); + + const int int_shape_size = int_shape[vec_idx].size(); + switch (int_shape_size) { + case 4: { + auto int_array = int_feed[vec_idx].unchecked<4>(); + for (ssize_t i = 0; i < int_array.shape(0); i++) { + for (ssize_t j = 0; j < int_array.shape(1); j++) { + for (ssize_t k = 0; k < int_array.shape(2); k++) { + for (ssize_t l = 0; k < int_array.shape(3); l++) { + tensor->add_float_data(int_array(i, j, k, l)); + } + } + } + } + break; + } + case 3: { + auto int_array = int_feed[vec_idx].unchecked<3>(); + for (ssize_t i = 0; i < int_array.shape(0); i++) { + for (ssize_t j = 0; j < int_array.shape(1); j++) { + for (ssize_t k = 0; k < int_array.shape(2); k++) { + tensor->add_float_data(int_array(i, j, k)); + } + } + } + break; + } + case 2: { + auto int_array = int_feed[vec_idx].unchecked<2>(); + for (ssize_t i = 0; i < int_array.shape(0); i++) { + for (ssize_t j = 0; j < int_array.shape(1); j++) { + tensor->add_float_data(int_array(i, j)); + } + } + break; + } + case 1: { + auto int_array = int_feed[vec_idx].unchecked<1>(); + for (ssize_t i = 0; i < int_array.shape(0); i++) { + tensor->add_float_data(int_array(i)); + } + break; + } + } + vec_idx++; + } + + VLOG(2) << "batch [" << bi << "] " + << "int feed value prepared"; + } + + int64_t preprocess_end = timeline.TimeStampUS(); + + int64_t client_infer_start = timeline.TimeStampUS(); + + 
+  Response res;
+
+  int64_t client_infer_end = 0;
+  int64_t postprocess_start = 0;
+  int64_t postprocess_end = 0;
+
+  if (FLAGS_profile_client) {
+    if (FLAGS_profile_server) {
+      req.set_profile_server(true);
+    }
+  }
+
+  res.Clear();
+  if (_predictor->inference(&req, &res) != 0) {
+    LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
+    return -1;
+  } else {
+    client_infer_end = timeline.TimeStampUS();
+    postprocess_start = client_infer_end;
+    VLOG(2) << "get model output num";
+    uint32_t model_num = res.outputs_size();
+    VLOG(2) << "model num: " << model_num;
+    for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
+      VLOG(2) << "process model output index: " << m_idx;
+      auto output = res.outputs(m_idx);
+      ModelRes model;
+      model.set_engine_name(output.engine_name());
+
+      for (auto &name : fetch_name) {
+        // int idx = _fetch_name_to_idx[name];
+        int idx = 0;
+        int shape_size = output.insts(0).tensor_array(idx).shape_size();
+        VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
+                << shape_size;
+        model._shape_map[name].resize(shape_size);
+        for (int i = 0; i < shape_size; ++i) {
+          model._shape_map[name][i] =
+              output.insts(0).tensor_array(idx).shape(i);
+        }
+        int lod_size = output.insts(0).tensor_array(idx).lod_size();
+        if (lod_size > 0) {
+          model._lod_map[name].resize(lod_size);
+          for (int i = 0; i < lod_size; ++i) {
+            model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
+          }
+        }
+        idx += 1;
+      }
+
+      for (auto &name : fetch_name) {
+        // int idx = _fetch_name_to_idx[name];
+        int idx = 0;
+        if (_fetch_name_to_type[name] == 0) {
+          VLOG(2) << "fetch var " << name << " type int";
+          model._int64_value_map[name].resize(
+              output.insts(0).tensor_array(idx).int64_data_size());
+          int size = output.insts(0).tensor_array(idx).int64_data_size();
+          for (int i = 0; i < size; ++i) {
+            model._int64_value_map[name][i] =
+                output.insts(0).tensor_array(idx).int64_data(i);
+          }
+        } else {
+          VLOG(2) << "fetch var " << name << " type float";
+          model._float_value_map[name].resize(
+              output.insts(0).tensor_array(idx).float_data_size());
+          int size = output.insts(0).tensor_array(idx).float_data_size();
+          for (int i = 0; i < size; ++i) {
+            model._float_value_map[name][i] =
+                output.insts(0).tensor_array(idx).float_data(i);
+          }
+        }
+        idx += 1;
+      }
+      predict_res_batch.add_model_res(std::move(model));
+    }
+    postprocess_end = timeline.TimeStampUS();
+  }
+
+  if (FLAGS_profile_client) {
+    std::ostringstream oss;
+    oss << "PROFILE\t"
+        << "pid:" << pid << "\t"
+        << "prepro_0:" << preprocess_start << " "
+        << "prepro_1:" << preprocess_end << " "
+        << "client_infer_0:" << client_infer_start << " "
+        << "client_infer_1:" << client_infer_end << " ";
+    if (FLAGS_profile_server) {
+      int op_num = res.profile_time_size() / 2;
+      for (int i = 0; i < op_num; ++i) {
+        oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
+        oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
+      }
+    }
+
+    oss << "postpro_0:" << postprocess_start << " ";
+    oss << "postpro_1:" << postprocess_end;
+
+    fprintf(stderr, "%s\n", oss.str().c_str());
+  }
+
+  _api.thrd_clear();
+  return 0;
+}
+
 }  // namespace general_model
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index 5eab58989d23c6ab95a8351d76f11316bc28c76a..b0d1d2d624d616a1df3805364cf7802cc19fc46b 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -100,6 +100,29 @@ PYBIND11_MODULE(serving_client, m) {
                                        fetch_name,
                                        predict_res_batch,
                                        pid);
+           })
+      .def("numpy_predict",
+           [](PredictorClient &self,
+              const std::vector<std::vector<py::array_t<float>>>
+                  &float_feed_batch,
+              const std::vector<std::string> &float_feed_name,
+              const std::vector<std::vector<int>> &float_shape,
+              const std::vector<std::vector<py::array_t<int64_t>>>
+                  &int_feed_batch,
+              const std::vector<std::string> &int_feed_name,
+              const std::vector<std::vector<int>> &int_shape,
+              const std::vector<std::string> &fetch_name,
+              PredictorRes &predict_res_batch,
+              const int &pid) {
+             return self.numpy_predict(float_feed_batch,
+                                       float_feed_name,
+                                       float_shape,
+                                       int_feed_batch,
+                                       int_feed_name,
+                                       int_shape,
+                                       fetch_name,
+                                       predict_res_batch,
+                                       pid);
           },
           py::call_guard<py::gil_scoped_release>());
 }
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 07a9ab6630fa5a907423236d37dd66951b012f72..3380934931d5872afca81934724f72614bb64a13 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -118,6 +118,8 @@ class Client(object):
         self.producers = []
         self.consumer = None
         self.profile_ = _Profiler()
+        self.all_numpy_input = True
+        self.has_numpy_input = False

     def rpath(self):
         lib_path = os.path.dirname(paddle_serving_client.__file__)
@@ -269,9 +271,12 @@ class Client(object):
                         else:
                             int_shape.append(self.feed_shapes_[key])
                     if isinstance(feed_i[key], np.ndarray):
-                        int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
+                        #int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
+                        int_slot.append(feed_i[key])
+                        self.has_numpy_input = True
                     else:
                         int_slot.append(feed_i[key])
+                        self.all_numpy_input = False
                 elif self.feed_types_[key] == float_type:
                     if i == 0:
                         float_feed_names.append(key)
@@ -280,10 +285,12 @@ class Client(object):
                         else:
                             float_shape.append(self.feed_shapes_[key])
                     if isinstance(feed_i[key], np.ndarray):
-                        float_slot.append(
-                            np.reshape(feed_i[key], (-1)).tolist())
+                        #float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
+                        float_slot.append(feed_i[key])
+                        self.has_numpy_input = True
                     else:
                         float_slot.append(feed_i[key])
+                        self.all_numpy_input = False

             int_slot_batch.append(int_slot)
             float_slot_batch.append(float_slot)
@@ -291,9 +298,18 @@ class Client(object):
         self.profile_.record('py_client_infer_0')

         result_batch = self.result_handle_
-        res = self.client_handle_.batch_predict(
-            float_slot_batch, float_feed_names, float_shape, int_slot_batch,
-            int_feed_names, int_shape, fetch_names, result_batch, self.pid)
+        if self.all_numpy_input:
+            res = self.client_handle_.numpy_predict(
+                float_slot_batch, float_feed_names, float_shape, int_slot_batch,
+                int_feed_names, int_shape, fetch_names, result_batch, self.pid)
+        elif self.has_numpy_input == False:
+            res = self.client_handle_.batch_predict(
+                float_slot_batch, float_feed_names, float_shape, int_slot_batch,
+                int_feed_names, int_shape, fetch_names, result_batch, self.pid)
+        else:
+            raise SystemExit(
+                "Please make sure the inputs are all in list type or all in numpy.array type"
+            )

         self.profile_.record('py_client_infer_1')
         self.profile_.record('py_postpro_0')
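
For context, a minimal usage sketch of how the new path is exercised from the Python client. The config path, endpoint, and the `x`/`price` feed and fetch names below are placeholders for whatever general model is actually served; the dispatch rule itself comes from the diff above: if every feed value is a `numpy.ndarray`, `Client.predict` routes to `numpy_predict`; if none are, it falls back to `batch_predict`; mixing the two raises `SystemExit`.

```python
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # placeholder config path
client.connect(["127.0.0.1:9393"])  # placeholder endpoint

# Every feed value is a numpy array, so the client takes the numpy_predict
# branch and skips the per-element np.reshape(...).tolist() conversion.
feed = {"x": np.random.rand(13).astype("float32")}  # placeholder feed name/shape
fetch_map = client.predict(feed=feed, fetch=["price"])  # placeholder fetch name

# Plain Python lists still go through batch_predict; mixing lists and
# numpy arrays in a single call now exits with an error message.
```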