diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 8a4c5e8c3c15bdcb59fc5faa7461713bf291bab4..b379188854c30587d24962bc827aa099c3a39183 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -78,12 +78,18 @@ class ModelRes {
   std::vector<float>&& get_float_by_name_with_rv(const std::string& name) {
     return std::move(_float_value_map[name]);
   }
-  const std::vector<int>& get_shape(const std::string& name) {
+  const std::vector<int>& get_shape_by_name(const std::string& name) {
     return _shape_map[name];
   }
-  const std::vector<int>& get_lod(const std::string& name) {
+  std::vector<int>&& get_shape_by_name_with_rv(const std::string& name) {
+    return std::move(_shape_map[name]);
+  }
+  const std::vector<int>& get_lod_by_name(const std::string& name) {
     return _lod_map[name];
   }
+  std::vector<int>&& get_lod_by_name_with_rv(const std::string& name) {
+    return std::move(_lod_map[name]);
+  }
   void set_engine_name(const std::string& engine_name) {
     _engine_name = engine_name;
   }
@@ -139,13 +145,21 @@ class PredictorRes {
                                                 const std::string& name) {
     return std::move(_models[model_idx].get_float_by_name_with_rv(name));
   }
-  const std::vector<int>& get_shape(const int model_idx,
-                                    const std::string& name) {
-    return _models[model_idx].get_shape(name);
+  const std::vector<int>& get_shape_by_name(const int model_idx,
+                                            const std::string& name) {
+    return _models[model_idx].get_shape_by_name(name);
+  }
+  const std::vector<int>&& get_shape_by_name_with_rv(const int model_idx,
+                                                     const std::string& name) {
+    return std::move(_models[model_idx].get_shape_by_name_with_rv(name));
   }
-  const std::vector<int>& get_lod(const int model_idx,
-                                  const std::string& name) {
-    return _models[model_idx].get_lod(name);
+  const std::vector<int>& get_lod_by_name(const int model_idx,
+                                          const std::string& name) {
+    return _models[model_idx].get_lod_by_name(name);
+  }
+  const std::vector<int>&& get_lod_by_name_with_rv(const int model_idx,
+                                                   const std::string& name) {
+    return std::move(_models[model_idx].get_lod_by_name_with_rv(name));
   }
   void add_model_res(ModelRes&& res) {
     _engine_names.push_back(res.engine_name());
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index 676114e4e44a9553cb06f00defb19b6c754d51e6..3e065e4de1ff3c01ff6bc05cb39a2607620915b4 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -51,14 +51,22 @@ PYBIND11_MODULE(serving_client, m) {
          })
     .def("get_shape",
          [](PredictorRes &self, int model_idx, std::string &name) {
-           return self.get_shape(model_idx, name);
-         },
-         py::return_value_policy::reference)
+           std::vector<int> *ptr = new std::vector<int>(
+               std::move(self.get_shape_by_name_with_rv(model_idx, name)));
+           auto capsule = py::capsule(ptr, [](void *p) {
+             delete reinterpret_cast<std::vector<int> *>(p);
+           });
+           return py::array(ptr->size(), ptr->data(), capsule);
+         })
     .def("get_lod",
          [](PredictorRes &self, int model_idx, std::string &name) {
-           return self.get_lod(model_idx, name);
-         },
-         py::return_value_policy::reference)
+           std::vector<int> *ptr = new std::vector<int>(
+               std::move(self.get_lod_by_name_with_rv(model_idx, name)));
+           auto capsule = py::capsule(ptr, [](void *p) {
+             delete reinterpret_cast<std::vector<int> *>(p);
+           });
+           return py::array(ptr->size(), ptr->data(), capsule);
+         })
     .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); })
     .def("get_engine_names",
          [](PredictorRes &self) { return self.get_engine_names(); });
@@ -109,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
                  fetch_name,
                  predict_res_batch,
                  pid);
-           })
+           },
+           py::call_guard<py::gil_scoped_release>())
       .def("numpy_predict",
            [](PredictorClient &self,
              const std::vector<std::vector<py::array_t<float>>>
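
The pybind changes above do two things. First, `get_shape` and `get_lod` now move each result vector onto the heap and tie its lifetime to the returned `py::array` through a `py::capsule`, so the numpy object owns its buffer instead of referencing memory inside `PredictorRes`. Second, `batch_predict` is wrapped in `py::call_guard<py::gil_scoped_release>()`, so the GIL is dropped while the RPC runs. A minimal client-side sketch of what this enables; the server address, config path, and the `words`/`prediction` variable names are placeholders, not part of this patch:

```python
import threading

from paddle_serving_client import Client


def worker(sample):
    # One Client per thread; predict() now builds a fresh PredictorRes per
    # call, so concurrent results cannot overwrite a shared handle.
    client = Client()
    client.load_client_config("serving_client_conf.prototxt")
    client.connect(["127.0.0.1:9393"])
    # List input takes the batch_predict path, which releases the GIL for
    # the duration of the RPC, letting the other workers run in parallel.
    fetch_map = client.predict(feed={"words": sample}, fetch=["prediction"])
    print(fetch_map["prediction"].shape)


threads = [
    threading.Thread(target=worker, args=([8, 233, 52, 601],))
    for _ in range(4)
]
for t in threads:
    t.start()
for t in threads:
    t.join()
```
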
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 8695da2591a30725d5b2390ad287f9ceae40052b..7d48949b22d0ace289ab3b9214f092819f5476e0 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -131,7 +131,7 @@ int GeneralReaderOp::inference() {
       lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
     }

-    if (req->insts(0).tensor_array(i).shape(0) == -1) {
+    if (model_config->_is_lod_feed[i]) {
       lod_tensor.lod.resize(1);
       lod_tensor.lod[0].push_back(0);
       VLOG(2) << "var[" << i << "] is lod_tensor";
@@ -153,6 +153,7 @@ int GeneralReaderOp::inference() {
   // specify the memory needed for output tensor_vector
   for (int i = 0; i < var_num; ++i) {
     if (out->at(i).lod.size() == 1) {
+      int tensor_size = 0;
       for (int j = 0; j < batch_size; ++j) {
         const Tensor &tensor = req->insts(j).tensor_array(i);
         int data_len = 0;
@@ -162,15 +163,28 @@ int GeneralReaderOp::inference() {
           data_len = tensor.float_data_size();
         }
         VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
+        tensor_size += data_len;
         int cur_len = out->at(i).lod[0].back();
         VLOG(2) << "current len: " << cur_len;
-        out->at(i).lod[0].push_back(cur_len + data_len);
-        VLOG(2) << "new len: " << cur_len + data_len;
+        int sample_len = 0;
+        if (tensor.shape_size() == 1) {
+          sample_len = data_len;
+        } else {
+          sample_len = tensor.shape(0);
+        }
+        out->at(i).lod[0].push_back(cur_len + sample_len);
+        VLOG(2) << "new len: " << cur_len + sample_len;
+      }
+      out->at(i).data.Resize(tensor_size * elem_size[i]);
+      out->at(i).shape = {out->at(i).lod[0].back()};
+      for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
+        out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
+      }
+      if (out->at(i).shape.size() == 1) {
+        out->at(i).shape.push_back(1);
       }
-      out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
-      out->at(i).shape = {out->at(i).lod[0].back(), 1};
       VLOG(2) << "var[" << i
               << "] is lod_tensor and len=" << out->at(i).lod[0].back();
     } else {
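
With this reader change, whether a feed var is a LoD (variable-length) tensor is taken from the model config (`_is_lod_feed`) instead of being inferred from a `-1` first dimension, each sample contributes `shape(0)` rows (not its flattened length) to the lod offsets, and trailing dimensions are kept in the batched shape. A pure-Python mirror of the new sizing logic, as a sketch only (not the server code itself):

```python
import numpy as np


def batch_lod_and_shape(samples):
    """Mirror of GeneralReaderOp's lod/shape computation for one lod var."""
    lod = [0]
    for s in samples:
        # 1-D samples fall back to their element count; otherwise the
        # first dimension is the per-sample length, as in the C++ code.
        sample_len = s.size if s.ndim == 1 else s.shape[0]
        lod.append(lod[-1] + sample_len)
    shape = [lod[-1]] + list(samples[0].shape[1:])
    if len(shape) == 1:
        shape.append(1)
    return lod, shape


# Two variable-length 2-D samples: lod tracks rows, columns are preserved.
a, b = np.zeros((3, 128)), np.zeros((5, 128))
print(batch_lod_and_shape([a, b]))  # ([0, 3, 8], [8, 128])
```
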
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 4d853f88eef88716c498b2b95c1498f1abdeb3d0..126accfd0a406f420f57eef4e04268e9081c744f 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -15,8 +15,10 @@
 #include "core/general-server/op/general_response_op.h"
 #include <algorithm>
 #include <iostream>
+#include <map>
 #include <memory>
 #include <sstream>
+#include <utility>
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -86,37 +88,51 @@ int GeneralResponseOp::inference() {
   // To get the order of model return values
   output->set_engine_name(pre_name);
   FetchInst *fetch_inst = output->add_insts();
+
+  std::map<std::string, int> fetch_index_map;
+  for (int i = 0; i < in->size(); ++i) {
+    VLOG(2) << "index " << i << " var " << in->at(i).name;
+    fetch_index_map.insert(std::pair<std::string, int>(in->at(i).name, i));
+  }
+
   for (auto &idx : fetch_index) {
     Tensor *tensor = fetch_inst->add_tensor_array();
     tensor->set_elem_type(1);
+    int true_idx = fetch_index_map[model_config->_fetch_name[idx]];
     if (model_config->_is_lod_fetch[idx]) {
-      VLOG(2) << "out[" << idx << "] is lod_tensor";
-      for (int k = 0; k < in->at(idx).shape.size(); ++k) {
+      VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
+              << " is lod_tensor";
+      for (int k = 0; k < in->at(true_idx).shape.size(); ++k) {
         VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
-        tensor->add_shape(in->at(idx).shape[k]);
+        tensor->add_shape(in->at(true_idx).shape[k]);
       }
     } else {
-      VLOG(2) << "out[" << idx << "] is tensor";
-      for (int k = 0; k < in->at(idx).shape.size(); ++k) {
-        VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
-        tensor->add_shape(in->at(idx).shape[k]);
+      VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
+              << " is tensor";
+      for (int k = 0; k < in->at(true_idx).shape.size(); ++k) {
+        VLOG(2) << "shape[" << k << "]: " << in->at(true_idx).shape[k];
+        tensor->add_shape(in->at(true_idx).shape[k]);
       }
     }
   }

   int var_idx = 0;
   for (auto &idx : fetch_index) {
+    int true_idx = fetch_index_map[model_config->_fetch_name[idx]];
     int cap = 1;
-    for (int j = 0; j < in->at(idx).shape.size(); ++j) {
-      cap *= in->at(idx).shape[j];
+    for (int j = 0; j < in->at(true_idx).shape.size(); ++j) {
+      cap *= in->at(true_idx).shape[j];
     }
-    if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
-      int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
+    if (in->at(true_idx).dtype == paddle::PaddleDType::INT64) {
+      VLOG(2) << "Prepare int64 var [" << model_config->_fetch_name[idx]
+              << "].";
+      int64_t *data_ptr =
+          static_cast<int64_t *>(in->at(true_idx).data.data());
       if (model_config->_is_lod_fetch[idx]) {
         FetchInst *fetch_p = output->mutable_insts(0);
-        for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+        for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) {
           fetch_p->mutable_tensor_array(var_idx)->add_lod(
-              in->at(idx).lod[0][j]);
+              in->at(true_idx).lod[0][j]);
         }
         for (int j = 0; j < cap; ++j) {
           fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
@@ -127,14 +143,17 @@ int GeneralResponseOp::inference() {
           fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
         }
       }
+      VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
       var_idx++;
-    } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
-      float *data_ptr = static_cast<float *>(in->at(idx).data.data());
+    } else if (in->at(true_idx).dtype == paddle::PaddleDType::FLOAT32) {
+      VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
+              << "].";
+      float *data_ptr = static_cast<float *>(in->at(true_idx).data.data());
       if (model_config->_is_lod_fetch[idx]) {
         FetchInst *fetch_p = output->mutable_insts(0);
-        for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+        for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) {
           fetch_p->mutable_tensor_array(var_idx)->add_lod(
-              in->at(idx).lod[0][j]);
+              in->at(true_idx).lod[0][j]);
         }
         for (int j = 0; j < cap; ++j) {
           fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
@@ -145,6 +164,7 @@ int GeneralResponseOp::inference() {
           fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
         }
       }
+      VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
       var_idx++;
     }
   }
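
`fetch_index_map` exists because the inference engine may return fetch vars in a different order than the client's fetch list, so each output is now looked up by name (`true_idx`) rather than by position. The same idea in a few lines of Python, purely for illustration:

```python
def reorder_outputs(fetch_names, engine_outputs):
    """engine_outputs: (name, value) pairs in whatever order the engine used."""
    index_by_name = {name: i for i, (name, _) in enumerate(engine_outputs)}
    return [engine_outputs[index_by_name[name]][1] for name in fetch_names]


outputs = [("prob", [0.1, 0.9]), ("label", [1])]
print(reorder_outputs(["label", "prob"], outputs))  # [[1], [0.1, 0.9]]
```
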
self.model_dict["ImageClassification"] = [ "resnet_v2_50_imagenet", "mobilenet_v2_imagenet" diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 537a7bae29e73b839c5bde73d388a94cf5c0413a..8c189d415b5718788da2ff0e6757ba3af259e750 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -21,6 +21,7 @@ import google.protobuf.text_format import numpy as np import time import sys +from .serving_client import PredictorRes int_type = 0 float_type = 1 @@ -108,7 +109,6 @@ class Client(object): self.feed_names_ = [] self.fetch_names_ = [] self.client_handle_ = None - self.result_handle_ = None self.feed_shapes_ = {} self.feed_types_ = {} self.feed_names_to_idx_ = {} @@ -122,7 +122,6 @@ class Client(object): def load_client_config(self, path): from .serving_client import PredictorClient - from .serving_client import PredictorRes model_conf = m_config.GeneralModelConfig() f = open(path, 'r') model_conf = google.protobuf.text_format.Merge( @@ -132,7 +131,6 @@ class Client(object): # get feed vars, fetch vars # get feed shapes, feed types # map feed names to index - self.result_handle_ = PredictorRes() self.client_handle_ = PredictorClient() self.client_handle_.init(path) if "FLAGS_max_body_size" not in os.environ: @@ -203,7 +201,12 @@ class Client(object): def shape_check(self, feed, key): if key in self.lod_tensor_set: return - if len(feed[key]) != self.feed_tensor_len[key]: + if isinstance(feed[key], + list) and len(feed[key]) != self.feed_tensor_len[key]: + raise SystemExit("The shape of feed tensor {} not match.".format( + key)) + if type(feed[key]).__module__ == np.__name__ and np.size(feed[ + key]) != self.feed_tensor_len[key]: raise SystemExit("The shape of feed tensor {} not match.".format( key)) @@ -254,23 +257,16 @@ class Client(object): for key in feed_i: if key not in self.feed_names_: raise ValueError("Wrong feed name: {}.".format(key)) - if not isinstance(feed_i[key], np.ndarray): - self.shape_check(feed_i, key) + #if not isinstance(feed_i[key], np.ndarray): + self.shape_check(feed_i, key) if self.feed_types_[key] == int_type: if i == 0: int_feed_names.append(key) if isinstance(feed_i[key], np.ndarray): - if key in self.lod_tensor_set: - raise ValueError( - "LodTensor var can not be ndarray type.") int_shape.append(list(feed_i[key].shape)) else: int_shape.append(self.feed_shapes_[key]) if isinstance(feed_i[key], np.ndarray): - if key in self.lod_tensor_set: - raise ValueError( - "LodTensor var can not be ndarray type.") - #int_slot.append(np.reshape(feed_i[key], (-1)).tolist()) int_slot.append(feed_i[key]) self.has_numpy_input = True else: @@ -280,17 +276,10 @@ class Client(object): if i == 0: float_feed_names.append(key) if isinstance(feed_i[key], np.ndarray): - if key in self.lod_tensor_set: - raise ValueError( - "LodTensor var can not be ndarray type.") float_shape.append(list(feed_i[key].shape)) else: float_shape.append(self.feed_shapes_[key]) if isinstance(feed_i[key], np.ndarray): - if key in self.lod_tensor_set: - raise ValueError( - "LodTensor var can not be ndarray type.") - #float_slot.append(np.reshape(feed_i[key], (-1)).tolist()) float_slot.append(feed_i[key]) self.has_numpy_input = True else: @@ -302,15 +291,17 @@ class Client(object): self.profile_.record('py_prepro_1') self.profile_.record('py_client_infer_0') - result_batch = self.result_handle_ + result_batch_handle = PredictorRes() if self.all_numpy_input: res = self.client_handle_.numpy_predict( float_slot_batch, 
@@ -254,23 +257,16 @@ class Client(object):
             for key in feed_i:
                 if key not in self.feed_names_:
                     raise ValueError("Wrong feed name: {}.".format(key))
-                if not isinstance(feed_i[key], np.ndarray):
-                    self.shape_check(feed_i, key)
+                #if not isinstance(feed_i[key], np.ndarray):
+                self.shape_check(feed_i, key)
                 if self.feed_types_[key] == int_type:
                     if i == 0:
                         int_feed_names.append(key)
                     if isinstance(feed_i[key], np.ndarray):
-                        if key in self.lod_tensor_set:
-                            raise ValueError(
-                                "LodTensor var can not be ndarray type.")
                         int_shape.append(list(feed_i[key].shape))
                     else:
                         int_shape.append(self.feed_shapes_[key])
                     if isinstance(feed_i[key], np.ndarray):
-                        if key in self.lod_tensor_set:
-                            raise ValueError(
-                                "LodTensor var can not be ndarray type.")
-                        #int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
                         int_slot.append(feed_i[key])
                         self.has_numpy_input = True
                     else:
@@ -280,17 +276,10 @@ class Client(object):
                     if i == 0:
                         float_feed_names.append(key)
                     if isinstance(feed_i[key], np.ndarray):
-                        if key in self.lod_tensor_set:
-                            raise ValueError(
-                                "LodTensor var can not be ndarray type.")
                         float_shape.append(list(feed_i[key].shape))
                     else:
                         float_shape.append(self.feed_shapes_[key])
                     if isinstance(feed_i[key], np.ndarray):
-                        if key in self.lod_tensor_set:
-                            raise ValueError(
-                                "LodTensor var can not be ndarray type.")
-                        #float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
                         float_slot.append(feed_i[key])
                         self.has_numpy_input = True
                     else:
@@ -302,15 +291,17 @@ class Client(object):
         self.profile_.record('py_prepro_1')
         self.profile_.record('py_client_infer_0')

-        result_batch = self.result_handle_
+        result_batch_handle = PredictorRes()
         if self.all_numpy_input:
             res = self.client_handle_.numpy_predict(
                 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
-                int_feed_names, int_shape, fetch_names, result_batch, self.pid)
+                int_feed_names, int_shape, fetch_names, result_batch_handle,
+                self.pid)
         elif self.has_numpy_input == False:
             res = self.client_handle_.batch_predict(
                 float_slot_batch, float_feed_names, float_shape, int_slot_batch,
-                int_feed_names, int_shape, fetch_names, result_batch, self.pid)
+                int_feed_names, int_shape, fetch_names, result_batch_handle,
+                self.pid)
         else:
             raise SystemExit(
                 "Please make sure the inputs are all in list type or all in numpy.array type"
             )
@@ -323,26 +314,28 @@ class Client(object):
             return None

         multi_result_map = []
-        model_engine_names = result_batch.get_engine_names()
+        model_engine_names = result_batch_handle.get_engine_names()
         for mi, engine_name in enumerate(model_engine_names):
             result_map = {}
             # result map needs to be a numpy array
             for i, name in enumerate(fetch_names):
                 if self.fetch_names_to_type_[name] == int_type:
                     # result_map[name] will be py::array(numpy array)
-                    result_map[name] = result_batch.get_int64_by_name(mi, name)
-                    shape = result_batch.get_shape(mi, name)
+                    result_map[name] = result_batch_handle.get_int64_by_name(
+                        mi, name)
+                    shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(name)] = np.array(
-                            result_batch.get_lod(mi, name))
+                        result_map["{}.lod".format(
+                            name)] = result_batch_handle.get_lod(mi, name)
                 elif self.fetch_names_to_type_[name] == float_type:
-                    result_map[name] = result_batch.get_float_by_name(mi, name)
-                    shape = result_batch.get_shape(mi, name)
+                    result_map[name] = result_batch_handle.get_float_by_name(
+                        mi, name)
+                    shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(name)] = np.array(
-                            result_batch.get_lod(mi, name))
+                        result_map["{}.lod".format(
+                            name)] = result_batch_handle.get_lod(mi, name)
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
@@ -360,7 +353,7 @@ class Client(object):

         # When using the A/B test, the tag of variant needs to be returned
         return ret if not need_variant_tag else [
-            ret, self.result_handle_.variant_tag()
+            ret, result_batch_handle.variant_tag()
         ]

     def release(self):
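
Taken together, the client-side effect is that a lod feed var may now be a numpy array (the old "LodTensor var can not be ndarray type" check is gone) and lod offsets come back as numpy arrays under the `"<name>.lod"` key. A hedged usage sketch; the address, config path, and the `words`/`crf_decode` names are illustrative placeholders, not part of this patch:

```python
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

words = np.array([8, 233, 52, 601])  # one variable-length sample
fetch_map = client.predict(feed={"words": words}, fetch=["crf_decode"])
print(fetch_map["crf_decode"])      # values as a numpy array
print(fetch_map["crf_decode.lod"])  # lod offsets, now also a numpy array
```
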