Commit 0abd3ac6 authored by H HexToString

add http client

Parent 31640a40
......@@ -33,9 +33,7 @@ if (WITH_PYTHON)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
......@@ -53,11 +51,7 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
......@@ -84,11 +78,6 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
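For context, calling this gRPC service from Python might look like the sketch below. The generated module names assume standard protoc/grpcio-tools output for multi_lang_general_model_service.proto; the address, feed name `x`, and fetch name `price` are illustrative assumptions matching the fit_a_line example used elsewhere in this change.
```python
# Hypothetical sketch, assuming stubs generated from
# multi_lang_general_model_service.proto; address and variable names are examples.
import grpc
import multi_lang_general_model_service_pb2 as pb2
import multi_lang_general_model_service_pb2_grpc as pb2_grpc

channel = grpc.insecure_channel("127.0.0.1:9393")
stub = pb2_grpc.MultiLangGeneralModelServiceStub(channel)

req = pb2.InferenceRequest(is_python=True, log_id=0)
req.feed_var_names.append("x")
req.fetch_var_names.append("price")
inst = req.insts.add()                # one FeedInst; the batch lives in the Tensor
tensor = inst.tensor_array.add()
tensor.elem_type = 1                  # 1 means float32
tensor.shape.extend([1, 13])
tensor.float_data.extend([0.0] * 13)  # placeholder feature values

resp = stub.Inference(req)
print(resp.err_code)
```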
......@@ -207,7 +207,7 @@ class PredictorClient {
void init_gflags(std::vector<std::string> argv);
int init(const std::vector<std::string> &client_conf);
int init(const std::vector<std::string>& client_conf);
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
......@@ -218,23 +218,22 @@ class PredictorClient {
int destroy_predictor();
int numpy_predict(
const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::vector<std::string>>& string_feed_batch,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
int numpy_predict(const std::vector<py::array_t<float>>& float_feed,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<py::array_t<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::string>& string_feed,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
private:
PredictorApi _api;
......@@ -243,6 +242,7 @@ class PredictorClient {
std::string _predictor_path;
std::string _conf_file;
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
......
......@@ -25,8 +25,6 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
std::once_flag gflags_init_flag;
namespace py = pybind11;
......@@ -68,9 +66,13 @@ int PredictorClient::init(const std::vector<std::string> &conf_file) {
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
......@@ -146,15 +148,15 @@ int PredictorClient::create_predictor() {
}
int PredictorClient::numpy_predict(
const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::vector<std::string>> &string_feed_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
......@@ -162,12 +164,6 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
batch_size = batch_size > string_feed_batch.size() ? batch_size
: string_feed_batch.size();
VLOG(2) << "batch size: " << batch_size;
// batch_size must be 1, because the batch is already inside the Tensor.
// I suggest removing the outer vector<>.
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
......@@ -190,136 +186,122 @@ int PredictorClient::numpy_predict(
}
int vec_idx = 0;
// batch_size can only be 1, because the batch is already inside the Tensor.
// If batch_size is not 1, an error will occur on the C++ side.
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
std::vector<std::string> string_feed = string_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
// batch is already in Tensor.
std::vector<Tensor *> tensor_vec;
for (auto &name : string_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : float_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
VLOG(2) << "batch [" << bi << "] "
<< "prepared";
for (auto &name : int_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
vec_idx = 0;
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
for (auto &name : string_feed_name) {
tensor_vec.push_back(req.add_tensor());
}
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
vec_idx = 0;
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
VLOG(2) << "batch [" << bi << "] "
<< "float feed value prepared";
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
tensor->mutable_float_data()->Resize(total_number, 0);
memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
vec_idx++;
}
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(
tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
VLOG(2) << "batch [" << bi << "] "
<< "int feed value prepared";
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1], because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1], because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "string feed value prepared";
vec_idx++;
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
......@@ -351,19 +333,18 @@ int PredictorClient::numpy_predict(
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.insts(0).tensor_array(idx).shape_size();
int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
model._shape_map[name][i] =
output.insts(0).tensor_array(idx).shape(i);
model._shape_map[name][i] = output.tensor(idx).shape(i);
}
int lod_size = output.insts(0).tensor_array(idx).lod_size();
int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
model._lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
......@@ -375,22 +356,22 @@ int PredictorClient::numpy_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64";
int size = output.insts(0).tensor_array(idx).int64_data_size();
int size = output.tensor(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
output.tensor(idx).int64_data().begin(),
output.tensor(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.insts(0).tensor_array(idx).float_data_size();
int size = output.tensor(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
output.tensor(idx).float_data().begin(),
output.tensor(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.insts(0).tensor_array(idx).int_data_size();
int size = output.tensor(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
output.insts(0).tensor_array(idx).int_data().begin(),
output.insts(0).tensor_array(idx).int_data().begin() + size);
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
}
idx += 1;
}
......
......@@ -97,33 +97,31 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.destroy_predictor(); })
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
&float_feed_batch,
const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>
&int_feed_batch,
const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::vector<std::string>>& string_feed_batch,
const std::vector<std::string>& string_feed_name,
const std::vector<std::vector<int>>& string_shape,
const std::vector<std::vector<int>>& string_lod_slot_batch,
const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
return self.numpy_predict(float_feed_batch,
return self.numpy_predict(float_feed,
float_feed_name,
float_shape,
float_lod_slot_batch,
int_feed_batch,
int_feed,
int_feed_name,
int_shape,
int_lod_slot_batch,
string_feed_batch,
string_feed,
string_feed_name,
string_shape,
string_lod_slot_batch,
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_copy_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralCopyOp::inference() {
// read request from client
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
const TensorVector *in = &input_blob->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
int batch_size = input_blob->GetBatchSize();
int input_var_num = 0;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get op tls reader object output";
return -1;
}
res->SetLogId(log_id);
TensorVector *out = &res->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
res->SetBatchSize(batch_size);
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
for (int i = 0; i < in->size(); ++i) {
paddle::PaddleTensor lod_tensor;
CopyLod(&in->at(i), &lod_tensor);
lod_tensor.dtype = in->at(i).dtype;
lod_tensor.name = in->at(i).name;
VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
<< "].name = " << lod_tensor.name;
out->push_back(lod_tensor);
}
VLOG(2) << "(logid=" << log_id << ") pack done.";
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).shape = {out->at(i).lod[0].back(), 1};
int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
tgt_ptr[j] = src_ptr[j];
}
}
VLOG(2) << "(logid=" << log_id << ") output done.";
timeline.Pause();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, res);
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralCopyOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralCopyOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralCopyOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
......@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
File mode changed from 100644 to 100755
......@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() {
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
//TODO:Temporary addition, specific details to be studied by HexToString
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
int cube_quant_bits = resource.get_cube_quant_bits();
size_t EMBEDDING_SIZE = 0;
if (cube_quant_bits == 0) {
......@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() {
sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].name = in->at(i).name;
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
// END HERE
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -49,7 +48,7 @@ int GeneralInferOp::inference() {
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
......@@ -57,7 +56,7 @@ int GeneralInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error";
return -1;
return -1;
}
output_blob->SetLogId(log_id);
......
File mode changed from 100644 to 100755
......@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match: model config["
<< model_config->_feed_type.size() << "] vs. actual[" << var_num
<< "]";
return -1;
}
VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();
for (int i = 0; i < var_num; ++i) {
const Tensor &tensor = req->insts(0).tensor_array(i);
if (model_config->_feed_type[i] != tensor.elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() == tensor.shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
tensor.shape(j);
if (model_config->_feed_shape[i][j] != tensor.shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
int GeneralReaderOp::inference() {
// read request from client
......@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() {
res->SetLogId(log_id);
Timer timeline;
int64_t start = timeline.TimeStampUS();
// only get insts(0), because the batch is already in the Tensor.
// req can only include 1 inst.
// var_num is the number of feed vars.
int var_num = req->insts(0).tensor_array_size();
int var_num = req->tensor_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num
<< ") start to call load general model_conf op";
......@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() {
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
// get the first InferOP's model_config as ReaderOp's model_config by default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().front();
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor
// prepare basic information for input
// specify the memory needed for output tensor_vector
......@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() {
int64_t databuf_size = 0;
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->insts(0).tensor_array(i);
const Tensor &tensor = req->tensor(i);
data_len = 0;
elem_type = 0;
elem_size = 0;
......@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim;
paddleTensor.shape.push_back(dim);
}
paddleTensor.name = model_config->_feed_name[i];
paddleTensor.name = tensor.name();
out->push_back(paddleTensor);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
......
File mode changed from 100644 to 100755
......@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() {
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// build the response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
......@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() {
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
//get the last InferOP's model_config as ResponseOp's model_config by default.
// get the last InferOP's model_config as ResponseOp's model_config by
// default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
......@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() {
<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector<int> fetch_index;
// This relies on GetOutPutNames() returning an ordered map,
// so the order of Outputs matches the FetchVar order in the prototxt.
// Otherwise, an Output can only be looked up through the
// Name -- Alias_name mapping.
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
......@@ -95,40 +98,37 @@ int GeneralResponseOp::inference() {
ModelOutput *output = res->add_outputs();
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
var_idx = 0;
// idx is the real index of the FetchVar,
// not an index into fetch_index.
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
//tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << " is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
Tensor *tensor = output->add_tensor();
tensor->set_name(in->at(idx).name);
tensor->set_alias_name(model_config->_fetch_alias_name[idx]);
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
std::string str_tensor_type = "is tensor";
if (model_config->_is_lod_fetch[idx] && in->at(idx).lod.size() > 0) {
str_tensor_type = "is lod_tensor";
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
tensor->add_lod(in->at(idx).lod[0][j]);
}
}
}
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << str_tensor_type;
var_idx = 0;
for (auto &idx : fetch_index) {
cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
FetchInst *fetch_p = output->mutable_insts(0);
auto dtype = in->at(idx).dtype;
if (dtype == paddle::PaddleDType::INT64) {
tensor->set_elem_type(0);
VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
<< model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
......@@ -137,35 +137,24 @@ int GeneralResponseOp::inference() {
// `Swap` is faster than constructing a copy with `{}`.
google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
&tmp_data);
output->mutable_tensor(var_idx)->mutable_int64_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::FLOAT32) {
tensor->set_elem_type(1);
VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
&tmp_data);
output->mutable_tensor(var_idx)->mutable_float_data()->Swap(&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) {
tensor->set_elem_type(2);
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap(
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
}
VLOG(2) << "(logid=" << log_id << ") fetch var ["
......@@ -205,4 +194,4 @@ DEFINE_OP(GeneralResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
\ No newline at end of file
} // namespace baidu
File mode changed from 100644 to 100755
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get op tls reader object output";
return -1;
}
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (batch_size <= 0) {
LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.int_data_size();
int cur_len = out->at(i).lod[0].back();
out->at(i).lod[0].push_back(cur_len + data_len);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).float_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_response_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// build the response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
std::vector<int> fetch_index;
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
<< " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
pre_name); // To get the order of model return values
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
}
var_idx++;
}
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextResponseOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextResponseOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;// 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
......@@ -46,7 +45,7 @@ message Response {
};
message ModelOutput {
repeated FetchInst insts = 1;
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
......
......@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
......@@ -46,7 +45,7 @@ message Response {
};
message ModelOutput {
repeated FetchInst insts = 1;
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
......
......@@ -37,7 +37,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
### Client access with curl
```shell
curl -XPOST http://127.0.0.1:9393/GeneralModelService/inference -d ' {"insts":[{"tensor_array":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}]}],"fetch_var_names":["price"],"log_id":0}'
curl -XPOST http://127.0.0.1:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}'
```
Here `127.0.0.1:9393` is the IP and port; set them to match the IP and port your server was started with.
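The same request can also be issued from Python; below is a minimal sketch using the third-party `requests` package (an assumption, not part of this repo), mirroring the curl example above.
```python
# Minimal sketch of the curl call above via the `requests` package.
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,          # 1 means float32
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
resp = requests.post("http://127.0.0.1:9393/GeneralModelService/inference",
                     json=payload)
print(resp.json())
```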
......@@ -76,7 +76,7 @@ repeated int32 numbers = 1;
// rapidjson
{"numbers" : [12, 17, 1, 24] }
```
#### shape
#### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string).
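For example, an int64 feed of shape [2] would select the `int64_data` field; the fragment below is a hypothetical illustration following the mapping above.
```python
# elem_type picks the data field: 0 -> int64_data, 1 -> float_data,
# 2 -> int_data, 3 -> data (bytes/string).
int64_tensor = {
    "int64_data": [8, 9],
    "elem_type": 0,
    "shape": [2],
}
```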
......@@ -93,7 +93,7 @@ repeated int32 numbers = 1;
Taking fit_a_line above as an example, we reuse the earlier request body. This is only to demonstrate usage; in practice, compressing a payload this small is not worth it.
```shell
echo ' {"insts":[{"tensor_array":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
echo ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
```
```shell
......
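A Python equivalent of the compressed request might look like the sketch below; it assumes the server decompresses bodies sent with a `Content-Encoding: gzip` header, as the gzip demo above implies, and again uses the third-party `requests` package.
```python
# Sketch: POST a gzip-compressed JSON body (assumes the server honors
# "Content-Encoding: gzip", mirroring the shell demo above).
import gzip
import json
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
body = gzip.compress(json.dumps(payload).encode("utf-8"))
resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=body,
    headers={"Content-Type": "application/json", "Content-Encoding": "gzip"},
)
print(resp.json())
```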
package main
import (
"io"
"os"
"fmt"
"bufio"
"strings"
"strconv"
)
func main() {
score_file := os.Args[1]
fi, err := os.Open(score_file)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
total := int(0)
acc := int(0)
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
s := strings.Split(line, "\t")
prob_str := strings.Trim(s[0], " ")
label_str := strings.Trim(s[1], " ")
prob, err := strconv.ParseFloat(prob_str, 32)
if err != nil {
panic(err)
}
label, err := strconv.ParseFloat(label_str, 32)
if err != nil {
panic(err)
}
if (prob - 0.5) * (label - 0.5) > 0 {
acc++
}
total++
}
fmt.Println("total num: ", total)
fmt.Println("acc num: ", acc)
fmt.Println("acc: ", float32(acc) / float32(total))
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"io"
"fmt"
"strings"
"bufio"
"strconv"
"os"
serving_client "github.com/PaddlePaddle/Serving/go/serving_client"
)
func main() {
var config_file_path string
config_file_path = os.Args[1]
handle := serving_client.LoadModelConfig(config_file_path)
handle = serving_client.Connect("127.0.0.1", "9292", handle)
test_file_path := os.Args[2]
fi, err := os.Open(test_file_path)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
fetch := []string{"cost", "acc", "prediction"}
var result map[string][]float32
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
var words = []int64{}
s := strings.Split(line, " ")
value, err := strconv.Atoi(s[0])
var feed_int_map map[string][]int64
for _, v := range s[1:value + 1] {
int_v, _ := strconv.Atoi(v)
words = append(words, int64(int_v))
}
label, err := strconv.Atoi(s[len(s)-1])
if err != nil {
panic(err)
}
feed_int_map = map[string][]int64{}
feed_int_map["words"] = words
feed_int_map["label"] = []int64{int64(label)}
result = serving_client.Predict(handle,
feed_int_map, fetch)
fmt.Println(result["prediction"][1], "\t", int64(label))
}
}
\ No newline at end of file
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: general_model_config.proto
package baidu_paddle_serving_configure
import (
fmt "fmt"
proto "github.com/golang/protobuf/proto"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type FeedVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
FeedType *int32 `protobuf:"varint,4,opt,name=feed_type,json=feedType,def=0" json:"feed_type,omitempty"`
Shape []int32 `protobuf:"varint,5,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FeedVar) Reset() { *m = FeedVar{} }
func (m *FeedVar) String() string { return proto.CompactTextString(m) }
func (*FeedVar) ProtoMessage() {}
func (*FeedVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{0}
}
func (m *FeedVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FeedVar.Unmarshal(m, b)
}
func (m *FeedVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FeedVar.Marshal(b, m, deterministic)
}
func (m *FeedVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FeedVar.Merge(m, src)
}
func (m *FeedVar) XXX_Size() int {
return xxx_messageInfo_FeedVar.Size(m)
}
func (m *FeedVar) XXX_DiscardUnknown() {
xxx_messageInfo_FeedVar.DiscardUnknown(m)
}
var xxx_messageInfo_FeedVar proto.InternalMessageInfo
const Default_FeedVar_IsLodTensor bool = false
const Default_FeedVar_FeedType int32 = 0
func (m *FeedVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FeedVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FeedVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FeedVar_IsLodTensor
}
func (m *FeedVar) GetFeedType() int32 {
if m != nil && m.FeedType != nil {
return *m.FeedType
}
return Default_FeedVar_FeedType
}
func (m *FeedVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type FetchVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
Shape []int32 `protobuf:"varint,4,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FetchVar) Reset() { *m = FetchVar{} }
func (m *FetchVar) String() string { return proto.CompactTextString(m) }
func (*FetchVar) ProtoMessage() {}
func (*FetchVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{1}
}
func (m *FetchVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FetchVar.Unmarshal(m, b)
}
func (m *FetchVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FetchVar.Marshal(b, m, deterministic)
}
func (m *FetchVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FetchVar.Merge(m, src)
}
func (m *FetchVar) XXX_Size() int {
return xxx_messageInfo_FetchVar.Size(m)
}
func (m *FetchVar) XXX_DiscardUnknown() {
xxx_messageInfo_FetchVar.DiscardUnknown(m)
}
var xxx_messageInfo_FetchVar proto.InternalMessageInfo
const Default_FetchVar_IsLodTensor bool = false
func (m *FetchVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FetchVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FetchVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FetchVar_IsLodTensor
}
func (m *FetchVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type GeneralModelConfig struct {
FeedVar []*FeedVar `protobuf:"bytes,1,rep,name=feed_var,json=feedVar" json:"feed_var,omitempty"`
FetchVar []*FetchVar `protobuf:"bytes,2,rep,name=fetch_var,json=fetchVar" json:"fetch_var,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *GeneralModelConfig) Reset() { *m = GeneralModelConfig{} }
func (m *GeneralModelConfig) String() string { return proto.CompactTextString(m) }
func (*GeneralModelConfig) ProtoMessage() {}
func (*GeneralModelConfig) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{2}
}
func (m *GeneralModelConfig) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GeneralModelConfig.Unmarshal(m, b)
}
func (m *GeneralModelConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GeneralModelConfig.Marshal(b, m, deterministic)
}
func (m *GeneralModelConfig) XXX_Merge(src proto.Message) {
xxx_messageInfo_GeneralModelConfig.Merge(m, src)
}
func (m *GeneralModelConfig) XXX_Size() int {
return xxx_messageInfo_GeneralModelConfig.Size(m)
}
func (m *GeneralModelConfig) XXX_DiscardUnknown() {
xxx_messageInfo_GeneralModelConfig.DiscardUnknown(m)
}
var xxx_messageInfo_GeneralModelConfig proto.InternalMessageInfo
func (m *GeneralModelConfig) GetFeedVar() []*FeedVar {
if m != nil {
return m.FeedVar
}
return nil
}
func (m *GeneralModelConfig) GetFetchVar() []*FetchVar {
if m != nil {
return m.FetchVar
}
return nil
}
func init() {
proto.RegisterType((*FeedVar)(nil), "baidu.paddle_serving.configure.FeedVar")
proto.RegisterType((*FetchVar)(nil), "baidu.paddle_serving.configure.FetchVar")
proto.RegisterType((*GeneralModelConfig)(nil), "baidu.paddle_serving.configure.GeneralModelConfig")
}
func init() { proto.RegisterFile("general_model_config.proto", fileDescriptor_efa52beffa29d37a) }
var fileDescriptor_efa52beffa29d37a = []byte{
// 283 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0xd0, 0x31, 0x4b, 0xc4, 0x30,
0x14, 0x07, 0x70, 0x72, 0x6d, 0xb9, 0xf6, 0x1d, 0x2e, 0xc1, 0xa1, 0x08, 0x1e, 0xe5, 0x16, 0xe3,
0x52, 0xc4, 0xf1, 0x46, 0xc5, 0x73, 0x51, 0x87, 0x72, 0xb8, 0x86, 0xd8, 0xbc, 0xb6, 0x81, 0x5c,
0x53, 0x92, 0xde, 0xc1, 0x2d, 0x7e, 0x13, 0xf1, 0xab, 0x4a, 0x93, 0x43, 0x9c, 0x74, 0x72, 0x7b,
0x79, 0xff, 0xf0, 0xde, 0xe3, 0x07, 0x17, 0x2d, 0xf6, 0x68, 0x85, 0xe6, 0x3b, 0x23, 0x51, 0xf3,
0xda, 0xf4, 0x8d, 0x6a, 0xcb, 0xc1, 0x9a, 0xd1, 0xd0, 0xe5, 0x9b, 0x50, 0x72, 0x5f, 0x0e, 0x42,
0x4a, 0x8d, 0xdc, 0xa1, 0x3d, 0xa8, 0xbe, 0x2d, 0xc3, 0x97, 0xbd, 0xc5, 0xd5, 0x07, 0x81, 0xf9,
0x06, 0x51, 0xbe, 0x0a, 0x4b, 0x29, 0xc4, 0xbd, 0xd8, 0x61, 0x4e, 0x0a, 0xc2, 0xb2, 0xca, 0xd7,
0xf4, 0x12, 0x40, 0x68, 0x25, 0x1c, 0xf7, 0xc9, 0xcc, 0x27, 0x99, 0xef, 0xbc, 0x4c, 0xf1, 0x35,
0x9c, 0x29, 0xc7, 0xb5, 0x91, 0x7c, 0xc4, 0xde, 0x19, 0x9b, 0x47, 0x05, 0x61, 0xe9, 0x3a, 0x69,
0x84, 0x76, 0x58, 0x2d, 0x94, 0x7b, 0x32, 0x72, 0xeb, 0x13, 0xba, 0x84, 0xac, 0x41, 0x94, 0x7c,
0x3c, 0x0e, 0x98, 0xc7, 0x05, 0x61, 0xc9, 0x9a, 0xdc, 0x54, 0xe9, 0xd4, 0xdb, 0x1e, 0x07, 0xa4,
0xe7, 0x90, 0xb8, 0x4e, 0x0c, 0x98, 0x27, 0x45, 0xc4, 0x92, 0x2a, 0x3c, 0x56, 0xef, 0x90, 0x6e,
0x70, 0xac, 0xbb, 0xff, 0xbf, 0xef, 0x7b, 0x7f, 0xfc, 0x73, 0xff, 0x27, 0x01, 0xfa, 0x18, 0x78,
0x9f, 0x27, 0xdd, 0x7b, 0x2f, 0x47, 0xef, 0xc0, 0x1f, 0xce, 0x0f, 0xc2, 0xe6, 0xa4, 0x88, 0xd8,
0xe2, 0xf6, 0xaa, 0xfc, 0x5d, 0xba, 0x3c, 0x29, 0x57, 0xf3, 0xe6, 0xc4, 0xfd, 0x30, 0x81, 0x8c,
0x75, 0xe7, 0x87, 0xcc, 0xfc, 0x10, 0xf6, 0xf7, 0x90, 0x60, 0x31, 0xb9, 0x85, 0xea, 0x2b, 0x00,
0x00, 0xff, 0xff, 0x08, 0x27, 0x9c, 0x1a, 0xfe, 0x01, 0x00, 0x00,
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package serving_client
import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
"net/http"
pb "github.com/PaddlePaddle/Serving/go/proto"
"github.com/golang/protobuf/proto"
)
type Tensor struct {
Data []byte `json:"data"`
FloatData []float32 `json:"float_data"`
IntData []int `json:"int_data"`
Int64Data []int64 `json:"int64_data"`
ElemType int `json:"elem_type"`
Shape []int `json:"shape"`
}
type FeedInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type FetchInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type Request struct {
Insts []FeedInst `json:"insts"`
FetchVarNames []string `json:"fetch_var_names"`
ProfileServer bool `json:"profile_server"`
}
type Response struct {
Insts []FetchInst `json:"insts"`
ProfileTime []int64 `json:"profile_time"`
}
type Handle struct {
Url string
Port string
FeedAliasNameMap map[string]string
FeedShapeMap map[string][]int
FeedNameMap map[string]int
FeedAliasNames []string
FetchNameMap map[string]int
FetchAliasNameMap map[string]string
}
func LoadModelConfig(config string) Handle {
in, err := ioutil.ReadFile(config)
if err != nil {
log.Fatalln("Failed to read general model: ", err)
}
general_model_config := &pb.GeneralModelConfig{}
if err := proto.Unmarshal(in, general_model_config); err != nil {
log.Fatalln("Failed to parse GeneralModelConfig: ", err)
}
log.Println("read protobuf succeed")
handle := Handle{}
handle.FeedNameMap = map[string]int{}
handle.FeedAliasNameMap = map[string]string{}
handle.FeedShapeMap = map[string][]int{}
handle.FetchNameMap = map[string]int{}
handle.FetchAliasNameMap = map[string]string{}
handle.FeedAliasNames = []string{}
for i, v := range general_model_config.FeedVar {
handle.FeedNameMap[*v.Name] = i
tmp_array := []int{}
for _, vv := range v.Shape {
tmp_array = append(tmp_array, int(vv))
}
handle.FeedShapeMap[*v.Name] = tmp_array
handle.FeedAliasNameMap[*v.AliasName] = *v.Name
handle.FeedAliasNames = append(handle.FeedAliasNames, *v.AliasName)
}
for i, v := range general_model_config.FetchVar {
handle.FetchNameMap[*v.Name] = i
handle.FetchAliasNameMap[*v.AliasName] = *v.Name
}
return handle
}
func Connect(url string, port string, handle Handle) Handle {
handle.Url = url
handle.Port = port
return handle
}
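// Predict assembles the JSON request from the int64 feed map, POSTs it to
// /GeneralModelService/inference, and maps the fetched float results by name.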
func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map[string][]float32 {
contentType := "application/json;charset=utf-8"
var tensor_array []Tensor
var inst FeedInst
tensor_array = []Tensor{}
inst = FeedInst{}
for i := 0; i < len(handle.FeedAliasNames); i++ {
key_i := handle.FeedAliasNames[i]
var tmp Tensor
tmp.IntData = []int{}
tmp.Shape = []int{}
tmp.Int64Data = int_feed_map[key_i]
tmp.ElemType = 0
tmp.Shape = handle.FeedShapeMap[key_i]
tensor_array = append(tensor_array, tmp)
}
inst.TensorArray = tensor_array
var profile_server bool
profile_server = false
req := &Request{
Insts: []FeedInst{inst},
FetchVarNames: fetch,
ProfileServer: profile_server}
	b, err := json.Marshal(req)
	if err != nil {
		log.Println("Marshal failed:", err)
	}
	body := bytes.NewBuffer(b)
var post_address bytes.Buffer
post_address.WriteString("http://")
post_address.WriteString(handle.Url)
post_address.WriteString(":")
post_address.WriteString(handle.Port)
post_address.WriteString("/GeneralModelService/inference")
	resp, err := http.Post(post_address.String(), contentType, body)
	if err != nil {
		log.Println("Post failed:", err)
		return nil
	}
defer resp.Body.Close()
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Read failed:", err)
}
response_json := Response{}
	err = json.Unmarshal(content, &response_json)
	if err != nil {
		log.Println("Unmarshal failed:", err)
	}
var result map[string][]float32
result = map[string][]float32{}
for i, v := range fetch {
result[v] = response_json.Insts[0].TensorArray[i].FloatData
}
return result
}
......@@ -18,28 +18,17 @@ public class PaddleServingClientExample {
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
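        // predict(feedData, fetch, batchFlag, log_id): batchFlag=true prepends a
        // batch dimension of 1 to each tensor shape; log_id tags the request.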
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -77,134 +66,15 @@ public class PaddleServingClientExample {
INDArray im_size = Nd4j.createFromArray(new int[]{height, width});
long[] batch_size_shape = {1,2};
INDArray batch_im_size = im_size.reshape(batch_size_shape);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("image", batch_image);
put("im_size", batch_im_size);
}};
List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
succ = client.setRpcTimeoutMs(20000); // cpu
if (succ != true) {
System.out.println("set timeout failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean batch_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>() {{
add(feed_data);
add(feed_data);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_batch, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean asyn_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
PredictFuture future = client.asyn_predict(feed_data, fetch);
Map<String, INDArray> fetch_map = future.get();
if (fetch_map == null) {
System.out.println("Get future reslut failed");
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean model_ensemble() {
long[] data = {8, 233, 52, 601};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("words", npdata);
}};
List<String> fetch = Arrays.asList("prediction");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, HashMap<String, INDArray>> fetch_map
= client.ensemble_predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, HashMap<String, INDArray>> entry : fetch_map.entrySet()) {
System.out.println("Model = " + entry.getKey());
HashMap<String, INDArray> tt = entry.getValue();
for (Map.Entry<String, INDArray> e : tt.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -213,8 +83,8 @@ public class PaddleServingClientExample {
long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("input_mask", Nd4j.createFromArray(input_mask));
put("position_ids", Nd4j.createFromArray(position_ids));
put("input_ids", Nd4j.createFromArray(input_ids));
......@@ -222,22 +92,9 @@ public class PaddleServingClientExample {
}};
List<String> fetch = Arrays.asList("pooled_output");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -271,8 +128,8 @@ public class PaddleServingClientExample {
long[] embedding_19 = {537425};
long[] embedding_0 = {737395};
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14));
put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2));
put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10));
......@@ -302,23 +159,9 @@ public class PaddleServingClientExample {
put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0));
}};
List<String> fetch = Arrays.asList("prob");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
HttpClient client = new HttpClient();
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
......@@ -330,7 +173,7 @@ public class PaddleServingClientExample {
if (args.length < 1) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type>.");
System.out.println("<test-type>: fit_a_line bert model_ensemble asyn_predict batch_predict cube_local cube_quant yolov4");
System.out.println("<test-type>: fit_a_line bert cube_local yolov4");
return;
}
String testType = args[0];
......@@ -339,16 +182,8 @@ public class PaddleServingClientExample {
succ = e.fit_a_line();
} else if ("bert".equals(testType)) {
succ = e.bert();
} else if ("model_ensemble".equals(testType)) {
succ = e.model_ensemble();
} else if ("asyn_predict".equals(testType)) {
succ = e.asyn_predict();
} else if ("batch_predict".equals(testType)) {
succ = e.batch_predict();
} else if ("cube_local".equals(testType)) {
succ = e.cube_local();
} else if ("cube_quant".equals(testType)) {
succ = e.cube_local();
} else if ("yolov4".equals(testType)) {
if (args.length < 2) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <image-filepath>.");
......
......@@ -145,6 +145,11 @@
<artifactId>json</artifactId>
<version>20190722</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
......
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map.Entry;
import java.nio.file.*;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import java.lang.reflect.*;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.json.*;
import io.paddle.serving.configure.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
enum ElementType
{
Int64_type, Float32_type, Int32_type, Bytes_type;
}
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
            String profile_str = String.join("", time_record_);
            System.out.println(profile_str);   // actually emit the collected profile
            time_record_ = new ArrayList<String>();
            time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class HttpClient {
private int httpTimeoutS_;
private List<String> feedNames_;
private Map<String, String> feedRealNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
private String ip;
private String serverPort;
private String port;
private String serviceName;
private boolean request_compress_flag;
private boolean response_compress_flag;
public HttpClient() {
feedNames_ = null;
feedRealNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
feedNameToIndex_ = null;
httpTimeoutS_ = 200000;
ip = "127.0.0.1";
port = "9393";
serverPort = "9393";
serviceName = "/GeneralModelService/inference";
request_compress_flag = false;
response_compress_flag = false;
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public void setTimeOut(int httpTimeoutS_) {
this.httpTimeoutS_ = httpTimeoutS_;
}
public void setIP(String ip) {
this.ip = ip;
}
public void setPort(String port) {
this.port = port;
this.serverPort = port;
}
public void setServiceName(String serviceName){
this.serviceName = serviceName;
}
public void loadClientConfig(String model_config_path) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
String model_config_str = Files.readString(Paths.get(model_config_path));
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
feedRealNames_ = new HashMap<String, String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
feedNameToIndex_ = new HashMap<String, Integer>();
fetchNames_ = new ArrayList<String>();
fetchTypes_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedNames_.add(var_name);
feedRealNames_.put(var_name, feed_var.getName());
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
feedNameToIndex_.put(var_name, i);
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchNames_.add(var_name);
fetchTypes_.put(var_name, fetch_var.getFetchType());
}
}
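    // Encrypted-model handshake: use_key POSTs the key file to the base
    // endpoint; the server replies with the real serving port in
    // `endpoint_list`, which then replaces serverPort for later predict calls.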
public void use_key(String keyFilePath) {
String key_str = null;
String encrypt_url = "http://" + this.ip + ":" +this.port;
try {
key_str = Files.readString(Paths.get(keyFilePath));
} catch (Exception e) {
System.out.format("Open key file failed: %s\n", e.toString());
}
JSONObject jsonKey = new JSONObject();
if( key_str != null) {
jsonKey.put("key", key_str);
}else{
jsonKey.put("key", "");
}
String result = doPost(encrypt_url, jsonKey.toString());
try {
JSONObject jsonObject = new JSONObject(result);
JSONArray jsonArray = jsonObject.getJSONArray("endpoint_list");
this.serverPort = jsonArray.getString(0);
System.out.format("Real ServerPort is: %s\n", this.serverPort);
}catch (JSONException err) {
System.out.format("Parse serverPort failed: %s\n", err.toString());
}
}
public void set_request_compress(boolean request_compress_flag) {
// need to be done.
this.request_compress_flag = request_compress_flag;
}
public void set_response_compress(boolean response_compress_flag) {
// need to be done.
this.response_compress_flag = response_compress_flag;
}
public static String compress(String str,String inEncoding) throws IOException {
if (str == null || str.length() == 0) {
return str;
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(str.getBytes(inEncoding));
gzip.close();
return out.toString("ISO-8859-1");
}
    // Helper methods that assemble the HTTP request; the caller only needs to
    // pass the feed data, lod and fetch list. The JSON body is built from the
    // proto definition, and the interface largely mirrors the Python client.
    // Four overloads are provided: at minimum feedData and fetch, with optional
    // lod and batchFlag.
public String predict(Map<String, Object> feedData,
List<String> fetch,
int log_id) {
return predict(feedData,null,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
List<String> fetch,
boolean batchFlag,
int log_id) {
return predict(feedData,null,fetch,batchFlag,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
int log_id) {
return predict(feedData,feedLod,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
boolean batchFlag,
int log_id) {
String server_url = "http://" + this.ip + ":" + this.serverPort + this.serviceName;
        // Assemble the fetch list
JSONArray jsonFetchList = new JSONArray();
Iterator<String> fetchIterator = fetch.iterator();
while (fetchIterator.hasNext()) {
jsonFetchList.put(fetchIterator.next());
}
        // Assemble the tensor array
JSONArray jsonTensorArray = new JSONArray();
try{
if (null != feedData && feedData.size() > 0) {
                // Get the entry set of the feed map
Set<Entry<String, Object>> entrySet = feedData.entrySet();
                // Iterate over the feed entries
Iterator<Entry<String, Object>> iterator = entrySet.iterator();
while (iterator.hasNext()) {
JSONObject jsonTensor = new JSONObject();
Entry<String, Object> mapEntry = iterator.next();
Object objectValue = mapEntry.getValue();
String feed_alias_name = mapEntry.getKey();
String feed_real_name = feedRealNames_.get(feed_alias_name);
List<Integer> shape = feedShapes_.get(feed_alias_name);
int element_type = feedTypes_.get(feed_alias_name);
String protoDataKey = feedTypeToDataKey_.get(element_type);
                    // feedLod may be null when an overload without lod is used.
                    Object feedLodValue = (feedLod == null) ? null : feedLod.get(feed_alias_name);
                    // If the value is an INDArray, flatten it to 1-D first and record its real shape;
                    // String and List values are serialized as-is.
if(objectValue.getClass().equals(INDArray.class)){
long[] flattened_shape = {-1};
Class<?> classLongArray = flattened_shape.getClass();
Method methodReshape = mapEntry.getValue().getClass().getMethod("reshape", classLongArray);
Method methodShape = mapEntry.getValue().getClass().getMethod("shape");
long[] indarrayShape = (long[])methodShape.invoke(objectValue);
shape.clear();
for(long dim:indarrayShape){
shape.add((int)dim);
}
objectValue = methodReshape.invoke(objectValue,flattened_shape);
}
if(batchFlag){
                        // Prepend a batch dimension of 1 at index 0
shape.add(0, 1);
}
jsonTensor.put("alias_name", feed_alias_name);
jsonTensor.put("name", feed_real_name);
jsonTensor.put("shape", shape);
jsonTensor.put("elem_type", element_type);
jsonTensor.put(protoDataKey,objectValue);
if(feedLodValue != null) {
jsonTensor.put("lod", feedLodValue);
}
jsonTensorArray.put(jsonTensor);
}
}
}catch (Exception e) {
e.printStackTrace();
}
JSONObject jsonRequest = new JSONObject();
jsonRequest.put("log_id",log_id);
jsonRequest.put("fetch_var_names", jsonFetchList);
jsonRequest.put("tensor",jsonTensorArray);
return doPost(server_url, jsonRequest.toString());
}
public String doPost(String url, String strPostData) {
CloseableHttpClient httpClient = null;
CloseableHttpResponse httpResponse = null;
String result = "";
        // Create the httpClient instance
httpClient = HttpClients.createDefault();
        // Create the HttpPost instance for the remote connection
HttpPost httpPost = new HttpPost(url);
        // Build the request config (all timeouts are in milliseconds)
        RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(httpTimeoutS_)// connect timeout
                .setConnectionRequestTimeout(httpTimeoutS_)// connection-request timeout
                .setSocketTimeout(httpTimeoutS_)// socket read timeout
.build();
        // Apply the config to the httpPost instance
httpPost.setConfig(requestConfig);
        // Set request headers
httpPost.addHeader("Content-Type", "application/json");
if(response_compress_flag){
httpPost.addHeader("Accept-encoding", "gzip");
}
if(request_compress_flag && strPostData.length()>512){
try{
strPostData = compress(strPostData,"UTF-8");
httpPost.addHeader("Content-Encoding", "gzip");
} catch (IOException e) {
e.printStackTrace();
}
}
try {
httpPost.setEntity(new StringEntity(strPostData, "UTF-8"));
            // Execute the POST request and obtain the response
httpResponse = httpClient.execute(httpPost);
            // Extract the response body
HttpEntity entity = httpResponse.getEntity();
Header header = entity.getContentEncoding();
            if(header != null && header.getValue().equalsIgnoreCase("gzip")){ // response body is gzip-compressed
GzipDecompressingEntity gzipEntity = new GzipDecompressingEntity(entity);
result = EntityUtils.toString(gzipEntity);
}else{
result = EntityUtils.toString(entity);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
            // Release resources
if (null != httpResponse) {
try {
httpResponse.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != httpClient) {
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
}
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import io.grpc.StatusRuntimeException;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.Client;
import io.paddle.serving.grpc.*;
public class PredictFuture {
private ListenableFuture<InferenceResponse> callFuture_;
private Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> callBackFunc_;
PredictFuture(ListenableFuture<InferenceResponse> call_future,
Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> call_back_func) {
callFuture_ = call_future;
callBackFunc_ = call_back_func;
}
public Map<String, INDArray> get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
Map<String, HashMap<String, INDArray>> ensemble_result
= callBackFunc_.apply(resp);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("predict failed: please use get_ensemble impl.\n");
return null;
}
return list.get(0).getValue();
}
public Map<String, HashMap<String, INDArray>> ensemble_get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
return callBackFunc_.apply(resp);
}
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
......@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory.
## Start Encryption Service
CPU Service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
```
GPU Service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
```
## Prediction
......
......@@ -36,14 +36,14 @@ def serving_encryption():
## Start the Encrypted Prediction Service
CPU prediction service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
```
GPU prediction service
```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
```
## Prediction
```
python test_client.py encrypt_client/
python test_client.py encrypt_client/serving_client_conf.prototxt
```
......@@ -19,7 +19,8 @@ import sys
client = Client()
client.load_client_config(sys.argv[1])
client.use_key("./key")
client.connect(["127.0.0.1:9300"], encryption=True)
client.connect(["0.0.0.0:9393"], encryption=True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
......@@ -28,5 +29,5 @@ test_reader = paddle.batch(
batch_size=1)
for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=fetch_list)
print(fetch_map)
......@@ -20,7 +20,7 @@ import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -31,6 +31,5 @@ for data in test_reader():
new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0]
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
feed={"x": new_data}, fetch=fetch_list, batch=True)
print(fetch_map)
......@@ -13,29 +13,30 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_client.httpclient import HttpClient
import sys
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
"""
import time
client = HttpClient()
client.load_client_config(sys.argv[1])
client.use_key("./key")
client.set_response_compress(True)
client.set_request_compress(True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
for data in test_reader():
new_data = np.zeros((1, 1, 13)).astype("float32")
new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0]
lst_data = []
for i in range(200):
lst_data.append(data[0][0])
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
feed={"x": lst_data}, fetch=fetch_list, batch=True)
print(fetch_map)
"""
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
break
# Linear Regression Prediction Service Example
## Get Data
```shell
sh get_data.sh
```
## Start the gRPC Server
``` shell
python test_server.py uci_housing_model/
```
You can also start the default gRPC service with the following one-liner:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```
## Client Prediction
### Synchronous Prediction
``` shell
python test_sync_client.py
```
### Asynchronous Prediction
``` shell
python test_asyn_client.py
```
### Batch Prediction
``` shell
python test_batch_client.py
```
### Prediction Timeout
``` shell
python test_timeout_client.py
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import functools
import time
import threading
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
complete_task_count = [0]
lock = threading.Lock()
def call_back(call_future):
try:
fetch_map = call_future.result()
print(fetch_map)
except grpc.RpcError as e:
print(e.code())
finally:
with lock:
complete_task_count[0] += 1
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
task_count = 0
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
future = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False, asyn=True)
task_count += 1
future.add_done_callback(functools.partial(call_back))
while complete_task_count[0] != task_count:
time.sleep(0.1)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
batch_size = 2
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 1, 13))
batch_data = np.concatenate([new_data, new_data, new_data], axis=0)
print(batch_data.shape)
fetch_map = client.predict(
feed={"x": batch_data}, fetch=["price"], batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.set_gpuid("0")
server.prepare_server(workdir="work_dir1", port=9393, device="gpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
client.set_rpc_timeout_ms(40)
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
elif fetch_map["serving_status_code"] == grpc.StatusCode.DEADLINE_EXCEEDED:
print('timeout')
else:
print(fetch_map["serving_status_code"])
## IMDB comment sentiment inference service
([简体中文](./README_CN.md)|English)
### Get model files and sample data
```
sh get_data.sh
```
The downloaded package contains the cnn, lstm and bow model configs along with their test_data and train_data.
### Start RPC inference service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### RPC Infer
The `paddlepaddle` package is used in `test_client.py`; you may need to install it (`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
It returns prediction results for the first 10 test cases.
## IMDB Comment Sentiment Prediction Service
(简体中文|[English](./README.md))
### Get Model Files and Sample Data
```
sh get_data.sh
```
The script downloads and extracts the config files for the cnn, lstm and bow models, together with test_data and train_data.
### Start the RPC Prediction Service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### Run Prediction
`test_client.py` uses the `paddlepaddle` package; you may need to install it (`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
This predicts the first ten samples of test_data/part-0.
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
tar -zxvf imdb_model.tar.gz
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class IMDBDataset(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
wid = 0
if py_version == 2:
with open(dictfile) as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size):
def local_iter():
for fname in infer_filelist:
with open(fname, "r") as fin:
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBDataset()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
# can define your own data preprocessing easily.
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
for line in sys.stdin:
word_ids, label = imdb_dataset.get_words_and_label(line)
word_len = len(word_ids)
feed = {
"words": np.array(word_ids).reshape(word_len, 1),
"words.lod": [0, word_len]
}
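    # Above, "words.lod" carries the LoD offsets of the variable-length
    # `words` tensor: a single sequence covering rows [0, word_len).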
fetch = ["prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
#print("{} {}".format(fetch_map["prediction"][0], label[0]))
# Yolov4 Detection Service
([简体中文](README_CN.md)|English)
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After the prediction completes, a json file with the prediction results and a picture annotated with the detection boxes will be generated in the `./output` folder.
# Yolov4 Detection Service
(简体中文|[English](README.md))
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After prediction completes, a json file with the prediction results and an image annotated with the detection boxes are generated in the `./output` folder.
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([
File2Image(), BGR2RGB(), Resize(
(608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
(2, 0, 1))
])
postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
client = Client()
client.connect(['127.0.0.1:9393'])
client.set_rpc_timeout_ms(100000)
im = preprocess(sys.argv[1])
fetch_map = client.predict(
feed={
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
print(fetch_map)
fetch_map.pop("serving_status_code")
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
......@@ -16,5 +16,6 @@
from . import version
from . import client
from .client import *
from .httpclient import *
__version__ = version.version_tag
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests
import json
import numpy as np
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import gzip
from collections.abc import Iterable  # moved out of collections in Python 3.3, removed in 3.10
import base64
# The 'type' field of a feed_var/fetch_var selects the data type:
# 0 -> int64, 1 -> float32, 2 -> int32, 3 -> string (bytes in the proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
# Indexed by elem_type; each entry names the matching proto data field.
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
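# e.g. elem_type == float32_type (1) selects proto_data_key_list[1], i.e. "float_data"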
def list_flatten(items, ignore_types=(str, bytes)):
for x in items:
if isinstance(x, Iterable) and not isinstance(x, ignore_types):
yield from list_flatten(x)
else:
yield x
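# Example: list(list_flatten([[1, 2], [3, [4]]])) -> [1, 2, 3, 4]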
class HttpClient(object):
def __init__(self,
ip="0.0.0.0",
port="9393",
service_name="/GeneralModelService/inference"):
self.feed_names_ = []
self.feed_real_names = []
self.fetch_names_ = []
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.http_timeout_ms = 200000
self.ip = ip
self.port = port
self.server_port = port
self.service_name = service_name
self.key = None
self.try_request_gzip = False
self.try_response_gzip = False
def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
with open(file_path_list[0], 'r') as f:
model_conf = google.protobuf.text_format.Merge(f.read(), model_conf)
# Load the configuration: collect feed/fetch vars, their shapes
# and types, and map feed names to indices.
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_real_names = [var.name for var in model_conf.feed_var]
self.feed_names_to_idx_ = {}  # maps alias_name -> index of the feed_var in the proto
self.lod_tensor_set = set()
self.feed_tensor_len = {}  # only used for shape checks
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = [dim for dim in var.shape]
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
with open(file_path_list[-1], 'r') as f:
model_conf = google.protobuf.text_format.Merge(f.read(), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def set_http_timeout_ms(self, http_timeout_ms):
if not isinstance(http_timeout_ms, int):
raise ValueError("http_timeout_ms must be int type.")
else:
self.http_timeout_ms = http_timeout_ms
def set_request_compress(self, try_request_gzip):
self.try_request_gzip = try_request_gzip
def set_response_compress(self, try_response_gzip):
self.try_response_gzip = try_response_gzip
# use_key enables encrypted serving: load the key, then ask the server for the real inference endpoint.
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
self.get_serving_port()
def get_serving_port(self):
encrypt_url = "http://" + str(self.ip) + ":" + str(self.port)
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post(encrypt_url, data=req)
result = r.json()
print(result)
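# Expected reply shape (inferred from the check below), e.g.:
# {"endpoint_list": [9494]}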
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
self.server_port = str(result["endpoint_list"][0])
print("rpc port is ", self.server_port)
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
# feed accepts numpy arrays, JSON strings, and plain lists or tuples.
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, (list, tuple)):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, (list, str, tuple)):
# If the input is a list, str or tuple and the model has exactly one
# feed_var, wrap it as {feed_var_name: value} and append it to feed_batch.
if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else:
raise ValueError("Feed only accepts dict and list of dict")
# batch_size must be 1, because the batch dimension is already carried inside each Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
fetch_names = []
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of the saved fetch list.")
feed_i = feed_batch[0]
Request = {}
Request["fetch_var_names"] = fetch_list
Request["log_id"] = int(log_id)
Request["tensor"] = []
index = 0
total_data_number = 0
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
Request["tensor"].append('')
Request["tensor"][index] = {}
lod = []
if "{}.lod".format(key) in feed_i:
lod = feed_i["{}.lod".format(key)]
shape = self.feed_shapes_[key].copy()
elem_type = self.feed_types_[key]
data_value = feed_i[key]
data_key = proto_data_key_list[elem_type]
# The input is not a string type.
if self.feed_types_[key] != bytes_type:
# feed_i[key] may be an np.ndarray,
# or a string, list or tuple;
# an np.ndarray must be converted to a flat list.
if isinstance(feed_i[key], np.ndarray):
shape_lst = []
# A 0-dim numpy array needs an extra wrapping list.
if feed_i[key].ndim == 0:
data_value = [feed_i[key].tolist()]
shape_lst.append(1)
else:
shape_lst.extend(list(feed_i[key].shape))
shape = shape_lst
data_value = feed_i[key].flatten().tolist()
# When batch is False, prepend 1 to shape as the batch dimension.
# When batch is True, numpy.shape already carries the batch dimension.
if not batch:
shape.insert(0, 1)
# For a list or tuple, flatten any nested structure.
if isinstance(feed_i[key], (list, tuple)):
# When batch is False, prepend 1 to shape as the batch dimension.
# When batch is True, a list (unlike numpy) has no regular shape,
# so take the length of the first dimension as the batch size
# and prepend it to the feed_var shape.
if not batch:
shape.insert(0, 1)
else:
shape.insert(0, len(feed_i[key]))
feed_i[key] = [x for x in list_flatten(feed_i[key])]
data_value = feed_i[key]
'''
Explanatory note: when the input is a string (bytes_type),
it is used as-is rather than flattened, i.e. effectively:
else:
shape = self.feed_shapes_[key]
data_value = feed_i[key]
'''
total_data_number = total_data_number + len(data_value)
Request["tensor"][index]["elem_type"] = elem_type
Request["tensor"][index]["shape"] = shape
Request["tensor"][index][data_key] = data_value
proto_index = self.feed_names_to_idx_[key]
Request["tensor"][index]["name"] = self.feed_real_names[proto_index]
Request["tensor"][index]["alias_name"] = key
if len(lod) > 0:
Request["tensor"][index]["lod"] = lod
index = index + 1
result = None
# request
web_url = "http://" + self.ip + ":" + self.server_port + self.service_name
postData = json.dumps(Request)
headers = {}
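# Compress only reasonably large payloads; 512 elements is the built-in threshold.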
if self.try_request_gzip and total_data_number > 512:
postData = gzip.compress(bytes(postData, 'utf-8'))
headers["Content-Encoding"] = "gzip"
if self.try_response_gzip:
headers["Accept-encoding"] = "gzip"
# requests支持自动识别解压
# Apply the configured timeout (requests expects seconds).
result = requests.post(url=web_url, headers=headers, data=postData, timeout=self.http_timeout_ms / 1000.0)
if result is None:
return None
if result.status_code == 200:
return result.json()
return result
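A minimal usage sketch for the new HttpClient (not part of the commit; it assumes a general model server on 127.0.0.1:9393, and the client config path plus the "x"/"price" feed/fetch names are illustrative):
```
from paddle_serving_client import HttpClient
import numpy as np

client = HttpClient(ip="127.0.0.1", port="9393")
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.set_request_compress(True)   # gzip large request bodies
client.set_response_compress(True)  # ask the server for gzip replies

# "x" and "price" are illustrative feed/fetch names.
data = np.random.rand(1, 13).astype("float32")
result = client.predict(feed={"x": data}, fetch=["price"], batch=True)
print(result)
```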
......@@ -14,18 +14,16 @@
# pylint: disable=doc-string-missing
from . import monitor
from . import rpc_service
from . import serve
from . import version
__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"]
__all__ = ["version", "server", "serve", "monitor", "dag"]
from paddle_serving_server import (
version,
server,
serve,
monitor,
rpc_service,
dag, )
from .dag import *
......
This diff is collapsed.
......@@ -14,6 +14,7 @@
#!flask/bin/python
# pylint: disable=doc-string-missing
# Now, this is only for Pipeline.
from flask import Flask, request, abort
from contextlib import closing
from multiprocessing import Pool, Process, Queue
......
......@@ -16,7 +16,7 @@ from time import time as _time
import time
import threading
import multiprocessing
from paddle_serving_client import MultiLangClient, Client
from paddle_serving_client import Client
from concurrent import futures
import logging
import func_timeout
......@@ -330,8 +330,9 @@ class Op(object):
if self.client_type == 'brpc':
client = Client()
client.load_client_config(client_config)
elif self.client_type == 'grpc':
client = MultiLangClient()
# After testing completes, replace this with the brpc-http client.
# elif self.client_type == 'grpc':
# client = MultiLangClient()
elif self.client_type == 'local_predictor':
if self.local_predictor is None:
raise ValueError("local predictor not yet created")
......@@ -474,10 +475,13 @@ class Op(object):
fetch=self._fetch_names,
batch=True,
log_id=typical_logid)
# To be replaced with HttpClient later.
'''
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
'''
return call_result
def postprocess(self, input_data, fetch_data, log_id=0):
......
......@@ -21,17 +21,33 @@ option cc_generic_services = true;
message Tensor {
repeated bytes data = 1;
optional int32 elem_type = 2;
repeated int32 shape = 3;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
};
message Request { repeated FeedInst insts = 1; };
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
};
message Response { repeated FetchInst insts = 1; };
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response);
......
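For reference, the JSON body that HttpClient posts for a single float32 feed var maps onto the new Tensor message like this (a sketch; names and values are illustrative):
```
request_body = {
    "tensor": [{
        "name": "x",            # feed_var name from the model prototxt
        "alias_name": "input",  # alias_name from the model prototxt
        "elem_type": 1,         # 1 -> float32 -> "float_data"
        "shape": [1, 13],       # includes the batch dimension
        "float_data": [0.1] * 13,
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}
```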