Unverified commit a7064391, authored by Zhang Yulong, committed by GitHub

Merge branch 'develop' into ci-test

File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -33,9 +33,7 @@ if (WITH_PYTHON)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
@@ -53,11 +51,7 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
@@ -84,11 +78,6 @@ if (WITH_PYTHON)
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
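For orientation, here is a minimal, hypothetical sketch of how a client could fill an InferenceRequest before calling the Inference RPC, assuming the C++ classes that protoc generates from the messages above (the feed/fetch names and values are illustrative only; elem_type 1 corresponds to P_FLOAT32 in the ProtoDataType enum used later in this diff):

#include "multi_lang_general_model_service.pb.h"  // assumed name of the protoc-generated header

int main() {
  baidu::paddle_serving::multi_lang::InferenceRequest req;
  req.set_is_python(false);
  req.set_log_id(0);
  req.add_feed_var_names("x");       // illustrative feed variable
  req.add_fetch_var_names("price");  // illustrative fetch variable
  // One FeedInst carries one Tensor per feed variable.
  auto *inst = req.add_insts();
  auto *tensor = inst->add_tensor_array();
  tensor->set_elem_type(1);  // float32
  tensor->add_shape(1);
  tensor->add_shape(13);
  for (int i = 0; i < 13; ++i) {
    tensor->add_float_data(0.5f);
  }
  // req can now be sent through a MultiLangGeneralModelService::Inference stub.
  return 0;
}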
File mode changed from 100755 to 100644
@@ -11,10 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License
#execute_process(COMMAND go env -w GO111MODULE=off)
add_subdirectory(cube-server)
add_subdirectory(cube-api)
add_subdirectory(cube-builder)
-#add_subdirectory(cube-transfer)
+add_subdirectory(cube-transfer)
-#add_subdirectory(cube-agent)
+add_subdirectory(cube-agent)
@@ -15,7 +15,6 @@
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
project(cube-agent Go)
include(cmake/golang.cmake)
ExternalGoProject_Add(agent-docopt-go github.com/docopt/docopt-go)
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -22,7 +22,7 @@ include_directories(SYSTEM ${CMAKE_CURRENT_BINARY_DIR}/../)
add_executable(cube-builder src/main.cpp include/cube-builder/util.h src/util.cpp src/builder_job.cpp include/cube-builder/builder_job.h include/cube-builder/define.h src/seqfile_reader.cpp include/cube-builder/seqfile_reader.h include/cube-builder/raw_reader.h include/cube-builder/vtext.h src/crovl_builder_increment.cpp include/cube-builder/crovl_builder_increment.h src/curl_simple.cpp include/cube-builder/curl_simple.h)
-add_dependencies(cube-builder jsoncpp boost)
+add_dependencies(cube-builder jsoncpp boost brpc)
set(DYNAMIC_LIB
gflags
@@ -39,4 +39,8 @@ target_link_libraries(cube-builder ${DYNAMIC_LIB})
# install
install(TARGETS cube-builder RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin)
-install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tool DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
+install(FILES ${CMAKE_CURRENT_LIST_DIR}/tool/kvtool.py DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(FILES ${CMAKE_CURRENT_LIST_DIR}/tool/kv_to_seqfile.py DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/tool/source DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
@@ -212,7 +212,7 @@ class slim_hash_map {
int copy_data_from(const slim_hash_map& rhs) {
destroy();
LOG(INFO) << "start copy data, rhs info, mHashSize: " << rhs.m_nHashSize;
if (rhs.m_nHashSize > 0) {
m_hashTable = new (std::nothrow) uint32_t[rhs.m_nHashSize];
if (!m_hashTable) {
@@ -231,7 +231,7 @@ class slim_hash_map {
<< sizeof(hash_node_t) * BLOCK_SIZE;
return -1;
}
LOG(INFO) << "copy data, m_nBlockNum: " << m_nBlockNum << " , copy size:" << sizeof(hash_node_t) * BLOCK_SIZE;
memcpy(m_blockAddr[m_nBlockNum],
rhs.m_blockAddr[m_nBlockNum],
sizeof(hash_node_t) * BLOCK_SIZE);
@@ -265,11 +265,13 @@ class slim_hash_map {
}
size_type index = key % m_nHashSize;
hash_node_t* node = get_node(m_hashTable[index]);
int node_cnt = 0;
while (node != NULL && node->data.first != key) {
LOG(INFO) << "node link get:" << node->data.first;
node_cnt++;
node = get_node(node->next);
}
LOG(INFO) << "key: " << key << " , found count: " << node_cnt;
if (node == NULL) {
return end();
}
@@ -390,7 +392,6 @@ class slim_hash_map {
if (node != NULL) {
return node->data.second;
}
return add_node(index, key)->data.second;
}
void clear() {
@@ -399,16 +400,16 @@ class slim_hash_map {
m_nFreeEntries = 0;
m_nSize = 0;
}
-bool load(const char* file) {
+bool load(const char* file, uint32_t block_id) {
// clear();
// bias = 0 means base mode, bias = K means patch mode, and base dict has size K
int size = sizeof(key_t) + sizeof(value_t);
FILE* fp = fopen(file, "rb");
char* buf = reinterpret_cast<char*>(malloc(size * 100000));
LOG(INFO) << "current block id: " << block_id;
if (fp == NULL || buf == NULL) {
return false;
}
size_t read_count;
bool err = false;
key_t key;
@@ -423,6 +424,8 @@ class slim_hash_map {
for (int i = 0; i < static_cast<int>(read_count); ++i) {
key = *(reinterpret_cast<key_t*>(buf + i * size));
value = *(reinterpret_cast<value_t*>(buf + i * size + sizeof(key_t)));
value = ((uint64_t)block_id << 32) | value;
LOG(INFO) << "slim map key: " << key << " , value: " << value;
(*this)[key] = value;
}
}
@@ -557,7 +560,6 @@ class slim_hash_map {
}
hash_node_t* add_node(uint32_t index, const key_type& key) {
++m_nSize;
if (m_nFreeEntries) {
uint32_t addr = m_nFreeEntries;
hash_node_t* node = get_node(addr);
@@ -569,7 +571,7 @@ class slim_hash_map {
}
uint32_t block = ((m_nNextEntry & 0xFF800000) >> 23);
//LOG(INFO) << "key: " << key << " here. index: " << index << " , m_nNextEntry: "<< m_nNextEntry << " , block:" << block<< ", m_nBlockNum:" << m_nBlockNum;
if (block >= m_nBlockNum) {
try {
m_blockAddr[m_nBlockNum++] = new hash_node_t[BLOCK_SIZE];
@@ -581,7 +583,6 @@ class slim_hash_map {
return NULL;
}
}
uint32_t addr = m_nNextEntry;
++m_nNextEntry;
hash_node_t* node = get_node(addr);
......
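The new block_id argument threads a simple packing convention through the index: in patch mode the base dict's block count is stored in the upper 32 bits of each value, while the lower 32 bits keep the offset inside that data block; Dict::seek() in the next file splits the value back apart the same way. A minimal standalone sketch of that convention (pack_value/unpack_value are hypothetical helper names, not repo code):

#include <cassert>
#include <cstdint>

// Mirror of `value = ((uint64_t)block_id << 32) | value;` in load() above.
static inline uint64_t pack_value(uint32_t block_id, uint32_t offset) {
  return (static_cast<uint64_t>(block_id) << 32) | offset;
}

// Mirror of the unpacking in Dict::seek(): id = flag >> 32, addr = (uint32_t)flag.
static inline void unpack_value(uint64_t flag, uint32_t* block_id, uint32_t* offset) {
  *block_id = static_cast<uint32_t>(flag >> 32);
  *offset = static_cast<uint32_t>(flag);
}

int main() {
  uint64_t v = pack_value(3, 0x1234);  // illustrative values
  uint32_t id = 0;
  uint32_t off = 0;
  unpack_value(v, &id, &off);
  assert(id == 3 && off == 0x1234);
  return 0;
}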
@@ -51,13 +51,12 @@ int Dict::load(const std::string& dict_path,
bool in_mem,
const std::string& v_path) {
TIME_FLAG(load_start);
int ret = load_index(dict_path, v_path);
if (ret != E_OK) {
LOG(WARNING) << "load index failed";
return ret;
}
LOG(INFO) << "load index in mem mode: " << in_mem ;
if (in_mem) {
ret = load_data(dict_path, v_path);
if (ret != E_OK) {
@@ -81,8 +80,11 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
std::string index_n_path(dict_path);
index_n_path.append(v_path);
index_n_path.append("/index.n");
uint32_t cur_block_id = 0;
if (_base_dict) cur_block_id = _base_dict->_block_set.size();
LOG(INFO) << "index file path: " << index_n_path;
//ERR HERE
std::unique_ptr<FILE, decltype(&fclose)> pf(fopen(index_n_path.c_str(), "rb"),
&fclose);
if (pf.get() == NULL) {
@@ -150,12 +152,16 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
} else {
if (_slim_table.copy_data_from(_base_dict->_slim_table) != 0) {
LOG(ERROR) << "copy data from old index failed in patch mode";
return E_DATA_ERROR;
}
file_idx = 0;
LOG(INFO)
-<< "index check file len failed in patch mode, set file_idx to 0";
+<< "index check fail, direct copy";
}
}
LOG(INFO) << "resize slim table, new count: " << count/2;
_slim_table.resize(count / 2);
char file[1024];
@@ -167,6 +173,7 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
dict_path.c_str(),
v_path.c_str(),
file_idx);
LOG(INFO) << "load file str: " << file;
if (stat(file, &fstat) < 0) {
if (errno == ENOENT) {
LOG(WARNING) << "index." << file_idx << " not exist";
@@ -181,8 +188,8 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
<< (uint64_t)fstat.st_size;
return E_DATA_ERROR;
}
-LOG(INFO) << "loading from index." << file_idx;
+LOG(INFO) << "loading from index." << file_idx << " . table size: " << _slim_table.size();
-if (!_slim_table.load(file) || _slim_table.size() > count) {
+if (!_slim_table.load(file, cur_block_id)) {
return E_DATA_ERROR;
}
@@ -193,8 +200,15 @@ int Dict::load_index(const std::string& dict_path, const std::string& v_path) {
}
int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
if (_base_dict) {
_block_set = _base_dict->_block_set;
LOG(INFO)<< "load data base dict block set size: " << _block_set[0].size;
for (size_t i = 0; i < _block_set.size(); ++i) {
block_size.push_back(_block_set[i].size);
total_data_size += _block_set[i].size;
}
}
std::string data_n_path(dict_path);
@@ -212,8 +226,6 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
std::vector<uint32_t> block_size;
uint64_t total_data_size = 0;
for (uint32_t i = 0; i < count; ++i) {
uint32_t size = 0;
if (fread(reinterpret_cast<void*>(&size), sizeof(uint32_t), 1, pf) != 1) {
@@ -222,6 +234,7 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
return E_DATA_ERROR;
}
block_size.push_back(size);
LOG(INFO) << "new block size: " << size;
total_data_size += size;
}
g_data_size << (total_data_size / 1024 / 1024);
@@ -229,36 +242,35 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
pf = NULL;
uint32_t old_size = _block_set.size();
LOG(INFO) << "load data old size: " << old_size;
for (size_t i = 0; i < old_size; ++i) {
if (_block_set[i].size != block_size[i]) {
old_size = 0;
break;
}
}
-_block_set.resize(count);
+LOG(INFO) << "load data block set count: " << count << " , old size: " << old_size;
_block_set.resize(count + old_size);
for (size_t i = old_size; i < _block_set.size(); ++i) {
char data_path[1024];
LOG(INFO) << "load from data." << i;
-snprintf(
-data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
+//snprintf(
+// data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i);
snprintf(data_path, 1024, "%s%s/data.%lu", dict_path.c_str(), v_path.c_str(), i - old_size);
FILE* data_file = fopen(data_path, "rb");
if (data_file == NULL) {
-LOG(WARNING) << "open data file [" << data_path << " failed";
+LOG(WARNING) << "open data file [" << data_path << " ]failed";
_block_set[i].s_data.reset();
_block_set[i].size = 0;
continue;
}
_block_set[i].s_data.reset(reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
_block_set[i].s_data.reset(
reinterpret_cast<char*>(malloc(block_size[i] * sizeof(char))));
if (_block_set[i].s_data.get() == NULL) {
LOG(ERROR) << "malloc data failed";
fclose(data_file);
return E_OOM;
}
_block_set[i].size = block_size[i];
if (fread(reinterpret_cast<void*>(_block_set[i].s_data.get()),
sizeof(char),
_block_set[i].size,
@@ -267,7 +279,10 @@ int Dict::load_data(const std::string& dict_path, const std::string& v_path) {
fclose(data_file);
return E_DATA_ERROR;
}
LOG(INFO) << "load new data to BlockSet succ";
for (size_t ii = 0; ii < 20; ++ii) {
LOG(INFO) << "data ptr: " << (int)(_block_set[i].s_data.get()[ii]);
}
fclose(data_file);
}
@@ -386,12 +401,11 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
uint64_t flag = it->second;
uint32_t id = (uint32_t)(flag >> 32);
uint64_t addr = (uint32_t)(flag);
LOG(INFO) << "search key: " << id << " , addr: " << addr;
if (_block_set.size() > id) {
uint32_t block_size = _block_set[id].size;
char* block_data = NULL;
block_data = _block_set[id].s_data.get();
if (block_data && addr + sizeof(uint32_t) <= block_size) {
uint32_t len = *(reinterpret_cast<uint32_t*>(block_data + addr));
if (addr + len <= block_size && len >= sizeof(uint32_t)) {
@@ -405,6 +419,7 @@ bool Dict::seek(uint64_t key, char* buff, uint64_t* buff_size) {
<< default_buffer_size;
return false;
}
LOG(INFO) << "seek key: " << key << " , addr: " << addr;
memcpy(buff,
(block_data + addr + sizeof(uint32_t)),
len - sizeof(uint32_t));
......
@@ -18,11 +18,9 @@ project(cube-transfer Go)
include(cmake/golang.cmake)
-ExternalGoProject_Add(rfw github.com/mipearson/rfw)
+ExternalGoProject_Add(transfer-rfw github.com/mipearson/rfw)
-ExternalGoProject_Add(docopt-go github.com/docopt/docopt-go)
+ExternalGoProject_Add(transfer-docopt-go github.com/docopt/docopt-go)
-add_custom_target(logex
+ExternalGoProject_Add(transfer-logex github.com/Badangel/logex)
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get github.com/Badangel/logex
DEPENDS rfw)
add_subdirectory(src)
install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR})
File mode changed from 100755 to 100644
@@ -14,6 +14,6 @@
set(SOURCE_FILE cube-transfer.go)
add_go_executable(cube-transfer ${SOURCE_FILE})
-add_dependencies(cube-transfer docopt-go)
+add_dependencies(cube-transfer transfer-docopt-go)
-add_dependencies(cube-transfer rfw)
+add_dependencies(cube-transfer transfer-rfw)
-add_dependencies(cube-transfer logex)
+add_dependencies(cube-transfer transfer-logex)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
@@ -17,68 +17,56 @@ package transfer
import (
"fmt"
"github.com/Badangel/logex"
"os"
"time"
"transfer/dict"
)
func Start() {
-go BackupTransfer()
+BackupTransfer()
logex.Notice(">>> starting server...")
addr := ":" + Port
err := startHttp(addr)
if err != nil {
logex.Fatalf("start http(addr=%v) failed: %v", addr, err)
os.Exit(255)
}
logex.Notice(">>> start server succ")
}
func BackupTransfer() {
-for {
-//trigger
-version, err := TriggerStart(Dict.DonefileAddress)
-if err != nil {
-logex.Fatalf("[trigger err]trigger err:%v ", err)
-fmt.Printf("[error]trigger err:%v \n", err)
-break
-}
-logex.Noticef("[trigger] get version:%v \n", version)
-if version.Id == 0 {
+//trigger
+version, err := TriggerStart(Dict.DonefileAddress)
+if err != nil {
+logex.Fatalf("[trigger err]trigger err:%v ", err)
+fmt.Printf("[error]trigger err:%v \n", err)
+fmt.Print("transfer over!")
+logex.Noticef("[transfer]status machine exit!")
+return
+}
+logex.Noticef("[trigger] get version:%v \n", version)
logex.Noticef("[sleep]no new version, sleep 5 min")
fmt.Printf("[sleep]no new version, wait 5 min\n")
time.Sleep(5 * time.Minute)
continue
}
Dict.WaitVersionInfo = version
logex.Noticef("[trigger finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
WriteWaitVersionInfoToFile()
//builder
Dict.WaitVersionInfo.Status = dict.Dict_Status_Building
Dict.WaitVersionInfo.MetaInfos = make(map[int]string)
WriteWaitVersionInfoToFile()
if err = BuilderStart(Dict.WaitVersionInfo); err != nil {
logex.Fatalf("builder err:%v \n", err)
}
if Dict.WaitVersionInfo.Mode == dict.BASE {
var newCurrentVersion []dict.DictVersionInfo
Dict.CurrentVersionInfo = newCurrentVersion
WriteCurrentVersionInfoToFile()
}
-logex.Noticef("[builder finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
+if Dict.WaitVersionInfo.Mode == dict.DELTA {
var newCurrentVersion []dict.DictVersionInfo
Dict.CurrentVersionInfo = newCurrentVersion
WriteCurrentVersionInfoToFile()
}
logex.Noticef("[builder finish] WaitVersionInfo version:%v \n", Dict.WaitVersionInfo)
//deployer
Dict.WaitVersionInfo.Status = dict.Dict_Status_Deploying
WriteWaitVersionInfoToFile()
if err = DeployStart(Dict.WaitVersionInfo); err != nil {
logex.Fatalf("deploy err:%v \n", err)
}
logex.Noticef("[deploy finish]current version: %v\n",Dict.CurrentVersionInfo)
}
logex.Noticef("[deploy finish]current version: %v\n",Dict.CurrentVersionInfo)
fmt.Print("transfer over!")
logex.Noticef("[transfer]status machine exit!")
}
@@ -38,18 +38,19 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
Wget(addr, donefileAddr)
addr = donefileAddr
}
baseDonefile := addr + "/base.txt"
fmt.Printf("[trigrer]donefile path:%v \n", baseDonefile)
logex.Noticef("[trigrer]base donefile path:%v", baseDonefile)
contents, err := ioutil.ReadFile(baseDonefile)
VersionLen := len(Dict.CurrentVersionInfo)
version.DictName = Dict.DictName
-if err != nil {
-fmt.Printf("[trigrer]read files err:%v \n", err)
-logex.Fatalf("[trigrer]read files err:%v ", err)
+fmt.Printf("get into mode check here\n")
+if Dict.DictMode == dict.BASE_ONLY {
+baseDonefile := addr + "/base.txt"
fmt.Printf("[trigrer]donefile path:%v \n", baseDonefile)
logex.Noticef("[trigrer]base donefile path:%v", baseDonefile)
contents, err_0 := ioutil.ReadFile(baseDonefile)
if err_0 != nil {
fmt.Printf("[trigrer]read files err:%v \n", err_0)
logex.Fatalf("[trigrer]read files err:%v ", err_0)
return
} else {
contentss := string(contents)
lines := strings.Split(contentss, "\n")
index := len(lines) - 1
@@ -80,19 +81,21 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
version.Mode = dict.BASE
return
}
}
-if Dict.DictMode == dict.BASR_DELTA && VersionLen > 0 {
+}
if Dict.DictMode == dict.BASR_DELTA {
patchDonefile := addr + "/patch.txt"
fmt.Printf("[trigrer]patchDonefile path:%v \n", patchDonefile)
logex.Noticef("[trigrer]patch donefile path:%v", patchDonefile)
-contents, err = ioutil.ReadFile(patchDonefile)
+contents, err_0 := ioutil.ReadFile(patchDonefile)
-if err != nil {
+if err_0 != nil {
-fmt.Printf("read files err:%v \n", err)
+fmt.Printf("[trigrer]read files err:%v \n", err_0)
logex.Fatalf("[trigrer]read files err:%v ", err_0)
return
} else {
contentss := string(contents)
lines := strings.Split(contentss, "\n")
fmt.Printf("[trigger]get patch lines here\n")
for index := 0; index < len(lines)-1; index++ {
if len(lines[index]) < 3 {
logex.Noticef("[trigrer]get patch donfile info error")
@@ -106,14 +109,15 @@ func GetDoneFileInfo(addr string) (version dict.DictVersionInfo, err error) {
logex.Noticef("[trigrer]donfile info:%v", donefileInfo)
newId, _ := strconv.Atoi(donefileInfo.Id)
newKey, _ := strconv.Atoi(donefileInfo.Key)
-if newId > Dict.CurrentVersionInfo[VersionLen-1].Id && newKey == Dict.CurrentVersionInfo[VersionLen-1].Key {
+fmt.Printf("[trigger]read patch id: %d, key: %d\n", newId, newKey)
if VersionLen == 0 || newId > Dict.CurrentVersionInfo[VersionLen-1].Id {
version.Id = newId
version.Key, _ = strconv.Atoi(donefileInfo.Key)
version.Input = donefileInfo.Input
deployVersion := int(time.Now().Unix())
version.CreateTime = deployVersion
version.Version = deployVersion
-version.Depend = Dict.CurrentVersionInfo[VersionLen-1].Depend
+version.Depend = deployVersion
version.Mode = dict.DELTA
return
}
......
@@ -96,7 +96,8 @@ func ExeCommad(files string, params []string) (err error) {
func Wget(ftpPath string, downPath string) {
var params []string
-params = append(params, "-P")
+params = append(params, "--limit-rate=100m")
params = append(params, "-P")
params = append(params, downPath)
params = append(params, "-r")
params = append(params, "-N")
@@ -110,4 +111,4 @@ func Wget(ftpPath string, downPath string) {
if err != nil {
fmt.Printf("wget exe: %v\n", err)
}
}
\ No newline at end of file
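In effect, the helper now spawns something along the lines of wget --limit-rate=100m -P <downPath> -r -N ... (the remaining arguments, including the ftp path, are appended in the collapsed lines), which caps each mirror download at roughly 100 MB/s.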
@@ -207,7 +207,7 @@ class PredictorClient {
void init_gflags(std::vector<std::string> argv);
-int init(const std::vector<std::string> &client_conf);
+int init(const std::vector<std::string>& client_conf);
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
@@ -218,23 +218,22 @@ class PredictorClient {
int destroy_predictor();
-int numpy_predict(
-const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
-const std::vector<std::string>& float_feed_name,
-const std::vector<std::vector<int>>& float_shape,
-const std::vector<std::vector<int>>& float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
-const std::vector<std::string>& int_feed_name,
-const std::vector<std::vector<int>>& int_shape,
-const std::vector<std::vector<int>>& int_lod_slot_batch,
-const std::vector<std::vector<std::string>>& string_feed_batch,
-const std::vector<std::string>& string_feed_name,
-const std::vector<std::vector<int>>& string_shape,
-const std::vector<std::vector<int>>& string_lod_slot_batch,
-const std::vector<std::string>& fetch_name,
-PredictorRes& predict_res_batch,  // NOLINT
-const int& pid,
-const uint64_t log_id);
+int numpy_predict(const std::vector<py::array_t<float>>& float_feed,
+const std::vector<std::string>& float_feed_name,
+const std::vector<std::vector<int>>& float_shape,
+const std::vector<std::vector<int>>& float_lod_slot_batch,
+const std::vector<py::array_t<int64_t>>& int_feed,
+const std::vector<std::string>& int_feed_name,
+const std::vector<std::vector<int>>& int_shape,
+const std::vector<std::vector<int>>& int_lod_slot_batch,
+const std::vector<std::string>& string_feed,
+const std::vector<std::string>& string_feed_name,
+const std::vector<std::vector<int>>& string_shape,
+const std::vector<std::vector<int>>& string_lod_slot_batch,
+const std::vector<std::string>& fetch_name,
+PredictorRes& predict_res_batch,  // NOLINT
+const int& pid,
+const uint64_t log_id);
private:
PredictorApi _api;
@@ -243,6 +242,7 @@ class PredictorClient {
std::string _predictor_path;
std::string _conf_file;
std::map<std::string, int> _feed_name_to_idx;
std::vector<std::string> _feed_name;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::map<std::string, int> _fetch_name_to_type;
......
@@ -25,8 +25,6 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
std::once_flag gflags_init_flag;
namespace py = pybind11;
@@ -68,9 +66,13 @@ int PredictorClient::init(const std::vector<std::string> &conf_file) {
_fetch_name_to_idx.clear();
_shape.clear();
int feed_var_num = model_config.feed_var_size();
_feed_name.clear();
VLOG(2) << "feed var num: " << feed_var_num;
for (int i = 0; i < feed_var_num; ++i) {
_feed_name_to_idx[model_config.feed_var(i).alias_name()] = i;
VLOG(2) << "feed [" << i << "]"
<< " name: " << model_config.feed_var(i).name();
_feed_name.push_back(model_config.feed_var(i).name());
VLOG(2) << "feed alias name: " << model_config.feed_var(i).alias_name()
<< " index: " << i;
std::vector<int> tmp_feed_shape;
@@ -146,15 +148,15 @@ int PredictorClient::create_predictor() {
}
int PredictorClient::numpy_predict(
-const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
+const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
+const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
-const std::vector<std::vector<std::string>> &string_feed_batch,
+const std::vector<std::string> &string_feed,
const std::vector<std::string> &string_feed_name,
const std::vector<std::vector<int>> &string_shape,
const std::vector<std::vector<int>> &string_lod_slot_batch,
@@ -162,12 +164,6 @@ int PredictorClient::numpy_predict(
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
batch_size = batch_size > string_feed_batch.size() ? batch_size
: string_feed_batch.size();
VLOG(2) << "batch size: " << batch_size;
// batch_size must be 1, cause batch is already in Tensor.
// I suggest to remove the outside vector<>.
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
@@ -190,136 +186,122 @@ int PredictorClient::numpy_predict(
}
int vec_idx = 0;
-// batch_size can only be 1, cause batch is already in Tensor.
+// batch is already in Tensor.
-// if batch_size is not 1, error will occur in C++ part.
+std::vector<Tensor *> tensor_vec;
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<py::array_t<float>> float_feed = float_feed_batch[bi];
std::vector<py::array_t<int64_t>> int_feed = int_feed_batch[bi];
std::vector<std::string> string_feed = string_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
-for (auto &name : string_feed_name) {
+for (auto &name : float_feed_name) {
-tensor_vec.push_back(inst->add_tensor_array());
+tensor_vec.push_back(req.add_tensor());
}
-VLOG(2) << "batch [" << bi << "] "
+for (auto &name : int_feed_name) {
-<< "prepared";
+tensor_vec.push_back(req.add_tensor());
}
-vec_idx = 0;
+for (auto &name : string_feed_name) {
-for (auto &name : float_feed_name) {
+tensor_vec.push_back(req.add_tensor());
-int idx = _feed_name_to_idx[name];
+}
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
-tensor->mutable_float_data()->Resize(total_number, 0);
+vec_idx = 0;
-memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
+for (auto &name : float_feed_name) {
-vec_idx++;
+int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
VLOG(2) << "prepare float feed " << name << " idx " << idx;
int nbytes = float_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
int total_number = float_feed[vec_idx].size();
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_FLOAT32);
-VLOG(2) << "batch [" << bi << "] "
+tensor->set_name(_feed_name[idx]);
-<< "float feed value prepared";
+tensor->set_alias_name(name);
-vec_idx = 0;
+tensor->mutable_float_data()->Resize(total_number, 0);
-for (auto &name : int_feed_name) {
+memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
-int idx = _feed_name_to_idx[name];
+vec_idx++;
-if (idx >= tensor_vec.size()) {
+}
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
-for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
+vec_idx = 0;
-tensor->add_shape(int_shape[vec_idx][j]);
+for (auto &name : int_feed_name) {
-}
+int idx = _feed_name_to_idx[name];
-for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
+if (idx >= tensor_vec.size()) {
-tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
+LOG(ERROR) << "idx > tensor_vec.size()";
-}
+return -1;
tensor->set_elem_type(_type[idx]);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(
tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
Tensor *tensor = tensor_vec[idx];
int nbytes = int_feed[vec_idx].nbytes();
void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
int total_number = int_feed[vec_idx].size();
-VLOG(2) << "batch [" << bi << "] "
+for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
-<< "int feed value prepared";
+tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
if (_type[idx] == P_INT64) {
tensor->mutable_int64_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
} else {
tensor->mutable_int_data()->Resize(total_number, 0);
memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
}
vec_idx++;
}
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx > tensor_vec.size()";
return -1;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
-const int string_shape_size = string_shape[vec_idx].size();
+tensor->set_alias_name(name);
// string_shape[vec_idx] = [1];cause numpy has no datatype of string.
-// we pass string via vector<vector<string> >.
-if (string_shape_size != 1) {
-LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
-<< string_shape_size;
-return -1;
-}
-switch (string_shape_size) {
-case 1: {
-tensor->add_data(string_feed[vec_idx]);
-break;
-}
+const int string_shape_size = string_shape[vec_idx].size();
+// string_shape[vec_idx] = [1];cause numpy has no datatype of string.
+// we pass string via vector<vector<string> >.
+if (string_shape_size != 1) {
+LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
+<< string_shape_size;
+return -1;
+}
+switch (string_shape_size) {
+case 1: {
+tensor->add_data(string_feed[vec_idx]);
break;
}
vec_idx++;
}
vec_idx++;
VLOG(2) << "batch [" << bi << "] "
<< "string feed value prepared";
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
@@ -351,19 +333,18 @@ int PredictorClient::numpy_predict(
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
-int shape_size = output.insts(0).tensor_array(idx).shape_size();
+int shape_size = output.tensor(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
-model._shape_map[name][i] =
+model._shape_map[name][i] = output.tensor(idx).shape(i);
output.insts(0).tensor_array(idx).shape(i);
}
-int lod_size = output.insts(0).tensor_array(idx).lod_size();
+int lod_size = output.tensor(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
-model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
+model._lod_map[name][i] = output.tensor(idx).lod(i);
}
}
idx += 1;
@@ -375,22 +356,22 @@ int PredictorClient::numpy_predict(
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == P_INT64) {
VLOG(2) << "ferch var " << name << "type int64";
-int size = output.insts(0).tensor_array(idx).int64_data_size();
+int size = output.tensor(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
-output.insts(0).tensor_array(idx).int64_data().begin(),
+output.tensor(idx).int64_data().begin(),
-output.insts(0).tensor_array(idx).int64_data().begin() + size);
+output.tensor(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_FLOAT32) {
VLOG(2) << "fetch var " << name << "type float";
-int size = output.insts(0).tensor_array(idx).float_data_size();
+int size = output.tensor(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
-output.insts(0).tensor_array(idx).float_data().begin(),
+output.tensor(idx).float_data().begin(),
-output.insts(0).tensor_array(idx).float_data().begin() + size);
+output.tensor(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_INT32) {
VLOG(2) << "fetch var " << name << "type int32";
-int size = output.insts(0).tensor_array(idx).int_data_size();
+int size = output.tensor(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
-output.insts(0).tensor_array(idx).int_data().begin(),
+output.tensor(idx).int_data().begin(),
-output.insts(0).tensor_array(idx).int_data().begin() + size);
+output.tensor(idx).int_data().begin() + size);
}
idx += 1;
}
......
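To make the new fetch layout concrete, here is a condensed, hypothetical sketch of pulling one float fetch variable out of a ModelOutput after this change, assuming `output` and `idx` are obtained exactly as in the loop above:

// Old path went through output.insts(0).tensor_array(idx); the new path reads the tensor directly.
const auto &t = output.tensor(idx);
std::vector<int> fetched_shape(t.shape().begin(), t.shape().end());
std::vector<float> fetched_values(t.float_data().begin(), t.float_data().end());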
@@ -97,33 +97,31 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.destroy_predictor(); })
.def("numpy_predict",
[](PredictorClient &self,
-const std::vector<std::vector<py::array_t<float>>>
-&float_feed_batch,
+const std::vector<py::array_t<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
-const std::vector<std::vector<py::array_t<int64_t>>>
-&int_feed_batch,
+const std::vector<py::array_t<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
-const std::vector<std::vector<std::string>>& string_feed_batch,
+const std::vector<std::string> &string_feed,
-const std::vector<std::string>& string_feed_name,
+const std::vector<std::string> &string_feed_name,
-const std::vector<std::vector<int>>& string_shape,
+const std::vector<std::vector<int>> &string_shape,
-const std::vector<std::vector<int>>& string_lod_slot_batch,
+const std::vector<std::vector<int>> &string_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
-return self.numpy_predict(float_feed_batch,
+return self.numpy_predict(float_feed,
float_feed_name,
float_shape,
float_lod_slot_batch,
-int_feed_batch,
+int_feed,
int_feed_name,
int_shape,
int_lod_slot_batch,
-string_feed_batch,
+string_feed,
string_feed_name,
string_shape,
string_lod_slot_batch,
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_copy_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralCopyOp::inference() {
// read request from client
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
const TensorVector *in = &input_blob->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
int batch_size = input_blob->GetBatchSize();
int input_var_num = 0;
GeneralBlob *res = mutable_data<GeneralBlob>();
res->SetLogId(log_id);
TensorVector *out = &res->tensor_vector;
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
res->SetBatchSize(batch_size);
if (!res) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
for (int i = 0; i < in->size(); ++i) {
paddle::PaddleTensor lod_tensor;
CopyLod(&in->at(i), &lod_tensor);
lod_tensor.dtype = in->at(i).dtype;
lod_tensor.name = in->at(i).name;
VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
<< "].name = " << lod_tensor.name;
out->push_back(lod_tensor);
}
VLOG(2) << "(logid=" << log_id << ") pack done.";
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).shape = {out->at(i).lod[0].back(), 1};
int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
tgt_ptr[j] = src_ptr[j];
}
}
VLOG(2) << "(logid=" << log_id << ") output done.";
timeline.Pause();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, res);
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralCopyOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralCopyOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralCopyOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
...@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -36,7 +36,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
......
File mode changed from 100755 to 100644
...@@ -34,11 +34,152 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -34,11 +34,152 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralDistKVInferOp::inference() { return 0; } // DistKV Infer Op: seek cube and then call paddle inference
// op seq: general_reader-> dist_kv_infer -> general_response
int GeneralDistKVInferOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
if (pre_node_names.size() != 1) {
LOG(ERROR) << "This op(" << op_name()
<< ") can only have one predecessor op, but received "
<< pre_node_names.size();
return -1;
}
const std::string pre_name = pre_node_names[0];
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error";
return -1;
}
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) {
LOG(ERROR) << "(logid=" << log_id << ") output_blob is nullptr,error";
return -1;
}
output_blob->SetLogId(log_id);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op:" << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
TensorVector *out = &output_blob->tensor_vector;
std::vector<uint64_t> keys;
std::vector<rec::mcube::CubeValue> values;
int sparse_count = 0;  // number of sparse inputs; sparse inputs are looked up in cube
int dense_count = 0;   // number of dense inputs; dense inputs go directly to paddle inference
std::vector<std::pair<int64_t *, size_t>> dataptr_size_pairs;
size_t key_len = 0;
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
++dense_count;
continue;
}
++sparse_count;
size_t elem_num = 1;
for (size_t s = 0; s < in->at(i).shape.size(); ++s) {
elem_num *= in->at(i).shape[s];
}
key_len += elem_num;
int64_t *data_ptr = static_cast<int64_t *>(in->at(i).data.data());
dataptr_size_pairs.push_back(std::make_pair(data_ptr, elem_num));
}
keys.resize(key_len);
VLOG(3) << "(logid=" << log_id << ") cube number of keys to look up: " << key_len;
int key_idx = 0;
for (size_t i = 0; i < dataptr_size_pairs.size(); ++i) {
std::copy(dataptr_size_pairs[i].first,
dataptr_size_pairs[i].first + dataptr_size_pairs[i].second,
keys.begin() + key_idx);
key_idx += dataptr_size_pairs[i].second;
}
rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
std::vector<std::string> table_names = cube->get_table_names();
if (table_names.size() == 0) {
LOG(ERROR) << "cube init error or cube config not given.";
return -1;
}
// gather keys and seek cube servers, put results in values
int ret = cube->seek(table_names[0], keys, &values);
VLOG(3) << "(logid=" << log_id << ") cube seek status: " << ret;
if (values.size() != keys.size() || values.size() == 0 ||
    values[0].buff.size() == 0) {
LOG(ERROR) << "cube value return null";
return -1;
}
// EMBEDDING_SIZE is the length of the sparse embedding vector, derived from the cube value size.
size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
TensorVector sparse_out;
sparse_out.resize(sparse_count);
TensorVector dense_out;
dense_out.resize(dense_count);
int cube_val_idx = 0;
int sparse_idx = 0;
int dense_idx = 0;
std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
std::shared_ptr<PaddleGeneralModelConfig> model_config = resource.get_general_model_config().front();
// copy data to tensor
for (size_t i = 0; i < in->size(); ++i) {
if (in->at(i).dtype != paddle::PaddleDType::INT64) {
dense_out[dense_idx] = in->at(i);
++dense_idx;
continue;
}
sparse_out[sparse_idx].lod.resize(in->at(i).lod.size());
for (size_t x = 0; x < sparse_out[sparse_idx].lod.size(); ++x) {
sparse_out[sparse_idx].lod[x].resize(in->at(i).lod[x].size());
std::copy(in->at(i).lod[x].begin(),
in->at(i).lod[x].end(),
sparse_out[sparse_idx].lod[x].begin());
}
sparse_out[sparse_idx].dtype = paddle::PaddleDType::FLOAT32;
sparse_out[sparse_idx].shape.push_back(sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i];
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float));
float *dst_ptr = static_cast<float *>(sparse_out[sparse_idx].data.data());
for (int x = 0; x < sparse_out[sparse_idx].lod[0].back(); ++x) {
float *data_ptr = dst_ptr + x * EMBEDDING_SIZE;
memcpy(data_ptr,
values[cube_val_idx].buff.data(),
values[cube_val_idx].buff.size());
cube_val_idx++;
}
++sparse_idx;
}
VLOG(3) << "(logid=" << log_id << ") sparse tensor load success.";
TensorVector infer_in;
infer_in.insert(infer_in.end(), dense_out.begin(), dense_out.end());
infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
int batch_size = input_blob->_batch_size;
output_blob->_batch_size = batch_size;
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
// call paddle inference here
if (InferManager::instance().infer(
engine_name().c_str(), &infer_in, out, batch_size)) {
LOG(ERROR) << "(logid=" << log_id << ") Failed do infer in fluid model: " << engine_name();
return -1;
}
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralDistKVInferOp); DEFINE_OP(GeneralDistKVInferOp);
} // namespace serving } // namespace serving
......
...@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -35,7 +35,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() { ...@@ -117,9 +116,6 @@ int GeneralDistKVQuantInferOp::inference() {
std::unordered_map<int, int> in_out_map; std::unordered_map<int, int> in_out_map;
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
//TODO:Temporary addition, specific details to be studied by HexToString
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
int cube_quant_bits = resource.get_cube_quant_bits(); int cube_quant_bits = resource.get_cube_quant_bits();
size_t EMBEDDING_SIZE = 0; size_t EMBEDDING_SIZE = 0;
if (cube_quant_bits == 0) { if (cube_quant_bits == 0) {
...@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() { ...@@ -146,7 +142,7 @@ int GeneralDistKVQuantInferOp::inference() {
sparse_out[sparse_idx].shape.push_back( sparse_out[sparse_idx].shape.push_back(
sparse_out[sparse_idx].lod[0].back()); sparse_out[sparse_idx].lod[0].back());
sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE); sparse_out[sparse_idx].shape.push_back(EMBEDDING_SIZE);
sparse_out[sparse_idx].name = model_config->_feed_name[i]; sparse_out[sparse_idx].name = in->at(i).name;
sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() * sparse_out[sparse_idx].data.Resize(sparse_out[sparse_idx].lod[0].back() *
EMBEDDING_SIZE * sizeof(float)); EMBEDDING_SIZE * sizeof(float));
// END HERE // END HERE
......
...@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -31,7 +31,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -49,7 +48,7 @@ int GeneralInferOp::inference() { ...@@ -49,7 +48,7 @@ int GeneralInferOp::inference() {
const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name); const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) { if (!input_blob) {
LOG(ERROR) << "input_blob is nullptr,error"; LOG(ERROR) << "input_blob is nullptr,error";
return -1; return -1;
} }
uint64_t log_id = input_blob->GetLogId(); uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name; VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
...@@ -57,7 +56,7 @@ int GeneralInferOp::inference() { ...@@ -57,7 +56,7 @@ int GeneralInferOp::inference() {
GeneralBlob *output_blob = mutable_data<GeneralBlob>(); GeneralBlob *output_blob = mutable_data<GeneralBlob>();
if (!output_blob) { if (!output_blob) {
LOG(ERROR) << "output_blob is nullptr,error"; LOG(ERROR) << "output_blob is nullptr,error";
return -1; return -1;
} }
output_blob->SetLogId(log_id); output_blob->SetLogId(log_id);
......
...@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer; ...@@ -30,42 +30,8 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
int conf_check(const Request *req,
const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
int var_num = req->insts(0).tensor_array_size();
if (var_num != model_config->_feed_type.size()) {
LOG(ERROR) << "feed var number not match: model config["
<< model_config->_feed_type.size() << "] vs. actual[" << var_num
<< "]";
return -1;
}
VLOG(2) << "fetch var num in reader op: " << req->fetch_var_names_size();
for (int i = 0; i < var_num; ++i) {
const Tensor &tensor = req->insts(0).tensor_array(i);
if (model_config->_feed_type[i] != tensor.elem_type()) {
LOG(ERROR) << "feed type not match.";
return -1;
}
if (model_config->_feed_shape[i].size() == tensor.shape_size()) {
for (int j = 0; j < model_config->_feed_shape[i].size(); ++j) {
tensor.shape(j);
if (model_config->_feed_shape[i][j] != tensor.shape(j)) {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
} else {
LOG(ERROR) << "feed shape not match.";
return -1;
}
}
return 0;
}
int GeneralReaderOp::inference() { int GeneralReaderOp::inference() {
// read request from client // read request from client
...@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() { ...@@ -93,10 +59,8 @@ int GeneralReaderOp::inference() {
res->SetLogId(log_id); res->SetLogId(log_id);
Timer timeline; Timer timeline;
int64_t start = timeline.TimeStampUS(); int64_t start = timeline.TimeStampUS();
// only get insts(0), cause batch is already in Tensor.
// req can only include 1 inst.
// var_num means the number of feed_var. // var_num means the number of feed_var.
int var_num = req->insts(0).tensor_array_size(); int var_num = req->tensor_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num VLOG(2) << "(logid=" << log_id << ") var num: " << var_num
<< ") start to call load general model_conf op"; << ") start to call load general model_conf op";
...@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() { ...@@ -105,19 +69,7 @@ int GeneralReaderOp::inference() {
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
// get the first InferOP's model_config as ReaderOp's model_config by default.
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().front();
// TODO(guru4elephant): how to do conditional check?
/*
int ret = conf_check(req, model_config);
if (ret != 0) {
LOG(ERROR) << "model conf of server:";
resource.print_general_model_config(model_config);
return 0;
}
*/
// package tensor // package tensor
// prepare basic information for input // prepare basic information for input
// specify the memory needed for output tensor_vector // specify the memory needed for output tensor_vector
...@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() { ...@@ -128,7 +80,7 @@ int GeneralReaderOp::inference() {
int64_t databuf_size = 0; int64_t databuf_size = 0;
for (int i = 0; i < var_num; ++i) { for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor; paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->insts(0).tensor_array(i); const Tensor &tensor = req->tensor(i);
data_len = 0; data_len = 0;
elem_type = 0; elem_type = 0;
elem_size = 0; elem_size = 0;
...@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() { ...@@ -175,7 +127,7 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim; VLOG(2) << "(logid=" << log_id << ") shape for var[" << i << "]: " << dim;
paddleTensor.shape.push_back(dim); paddleTensor.shape.push_back(dim);
} }
paddleTensor.name = model_config->_feed_name[i]; paddleTensor.name = tensor.name();
out->push_back(paddleTensor); out->push_back(paddleTensor);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
......
...@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper; ...@@ -34,7 +34,6 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput; using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager; using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
...@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() { ...@@ -49,7 +48,6 @@ int GeneralResponseOp::inference() {
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId(); get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message()); const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>(); Response *res = mutable_data<Response>();
Timer timeline; Timer timeline;
...@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() { ...@@ -63,7 +61,8 @@ int GeneralResponseOp::inference() {
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done."; VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
//get the last InferOP's model_config as ResponseOp's model_config by default. // get the last InferOP's model_config as ResponseOp's model_config by
// default.
std::shared_ptr<PaddleGeneralModelConfig> model_config = std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back(); resource.get_general_model_config().back();
...@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() { ...@@ -71,6 +70,10 @@ int GeneralResponseOp::inference() {
<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size; << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
std::vector<int> fetch_index; std::vector<int> fetch_index;
// this is based on GetOutPutNames() is ordered map.
// and the order of Output is the same as the prototxt FetchVar.
// otherwise, you can only get the Output by the corresponding of
// Name -- Alias_name.
fetch_index.resize(req->fetch_var_names_size()); fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) { for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] = fetch_index[i] =
...@@ -95,40 +98,41 @@ int GeneralResponseOp::inference() { ...@@ -95,40 +98,41 @@ int GeneralResponseOp::inference() {
ModelOutput *output = res->add_outputs(); ModelOutput *output = res->add_outputs();
// To get the order of model return values // To get the order of model return values
output->set_engine_name(pre_name); output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
var_idx = 0;
// idx is the real index of FetchVar.
// idx is not the index of FetchList.
// fetch_index is the real index in FetchVar of Fetchlist
// for example, FetchVar = {0:A, 1:B, 2:C}
// FetchList = {0:C,1:A}, at this situation.
// fetch_index = [2,0], C`index = 2 and A`index = 0
for (auto &idx : fetch_index) { for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array(); Tensor *tensor = output->add_tensor();
//tensor->set_elem_type(1); tensor->set_name(in->at(idx).name);
if (model_config->_is_lod_fetch[idx]) { tensor->set_alias_name(model_config->_fetch_alias_name[idx]);
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " for (int k = 0; k < in->at(idx).shape.size(); ++k) {
<< model_config->_fetch_name[idx] << " is lod_tensor"; VLOG(2) << "(logid=" << log_id << ") shape[" << k
for (int k = 0; k < in->at(idx).shape.size(); ++k) { << "]: " << in->at(idx).shape[k];
VLOG(2) << "(logid=" << log_id << ") shape[" << k tensor->add_shape(in->at(idx).shape[k]);
<< "]: " << in->at(idx).shape[k]; }
tensor->add_shape(in->at(idx).shape[k]); std::string str_tensor_type = "is tensor";
} if (model_config->_is_lod_fetch[idx] && in->at(idx).lod.size() > 0) {
} else { str_tensor_type = "is lod_tensor";
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] " for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
<< model_config->_fetch_name[idx] << " is tensor"; tensor->add_lod(in->at(idx).lod[0][j]);
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
} }
} }
} VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
<< model_config->_fetch_name[idx] << str_tensor_type;
var_idx = 0;
for (auto &idx : fetch_index) {
cap = 1; cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) { for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j]; cap *= in->at(idx).shape[j];
} }
FetchInst *fetch_p = output->mutable_insts(0);
auto dtype = in->at(idx).dtype; auto dtype = in->at(idx).dtype;
if (dtype == paddle::PaddleDType::INT64) { if (dtype == paddle::PaddleDType::INT64) {
tensor->set_elem_type(0);
VLOG(2) << "(logid=" << log_id << ") Prepare int64 var [" VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data()); int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
...@@ -137,35 +141,24 @@ int GeneralResponseOp::inference() { ...@@ -137,35 +141,24 @@ int GeneralResponseOp::inference() {
// `Swap` method is faster than `{}` method. // `Swap` method is faster than `{}` method.
google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int64_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap( output->mutable_tensor(var_idx)->mutable_int64_data()->Swap(&tmp_data);
&tmp_data);
} else if (dtype == paddle::PaddleDType::FLOAT32) { } else if (dtype == paddle::PaddleDType::FLOAT32) {
tensor->set_elem_type(1);
VLOG(2) << "(logid=" << log_id << ") Prepare float var [" VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
float *data_ptr = static_cast<float *>(in->at(idx).data.data()); float *data_ptr = static_cast<float *>(in->at(idx).data.data());
google::protobuf::RepeatedField<float> tmp_data(data_ptr, google::protobuf::RepeatedField<float> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap( output->mutable_tensor(var_idx)->mutable_float_data()->Swap(&tmp_data);
&tmp_data);
} else if (dtype == paddle::PaddleDType::INT32) { } else if (dtype == paddle::PaddleDType::INT32) {
tensor->set_elem_type(2);
VLOG(2) << "(logid=" << log_id << ")Prepare int32 var [" VLOG(2) << "(logid=" << log_id << ")Prepare int32 var ["
<< model_config->_fetch_name[idx] << "]."; << model_config->_fetch_name[idx] << "].";
int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data()); int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr, google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap); data_ptr + cap);
fetch_p->mutable_tensor_array(var_idx)->mutable_int_data()->Swap( output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
&tmp_data);
}
if (model_config->_is_lod_fetch[idx]) {
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
} }
VLOG(2) << "(logid=" << log_id << ") fetch var [" VLOG(2) << "(logid=" << log_id << ") fetch var ["
...@@ -205,4 +198,4 @@ DEFINE_OP(GeneralResponseOp); ...@@ -205,4 +198,4 @@ DEFINE_OP(GeneralResponseOp);
} // namespace serving } // namespace serving
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
\ No newline at end of file
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
if (!res) {
LOG(ERROR) << "(logid=" << log_id
           << ") Failed get op tls reader object output";
return -1;
}
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (batch_size <= 0) {
LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config()[0];
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
} else {
elem_size[i] = sizeof(float);
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = tensor.int_data_size();
int cur_len = out->at(i).lod[0].back();
out->at(i).lod[0].push_back(cur_len + data_len);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
for (int i = 0; i < var_num; ++i) {
if (elem_type[i] == 0) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
} else {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0; k < req->insts(j).tensor_array(i).float_data_size();
     ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
}
}
}
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextReaderOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_response_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/predictor/framework/resource.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FetchInst;
using baidu::paddle_serving::predictor::general_model::ModelOutput;
using baidu::paddle_serving::predictor::InferManager;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextResponseOp::inference() {
VLOG(2) << "Going to run inference";
const std::vector<std::string> pre_node_names = pre_names();
VLOG(2) << "pre node names size: " << pre_node_names.size();
const GeneralBlob *input_blob;
uint64_t log_id =
get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
const Request *req = dynamic_cast<const Request *>(get_request_message());
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config().back();
std::vector<int> fetch_index;
fetch_index.resize(req->fetch_var_names_size());
for (int i = 0; i < req->fetch_var_names_size(); ++i) {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
const std::string &pre_name = pre_node_names[pi];
VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
<< " (" << pre_node_names.size() << ")";
input_blob = get_depend_argument<GeneralBlob>(pre_name);
if (!input_blob) {
LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable depended argument, op: " << pre_name;
return -1;
}
const TensorVector *in = &input_blob->tensor_vector;
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
ModelOutput *output = res->add_outputs();
output->set_engine_name(
pre_name); // To get the order of model return values
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = output->add_insts();
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
int cap = 1;
for (int j = 1; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < batch_size; ++j) {
for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1];
k++) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
output->mutable_insts(j)
->mutable_tensor_array(var_idx)
->add_float_data(data_ptr[k]);
}
}
}
var_idx++;
}
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
// TODO(barriery): multi-model profile_time.
// At present, only the response_op is multi-input, so here we get
// the profile_time by hard coding. It needs to be replaced with
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
} else {
profile_time_idx = input_blob->p_size - 2;
}
for (; profile_time_idx < input_blob->p_size; ++profile_time_idx) {
res->add_profile_time(input_blob->time_stamp[profile_time_idx]);
}
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextResponseOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralTextResponseOp);
int inference();
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
...@@ -24,17 +24,16 @@ message Tensor { ...@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2; repeated int32 int_data = 2;
repeated int64 int64_data = 3; repeated int64 int64_data = 3;
repeated float float_data = 4; repeated float float_data = 4;
optional int32 elem_type = 5; optional int32 elem_type =
repeated int32 shape = 6; 5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request { message Request {
repeated FeedInst insts = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ]; required uint64 log_id = 4 [ default = 0 ];
...@@ -46,7 +45,7 @@ message Response { ...@@ -46,7 +45,7 @@ message Response {
}; };
message ModelOutput { message ModelOutput {
repeated FetchInst insts = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; optional string engine_name = 2;
} }
......
...@@ -280,6 +280,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -280,6 +280,7 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -322,6 +323,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -322,6 +323,7 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = equest->log_id();\n" " uint64_t log_id = equest->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -1023,6 +1025,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1023,6 +1025,7 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
...@@ -1067,6 +1070,7 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1067,6 +1070,7 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_response_compress_type(brpc::COMPRESS_TYPE_GZIP);\n"
" uint64_t log_id = request->log_id();\n" " uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n" " cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -2,3 +2,16 @@ set(seq_gen_src ${CMAKE_CURRENT_LIST_DIR}/seq_generator.cpp ${CMAKE_CURRENT_LIS ...@@ -2,3 +2,16 @@ set(seq_gen_src ${CMAKE_CURRENT_LIST_DIR}/seq_generator.cpp ${CMAKE_CURRENT_LIS
LIST(APPEND seq_gen_src ${PROTO_SRCS}) LIST(APPEND seq_gen_src ${PROTO_SRCS})
add_executable(seq_generator ${seq_gen_src}) add_executable(seq_generator ${seq_gen_src})
target_link_libraries(seq_generator protobuf -lpthread) target_link_libraries(seq_generator protobuf -lpthread)
set(seq_reader_src ${CMAKE_CURRENT_LIST_DIR}/seq_reader.cpp ${CMAKE_CURRENT_LIST_DIR}/../../cube/cube-builder/src/seqfile_reader.cpp)
add_executable(seq_reader ${seq_reader_src})
add_dependencies(seq_reader brpc)
install(TARGETS seq_reader
RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
LIBRARY DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/so
)
install(TARGETS seq_reader RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
install(TARGETS seq_generator RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/tool)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sys/time.h>
#include <limits.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <thread>
#include "core/cube/cube-builder/include/cube-builder/seqfile_reader.h"
std::string string_to_hex(const std::string& input) {
static const char* const lut = "0123456789ABCDEF";
size_t len = input.length();
std::string output;
output.reserve(2 * len);
for (size_t i = 0; i < len; ++i) {
const unsigned char c = input[i];
output.push_back(lut[c >> 4]);
output.push_back(lut[c & 15]);
}
return output;
}
void printSeq(std::string file, int limit) {
SequenceFileRecordReader reader(file.c_str());
if (reader.open() != 0) {
std::cerr << "open file failed! " << file;
return;
}
if (reader.read_header() != 0) {
std::cerr << "read header error! " << file;
reader.close();
return;
}
Record record(reader.get_header());
int total_count = 0;
while (reader.next(&record) == 0) {
uint64_t key =
*reinterpret_cast<uint64_t *>(const_cast<char *>(record.key.data()));
total_count++;
int64_t value_length = record.record_len - record.key_len;
std::cout << "key: " << key << " , value: " << string_to_hex(record.value.c_str()) << std::endl;
if (total_count >= limit) {
break;
}
}
if (reader.close() != 0) {
std::cerr << "close file failed! " << file;
return;
}
}
int main(int argc, char **argv) {
if (argc != 3 && argc != 2) {
std::cout << "Seq Reader Usage:" << std::endl;
std::cout << "get all keys: ./seq_reader $FILENAME " << std::endl;
std::cout << "get some keys: ./seq_reader $FILENAME $KEY_NUM" << std::endl;
return -1;
}
if (argc == 3 || argc == 2) {
const char* filename_str = argv[1];
std::cout << "cstr filename is " << filename_str << std::endl;
std::string filename = filename_str;
std::cout << "filename is " << filename << std::endl;
if (argc == 3) {
const char* key_num_str = argv[2];
int key_limit = std::stoi(key_num_str);
printSeq(filename, key_limit);
} else {
printSeq(filename, INT_MAX);
}
}
return 0;
}
...@@ -24,17 +24,16 @@ message Tensor { ...@@ -24,17 +24,16 @@ message Tensor {
repeated int32 int_data = 2; repeated int32 int_data = 2;
repeated int64 int64_data = 3; repeated int64 int64_data = 3;
repeated float float_data = 4; repeated float float_data = 4;
optional int32 elem_type = 5; optional int32 elem_type =
repeated int32 shape = 6; 5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 lod = 7; // only for fetch tensor currently repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message Request { message Request {
repeated FeedInst insts = 1; repeated Tensor tensor = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ]; required uint64 log_id = 4 [ default = 0 ];
...@@ -46,7 +45,7 @@ message Response { ...@@ -46,7 +45,7 @@ message Response {
}; };
message ModelOutput { message ModelOutput {
repeated FetchInst insts = 1; repeated Tensor tensor = 1;
optional string engine_name = 2; optional string engine_name = 2;
} }
......
File mode changed from 100755 to 100644
...@@ -87,6 +87,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2 ...@@ -87,6 +87,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2 go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3 go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0 go get -u google.golang.org/grpc@v1.33.0
go env -w GO111MODULE=auto
``` ```
......
...@@ -86,6 +86,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2 ...@@ -86,6 +86,7 @@ go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2 go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3 go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0 go get -u google.golang.org/grpc@v1.33.0
go env -w GO111MODULE=auto
``` ```
......
## How to Obtain the Model Input Required by the Sparse Parameter Index Service (Cube)
### Background
Recommendation systems need a large-scale sparse parameter index to support distributed deployment; see `python/example/criteo_ctr_with_cube` or [PaddleRec](https://github.com/paddlepaddle/paddlerec) to learn more about recommendation models.
The sparse parameter index uses the SequenceFile format, a key-value file format from the Hadoop ecosystem.
To make debugging easier, we provide a tool that converts readable text files in a specific format into SequenceFile files, and a tool that converts SequenceFile files back into readable text.
When debugging the Cube service, you can generate SequenceFile files from your own key-value pairs.
When verifying that Cube delivers data correctly, you can convert SequenceFile files back into readable text for comparison.
### Prerequisites
- You need to be able to compile Paddle Serving; see the [compilation guide](./COMPILE.md).
### Usage
The install output produced by the build contains `seq_reader` and `kv_to_seqfile.py`.
#### Generating a SequenceFile
Under `output/tool/`, edit `output/tool/source/file.txt`. Each line of this file holds one key-value pair, with a colon `:` separating the key from the value.
For example:
```
1676869128226002114:48241 37064 91 -539 114 51 -122 269 229 -134 -282
1657749292782759014:167 40 98 27 117 10 -29 15 74 67 -54
```
Run:
```
python kv_to_seqfile.py
```
This generates a `data` directory with the following structure:
```
.
├── 20210805095422
│   └── base
│   └── feature
└── donefile
└── base.txt
```
Here `20210805095422/base/feature` is the SequenceFile, and the donefile is saved at `donefile/base.txt`.
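If you prefer to generate the input file from a script, the following minimal Python sketch writes key-value pairs in the `key:value` text format consumed by `kv_to_seqfile.py` (the output path and the sample embedding values are taken from the example above and are only illustrative; adjust them to your data):

```python
# Sketch: write key:value pairs in the text format expected by kv_to_seqfile.py.
# The path and the sample values below are illustrative assumptions.
kv_pairs = {
    1676869128226002114: "48241 37064 91 -539 114 51 -122 269 229 -134 -282",
    1657749292782759014: "167 40 98 27 117 10 -29 15 74 67 -54",
}

with open("output/tool/source/file.txt", "w") as f:
    for key, value in kv_pairs.items():
        # one pair per line, key and value separated by a colon
        f.write("{}:{}\n".format(key, value))
```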
#### Inspecting a SequenceFile
Use the `seq_reader` tool to read a SequenceFile.
```
./seq_reader 20210805095422/base/feature 10 # read the first 10 KV pairs
./seq_reader 20210805095422/base/feature # read all KV pairs
```
The output looks like:
```
key: 1676869128226002114 , value: 343832343109333730363409093931092D35333909313134093531092D3132320932363909323239092D313334092D323832
key: 1657749292782759014 , value: 3136370934300909393809323709313137093130092D3239093135093734093637092D3534
```
Values are currently printed in hexadecimal.
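Because the values are printed as hex, you can turn a printed value back into readable bytes with a short Python snippet (a sketch; paste in the hex string you want to inspect):

```python
# Sketch: decode a hex-encoded value printed by seq_reader back into bytes.
hex_value = ("3136370934300909393809323709313137093130"
             "092D3239093135093734093637092D3534")
raw = bytes.fromhex(hex_value)
print(raw)                                    # raw bytes
print(raw.decode("utf-8", errors="replace"))  # best-effort text view
```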
...@@ -324,6 +324,15 @@ GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999 ...@@ -324,6 +324,15 @@ GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
**A:** The log_id defaults to 0 (automatic log_id generation is planned; the current version is 0.4.0). The client passes it through the log_id parameter of the predict function. **A:** The log_id defaults to 0 (automatic log_id generation is planned; the current version is 0.4.0). The client passes it through the log_id parameter of the predict function.
#### Q: How do I debug and locate problems in the C++ Server?
**A:** We recommend gdb for locating and debugging problems. If you use Docker, start the container with the docker run --privileged flag to enable privileged mode; otherwise gdb cannot be used inside the container.
If the C++ side core dumps, a core file is normally generated. If it is not, enable core file generation with ulimit -c unlimited.
To debug the core file with gdb, run gdb <executable> <core file>, then type bt; this usually shows the line where the error occurred.
Note: the executable is the C++ binary, not the python command. It is usually a path like /usr/local/lib/python3.6/site-packages/paddle_serving_server/serving-gpu-102-0.6.2/serving.
## Performance Optimization ## Performance Optimization
# Accessing the Server over HTTP
The Paddle Serving server now supports direct access over HTTP. This document describes the details.
## How It Works
The BRPC server can be reached over HTTP. Since every language has libraries for issuing HTTP requests, languages with limited BRPC support such as Java, Python, and Go can send prediction requests to the server directly over HTTP.
### Plain HTTP
Basic flow: the client packages the data in the format defined by the proto (see [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto)) and places it in the body of the HTTP request.
The BRPC server then deserializes the proto-format data from the JSON string in the request body and continues with normal processing.
### HTTP + protobuf
Every language also supports ProtoBuf. If you are familiar with it, you can serialize the data with ProtoBuf first, put the serialized bytes in the HTTP request body, and set Content-Type: application/proto, so that the service is accessed with an http/h2+protobuf binary payload.
In our tests, the data size and deserialization time of the JSON-over-HTTP approach grow sharply with the payload, so HTTP + protobuf is recommended for large payloads. We will add this capability to the framework's HttpClient later; it is not supported there yet.
**In theory, serialization/deserialization performance ranks, from fastest to slowest: protobuf > http/h2+protobuf > http.**
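As a rough illustration of the HTTP + protobuf path, the sketch below serializes a Request and posts it with Content-Type: application/proto. It assumes you have compiled general_model_service.proto with protoc into a Python module named `general_model_service_pb2` (that module name, and the use of the third-party `requests` library, are assumptions; the field values match the fit_a_line example used later in this document):

```python
import requests                          # third-party HTTP client library
import general_model_service_pb2 as pb  # assumed name of the protoc-generated module

req = pb.Request()
req.log_id = 0                           # required field in the proto
req.fetch_var_names.append("price")
tensor = req.tensor.add()
tensor.name = "x"
tensor.alias_name = "x"
tensor.elem_type = 1                     # 1 means float32
tensor.shape.extend([1, 13])             # shape includes the batch dimension
tensor.float_data.extend([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                          -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])

resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=req.SerializeToString(),
    headers={"Content-Type": "application/proto"})
print(resp.content)                      # serialized Response; parse with pb.Response() if needed
```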
## Example
Using python/examples/fit_a_line as an example, this section shows how to access the server over HTTP.
### Getting the model
```shell
sh get_data.sh
```
## Starting the server
```shell
python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
```
The server needs no modification at all; it supports both BRPC and HTTP access.
## Client access
### Sending HTTP requests with HttpClient (Python/Java)
To make it easy to call the prediction service over HTTP, common functionality such as building the HTTP request body, compression, and request encryption has been wrapped into an HttpClient class.
In the simplest case, using HttpClient takes three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the Predict function to request the prediction service over HTTP. A sketch of these steps follows at the end of this section.
You can also configure the server IP, port, and service name (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable compression of the request body, allow compressed responses, use encrypted-model prediction (the server must be configured for model encryption), set a response timeout, and so on.
A Python HttpClient example is in [`python/examples/fit_a_line/test_httpclient.py`](../python/examples/fit_a_line/test_httpclient.py); see [`python/paddle_serving_client/httpclient.py`](../python/paddle_serving_client/httpclient.py) for the API.
A Java HttpClient example is in [`java/examples/src/main/java/PaddleServingClientExample.java`](../java/examples/src/main/java/PaddleServingClientExample.java); see [`java/src/main/java/io/paddle/serving/client/HttpClient.java`](../java/src/main/java/io/paddle/serving/client/HttpClient.java) for the API.
If these do not meet your needs, you can extend them with additional functionality.
To support HTTPS or custom response status codes, some secondary development of the C++ brpc server is required; see https://github.com/apache/incubator-brpc/blob/master/docs/cn/http_service.md. If demand is strong, we will add these features to the server later.
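Below is a minimal sketch of the three steps described above. The constructor and method names are assumptions written to mirror the regular Client API; treat `python/examples/fit_a_line/test_httpclient.py` and `python/paddle_serving_client/httpclient.py` as the authoritative reference:

```python
# Sketch only: method names are assumed to mirror the regular Client API.
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()                                    # 1. create an HttpClient object
client.load_client_config(
    "uci_housing_client/serving_client_conf.prototxt")   # 2. load the client prototxt
client.connect(["127.0.0.1:9393"])

data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
        -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": data}, fetch=["price"])  # 3. predict over HTTP
print(fetch_map)
```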
### Sending HTTP requests with curl (basic mechanism)
```shell
curl -XPOST http://0.0.0.0:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}'
```
Here the IP and port (`127.0.0.1:9393` in the examples) should be set to the IP and port your server is listening on.
`GeneralModelService` and `inference` are the Service name and rpc method name in the proto file; see [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto).
The argument after -d is the request body. The JSON must contain the required fields of the proto above, otherwise the conversion fails and the request is rejected.
Note that the shape field in the data is the shape the model actually needs, including the batch dimension, which may differ from the shape in the proto file.
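The same request can be issued from Python; here is a small sketch using the third-party `requests` library, with the endpoint and field values copied from the curl command above:

```python
import requests  # third-party HTTP client library

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,          # 1 means float32
        "name": "x",
        "alias_name": "x",
        "shape": [1, 13],        # includes the batch dimension
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,                 # required field in the proto
}

resp = requests.post("http://127.0.0.1:9393/GeneralModelService/inference", json=payload)
print(resp.json())
```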
#### message
Corresponds to a rapidjson Object, enclosed in braces; its elements are parsed recursively.
```protobuf
// protobuf
message Foo {
required string field1 = 1;
required int32 field2 = 2;
}
message Bar {
required Foo foo = 1;
optional bool flag = 2;
required string name = 3;
}
// rapidjson
{"foo":{"field1":"hello", "field2":3},"name":"Tom" }
```
#### repeated field
Corresponds to a rapidjson Array, enclosed in brackets; its elements are parsed recursively. Unlike a message, all elements have the same type.
```protobuf
// protobuf
repeated int32 numbers = 1;
// rapidjson
{"numbers" : [12, 17, 1, 24] }
```
#### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string).
#### fetch_var_names
The names of the variables to fetch in the result; see `alias_name` under the `fetch_var` field in the model file serving_client_conf.prototxt.
### HTTP compression
gzip compression is supported, but gzip is not especially fast at compressing or decompressing. For small payloads it costs more than it saves; consider it only when the data exceeds 512 bytes. In our tests, compression brings little benefit for payloads under 50 KB.
#### Compressing the client request body
Using the fit_a_line example with the same request body as above. This is only to demonstrate usage; for a payload this small, compression is actually not worth it.
```shell
echo ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}' | gzip -c > data.txt.gz
```
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' -XPOST http://127.0.0.1:9393/GeneralModelService/inference
```
**Note: when the request body is compressed, the request header must include Content-Encoding: gzip.**
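The equivalent compressed request can also be sent from Python; a sketch using the standard `gzip` module together with the third-party `requests` library (mirroring the curl command above):

```python
import gzip
import json
import requests

payload = {
    "tensor": [{
        "float_data": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
                       -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
        "elem_type": 1,
        "shape": [1, 13],
    }],
    "fetch_var_names": ["price"],
    "log_id": 0,
}

body = gzip.compress(json.dumps(payload).encode("utf-8"))  # gzip the JSON body
resp = requests.post(
    "http://127.0.0.1:9393/GeneralModelService/inference",
    data=body,
    headers={"Content-Encoding": "gzip"})  # tell the server the body is gzipped
print(resp.text)
```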
#### Compressing the server response
When the HTTP request header sets Accept-encoding: gzip, the server tries to gzip-compress the response data. "Tries" means the compression may not happen when:
- the request does not set Accept-encoding: gzip;
- the body is smaller than the byte count given by -http_body_compress_threshold (default 512). gzip is not a fast algorithm, and for small bodies the added compression latency may exceed the network time it saves, so skipping compression for small packets can be the better choice.
In those cases the server always returns an uncompressed result.
With curl, the recommended way to enable response compression is the --compressed flag, which automatically sets Accept-encoding: gzip on the request and transparently decompresses the compressed response; to the user, the whole compression/decompression process is invisible.
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' --compressed -XPOST http://127.0.0.1:9393/GeneralModelService/inference
```
If you only set -H'Accept-encoding: gzip' in the request header yourself, you will receive the compressed Response and have to decompress it manually.
In other words, --compressed = -H'Accept-encoding: gzip' + automatic decompression, so --compressed is the recommended option; the commands below are only an illustrative example of setting the header by hand and decompressing manually.
If you want to verify that the returned data is really compressed, add only the -H'Accept-encoding: gzip' header and skip the decompression: the returned payload will then be the raw compressed bytes (generally unreadable).
```shell
curl --data-binary @data.txt.gz -H'Content-Encoding: gzip' -H'Accept-encoding: gzip' -XPOST http://127.0.0.1:9393/GeneralModelService/inference | gunzip
```
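The same behavior is available programmatically: the Java HttpClient added in this change exposes set_request_compress and set_response_compress (see the compress() case in PaddleServingClientExample.java). A minimal sketch under the same fit_a_line assumptions as above (class name and config path are illustrative; whether compression actually happens still follows the rules described in this section):
```java
import io.paddle.serving.client.HttpClient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import java.util.*;

public class CompressedHttpDemo {
    public static void main(String[] args) {
        HttpClient client = new HttpClient();
        client.setIP("127.0.0.1");
        client.setPort("9393");
        client.loadClientConfig(args[0]);  // client-side serving_client_conf.prototxt

        // gzip the request body (the request should then carry Content-Encoding: gzip, as described above)
        client.set_request_compress(true);
        // ask for a gzipped Response (Accept-encoding: gzip) and let the client decompress it
        client.set_response_compress(true);

        // Use a deliberately large batch: compression only pays off for big payloads.
        float[] row = {0.0137f, -0.1136f, 0.2553f, -0.0692f, 0.0582f, -0.0727f,
                       -0.1583f, -0.0584f, 0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
        long[] batch_shape = {500, 13};
        INDArray batch = Nd4j.createFromArray(row).broadcast(batch_shape);
        HashMap<String, Object> feed = new HashMap<String, Object>();
        feed.put("x", batch);
        List<String> fetch = Arrays.asList("price");

        String result = client.predict(feed, fetch, true, 0);
        System.out.println(result);
    }
}
```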
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
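// Accuracy checker: reads the tab-separated score file given as the first argument,
// where each row is "predicted probability<TAB>label", and prints the total count,
// the number of rows whose prediction falls on the same side of 0.5 as the label,
// and the resulting accuracy.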
package main
import (
"io"
"os"
"fmt"
"bufio"
"strings"
"strconv"
)
func main() {
score_file := os.Args[1]
fi, err := os.Open(score_file)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
total := int(0)
acc := int(0)
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
s := strings.Split(line, "\t")
prob_str := strings.Trim(s[0], " ")
label_str := strings.Trim(s[1], " ")
prob, err := strconv.ParseFloat(prob_str, 32)
if err != nil {
panic(err)
}
label, err := strconv.ParseFloat(label_str, 32)
if err != nil {
panic(err)
}
if (prob - 0.5) * (label - 0.5) > 0 {
acc++
}
total++
}
fmt.Println("total num: ", total)
fmt.Println("acc num: ", acc)
fmt.Println("acc: ", float32(acc) / float32(total))
}
\ No newline at end of file
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
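// Example Go client: loads the client-side model config given as the first argument,
// connects to 127.0.0.1:9292, then for every line of the test file given as the second
// argument feeds the word ids as "words" and the label as "label", and prints the
// predicted score next to the ground-truth label.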
package main
import (
"io"
"fmt"
"strings"
"bufio"
"strconv"
"os"
serving_client "github.com/PaddlePaddle/Serving/go/serving_client"
)
func main() {
var config_file_path string
config_file_path = os.Args[1]
handle := serving_client.LoadModelConfig(config_file_path)
handle = serving_client.Connect("127.0.0.1", "9292", handle)
test_file_path := os.Args[2]
fi, err := os.Open(test_file_path)
if err != nil {
fmt.Print(err)
}
defer fi.Close()
br := bufio.NewReader(fi)
fetch := []string{"cost", "acc", "prediction"}
var result map[string][]float32
for {
line, err := br.ReadString('\n')
if err == io.EOF {
break
}
line = strings.Trim(line, "\n")
var words = []int64{}
s := strings.Split(line, " ")
value, err := strconv.Atoi(s[0])
var feed_int_map map[string][]int64
for _, v := range s[1:value + 1] {
int_v, _ := strconv.Atoi(v)
words = append(words, int64(int_v))
}
label, err := strconv.Atoi(s[len(s)-1])
if err != nil {
panic(err)
}
feed_int_map = map[string][]int64{}
feed_int_map["words"] = words
feed_int_map["label"] = []int64{int64(label)}
result = serving_client.Predict(handle,
feed_int_map, fetch)
fmt.Println(result["prediction"][1], "\t", int64(label))
}
}
\ No newline at end of file
// Code generated by protoc-gen-go. DO NOT EDIT.
// source: general_model_config.proto
package baidu_paddle_serving_configure
import (
fmt "fmt"
proto "github.com/golang/protobuf/proto"
math "math"
)
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
// A compilation error at this line likely means your copy of the
// proto package needs to be updated.
const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
type FeedVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
FeedType *int32 `protobuf:"varint,4,opt,name=feed_type,json=feedType,def=0" json:"feed_type,omitempty"`
Shape []int32 `protobuf:"varint,5,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FeedVar) Reset() { *m = FeedVar{} }
func (m *FeedVar) String() string { return proto.CompactTextString(m) }
func (*FeedVar) ProtoMessage() {}
func (*FeedVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{0}
}
func (m *FeedVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FeedVar.Unmarshal(m, b)
}
func (m *FeedVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FeedVar.Marshal(b, m, deterministic)
}
func (m *FeedVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FeedVar.Merge(m, src)
}
func (m *FeedVar) XXX_Size() int {
return xxx_messageInfo_FeedVar.Size(m)
}
func (m *FeedVar) XXX_DiscardUnknown() {
xxx_messageInfo_FeedVar.DiscardUnknown(m)
}
var xxx_messageInfo_FeedVar proto.InternalMessageInfo
const Default_FeedVar_IsLodTensor bool = false
const Default_FeedVar_FeedType int32 = 0
func (m *FeedVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FeedVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FeedVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FeedVar_IsLodTensor
}
func (m *FeedVar) GetFeedType() int32 {
if m != nil && m.FeedType != nil {
return *m.FeedType
}
return Default_FeedVar_FeedType
}
func (m *FeedVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type FetchVar struct {
Name *string `protobuf:"bytes,1,opt,name=name" json:"name,omitempty"`
AliasName *string `protobuf:"bytes,2,opt,name=alias_name,json=aliasName" json:"alias_name,omitempty"`
IsLodTensor *bool `protobuf:"varint,3,opt,name=is_lod_tensor,json=isLodTensor,def=0" json:"is_lod_tensor,omitempty"`
Shape []int32 `protobuf:"varint,4,rep,name=shape" json:"shape,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *FetchVar) Reset() { *m = FetchVar{} }
func (m *FetchVar) String() string { return proto.CompactTextString(m) }
func (*FetchVar) ProtoMessage() {}
func (*FetchVar) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{1}
}
func (m *FetchVar) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_FetchVar.Unmarshal(m, b)
}
func (m *FetchVar) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_FetchVar.Marshal(b, m, deterministic)
}
func (m *FetchVar) XXX_Merge(src proto.Message) {
xxx_messageInfo_FetchVar.Merge(m, src)
}
func (m *FetchVar) XXX_Size() int {
return xxx_messageInfo_FetchVar.Size(m)
}
func (m *FetchVar) XXX_DiscardUnknown() {
xxx_messageInfo_FetchVar.DiscardUnknown(m)
}
var xxx_messageInfo_FetchVar proto.InternalMessageInfo
const Default_FetchVar_IsLodTensor bool = false
func (m *FetchVar) GetName() string {
if m != nil && m.Name != nil {
return *m.Name
}
return ""
}
func (m *FetchVar) GetAliasName() string {
if m != nil && m.AliasName != nil {
return *m.AliasName
}
return ""
}
func (m *FetchVar) GetIsLodTensor() bool {
if m != nil && m.IsLodTensor != nil {
return *m.IsLodTensor
}
return Default_FetchVar_IsLodTensor
}
func (m *FetchVar) GetShape() []int32 {
if m != nil {
return m.Shape
}
return nil
}
type GeneralModelConfig struct {
FeedVar []*FeedVar `protobuf:"bytes,1,rep,name=feed_var,json=feedVar" json:"feed_var,omitempty"`
FetchVar []*FetchVar `protobuf:"bytes,2,rep,name=fetch_var,json=fetchVar" json:"fetch_var,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (m *GeneralModelConfig) Reset() { *m = GeneralModelConfig{} }
func (m *GeneralModelConfig) String() string { return proto.CompactTextString(m) }
func (*GeneralModelConfig) ProtoMessage() {}
func (*GeneralModelConfig) Descriptor() ([]byte, []int) {
return fileDescriptor_efa52beffa29d37a, []int{2}
}
func (m *GeneralModelConfig) XXX_Unmarshal(b []byte) error {
return xxx_messageInfo_GeneralModelConfig.Unmarshal(m, b)
}
func (m *GeneralModelConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) {
return xxx_messageInfo_GeneralModelConfig.Marshal(b, m, deterministic)
}
func (m *GeneralModelConfig) XXX_Merge(src proto.Message) {
xxx_messageInfo_GeneralModelConfig.Merge(m, src)
}
func (m *GeneralModelConfig) XXX_Size() int {
return xxx_messageInfo_GeneralModelConfig.Size(m)
}
func (m *GeneralModelConfig) XXX_DiscardUnknown() {
xxx_messageInfo_GeneralModelConfig.DiscardUnknown(m)
}
var xxx_messageInfo_GeneralModelConfig proto.InternalMessageInfo
func (m *GeneralModelConfig) GetFeedVar() []*FeedVar {
if m != nil {
return m.FeedVar
}
return nil
}
func (m *GeneralModelConfig) GetFetchVar() []*FetchVar {
if m != nil {
return m.FetchVar
}
return nil
}
func init() {
proto.RegisterType((*FeedVar)(nil), "baidu.paddle_serving.configure.FeedVar")
proto.RegisterType((*FetchVar)(nil), "baidu.paddle_serving.configure.FetchVar")
proto.RegisterType((*GeneralModelConfig)(nil), "baidu.paddle_serving.configure.GeneralModelConfig")
}
func init() { proto.RegisterFile("general_model_config.proto", fileDescriptor_efa52beffa29d37a) }
var fileDescriptor_efa52beffa29d37a = []byte{
// 283 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xb4, 0xd0, 0x31, 0x4b, 0xc4, 0x30,
0x14, 0x07, 0x70, 0x72, 0x6d, 0xb9, 0xf6, 0x1d, 0x2e, 0xc1, 0xa1, 0x08, 0x1e, 0xe5, 0x16, 0xe3,
0x52, 0xc4, 0xf1, 0x46, 0xc5, 0x73, 0x51, 0x87, 0x72, 0xb8, 0x86, 0xd8, 0xbc, 0xb6, 0x81, 0x5c,
0x53, 0x92, 0xde, 0xc1, 0x2d, 0x7e, 0x13, 0xf1, 0xab, 0x4a, 0x93, 0x43, 0x9c, 0x74, 0x72, 0x7b,
0x79, 0xff, 0xf0, 0xde, 0xe3, 0x07, 0x17, 0x2d, 0xf6, 0x68, 0x85, 0xe6, 0x3b, 0x23, 0x51, 0xf3,
0xda, 0xf4, 0x8d, 0x6a, 0xcb, 0xc1, 0x9a, 0xd1, 0xd0, 0xe5, 0x9b, 0x50, 0x72, 0x5f, 0x0e, 0x42,
0x4a, 0x8d, 0xdc, 0xa1, 0x3d, 0xa8, 0xbe, 0x2d, 0xc3, 0x97, 0xbd, 0xc5, 0xd5, 0x07, 0x81, 0xf9,
0x06, 0x51, 0xbe, 0x0a, 0x4b, 0x29, 0xc4, 0xbd, 0xd8, 0x61, 0x4e, 0x0a, 0xc2, 0xb2, 0xca, 0xd7,
0xf4, 0x12, 0x40, 0x68, 0x25, 0x1c, 0xf7, 0xc9, 0xcc, 0x27, 0x99, 0xef, 0xbc, 0x4c, 0xf1, 0x35,
0x9c, 0x29, 0xc7, 0xb5, 0x91, 0x7c, 0xc4, 0xde, 0x19, 0x9b, 0x47, 0x05, 0x61, 0xe9, 0x3a, 0x69,
0x84, 0x76, 0x58, 0x2d, 0x94, 0x7b, 0x32, 0x72, 0xeb, 0x13, 0xba, 0x84, 0xac, 0x41, 0x94, 0x7c,
0x3c, 0x0e, 0x98, 0xc7, 0x05, 0x61, 0xc9, 0x9a, 0xdc, 0x54, 0xe9, 0xd4, 0xdb, 0x1e, 0x07, 0xa4,
0xe7, 0x90, 0xb8, 0x4e, 0x0c, 0x98, 0x27, 0x45, 0xc4, 0x92, 0x2a, 0x3c, 0x56, 0xef, 0x90, 0x6e,
0x70, 0xac, 0xbb, 0xff, 0xbf, 0xef, 0x7b, 0x7f, 0xfc, 0x73, 0xff, 0x27, 0x01, 0xfa, 0x18, 0x78,
0x9f, 0x27, 0xdd, 0x7b, 0x2f, 0x47, 0xef, 0xc0, 0x1f, 0xce, 0x0f, 0xc2, 0xe6, 0xa4, 0x88, 0xd8,
0xe2, 0xf6, 0xaa, 0xfc, 0x5d, 0xba, 0x3c, 0x29, 0x57, 0xf3, 0xe6, 0xc4, 0xfd, 0x30, 0x81, 0x8c,
0x75, 0xe7, 0x87, 0xcc, 0xfc, 0x10, 0xf6, 0xf7, 0x90, 0x60, 0x31, 0xb9, 0x85, 0xea, 0x2b, 0x00,
0x00, 0xff, 0xff, 0x08, 0x27, 0x9c, 0x1a, 0xfe, 0x01, 0x00, 0x00,
}
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
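// Package serving_client is a minimal HTTP client for Paddle Serving: LoadModelConfig
// parses the binary GeneralModelConfig protobuf, Connect records the server address, and
// Predict packs the int64 feeds into a JSON Request, POSTs it to
// /GeneralModelService/inference and returns the float data of the requested fetch vars.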
package serving_client
import (
"bytes"
"encoding/json"
"io/ioutil"
"log"
"net/http"
pb "github.com/PaddlePaddle/Serving/go/proto"
"github.com/golang/protobuf/proto"
)
type Tensor struct {
Data []byte `json:"data"`
FloatData []float32 `json:"float_data"`
IntData []int `json:"int_data"`
Int64Data []int64 `json:"int64_data"`
ElemType int `json:"elem_type"`
Shape []int `json:"shape"`
}
type FeedInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type FetchInst struct {
TensorArray []Tensor `json:"tensor_array"`
}
type Request struct {
Insts []FeedInst `json:"insts"`
FetchVarNames []string `json:"fetch_var_names"`
ProfileServer bool `json:"profile_server"`
}
type Response struct {
Insts []FetchInst `json:"insts"`
ProfileTime []int64 `json:"profile_time"`
}
type Handle struct {
Url string
Port string
FeedAliasNameMap map[string]string
FeedShapeMap map[string][]int
FeedNameMap map[string]int
FeedAliasNames []string
FetchNameMap map[string]int
FetchAliasNameMap map[string]string
}
func LoadModelConfig(config string) Handle {
in, err := ioutil.ReadFile(config)
if err != nil {
log.Fatalln("Failed to read general model: ", err)
}
general_model_config := &pb.GeneralModelConfig{}
if err := proto.Unmarshal(in, general_model_config); err != nil {
log.Fatalln("Failed to parse GeneralModelConfig: ", err)
}
log.Println("read protobuf succeed")
handle := Handle{}
handle.FeedNameMap = map[string]int{}
handle.FeedAliasNameMap = map[string]string{}
handle.FeedShapeMap = map[string][]int{}
handle.FetchNameMap = map[string]int{}
handle.FetchAliasNameMap = map[string]string{}
handle.FeedAliasNames = []string{}
for i, v := range general_model_config.FeedVar {
handle.FeedNameMap[*v.Name] = i
tmp_array := []int{}
for _, vv := range v.Shape {
tmp_array = append(tmp_array, int(vv))
}
handle.FeedShapeMap[*v.Name] = tmp_array
handle.FeedAliasNameMap[*v.AliasName] = *v.Name
handle.FeedAliasNames = append(handle.FeedAliasNames, *v.AliasName)
}
for i, v := range general_model_config.FetchVar {
handle.FetchNameMap[*v.Name] = i
handle.FetchAliasNameMap[*v.AliasName] = *v.Name
}
return handle
}
func Connect(url string, port string, handle Handle) Handle {
handle.Url = url
handle.Port = port
return handle
}
func Predict(handle Handle, int_feed_map map[string][]int64, fetch []string) map[string][]float32 {
contentType := "application/json;charset=utf-8"
var tensor_array []Tensor
var inst FeedInst
tensor_array = []Tensor{}
inst = FeedInst{}
for i := 0; i < len(handle.FeedAliasNames); i++ {
key_i := handle.FeedAliasNames[i]
var tmp Tensor
tmp.IntData = []int{}
tmp.Shape = []int{}
tmp.Int64Data = int_feed_map[key_i]
tmp.ElemType = 0
tmp.Shape = handle.FeedShapeMap[key_i]
tensor_array = append(tensor_array, tmp)
}
inst.TensorArray = tensor_array
var profile_server bool
profile_server = false
req := &Request{
Insts: []FeedInst{inst},
FetchVarNames: fetch,
ProfileServer: profile_server}
b, err := json.Marshal(req)
body := bytes.NewBuffer(b)
var post_address bytes.Buffer
post_address.WriteString("http://")
post_address.WriteString(handle.Url)
post_address.WriteString(":")
post_address.WriteString(handle.Port)
post_address.WriteString("/GeneralModelService/inference")
resp, err := http.Post(post_address.String(), contentType, body)
if err != nil {
log.Println("Post failed:", err)
}
defer resp.Body.Close()
content, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Println("Read failed:", err)
}
response_json := Response{}
err = json.Unmarshal([]byte(content), &response_json)
var result map[string][]float32
result = map[string][]float32{}
for i, v := range fetch {
result[v] = response_json.Insts[0].TensorArray[i].FloatData
}
return result
}
...@@ -11,39 +11,83 @@ import org.nd4j.linalg.factory.Nd4j; ...@@ -11,39 +11,83 @@ import org.nd4j.linalg.factory.Nd4j;
import java.util.*; import java.util.*;
public class PaddleServingClientExample { public class PaddleServingClientExample {
boolean fit_a_line() { boolean fit_a_line(String model_config_path) {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f, float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f, 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f}; 0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data); INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13}; long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape); INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("x", batch_npdata); put("x", batch_npdata);
}}; }};
List<String> fetch = Arrays.asList("price"); List<String> fetch = Arrays.asList("price");
Client client = new Client(); HttpClient client = new HttpClient();
String target = "localhost:9393"; client.setIP("0.0.0.0");
boolean succ = client.connect(target); client.setPort("9393");
if (succ != true) { client.loadClientConfig(model_config_path);
System.out.println("connect failed."); String result = client.predict(feed_data, fetch, true, 0);
return false;
} System.out.println(result);
return true;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch); boolean encrypt(String model_config_path,String keyFilePath) {
if (fetch_map == null) { float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
return false; 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {1,13};
INDArray batch_npdata = npdata.reshape(batch_shape);
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
HttpClient client = new HttpClient();
client.setIP("0.0.0.0");
client.setPort("9393");
client.loadClientConfig(model_config_path);
client.use_key(keyFilePath);
try {
Thread.sleep(1000*3); // sleep for 3 seconds to wait for the Server to start
} catch (Exception e) {
//TODO: handle exception
} }
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) { boolean compress(String model_config_path) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue()); float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
} 0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
long[] batch_shape = {500,13};
INDArray batch_npdata = npdata.broadcast(batch_shape);
HashMap<String, Object> feed_data
= new HashMap<String, Object>() {{
put("x", batch_npdata);
}};
List<String> fetch = Arrays.asList("price");
HttpClient client = new HttpClient();
client.setIP("0.0.0.0");
client.setPort("9393");
client.loadClientConfig(model_config_path);
client.set_request_compress(true);
client.set_response_compress(true);
String result = client.predict(feed_data, fetch, true, 0);
System.out.println(result);
return true; return true;
} }
boolean yolov4(String filename) { boolean yolov4(String model_config_path,String filename) {
// https://deeplearning4j.konduit.ai/ // https://deeplearning4j.konduit.ai/
int height = 608; int height = 608;
int width = 608; int width = 608;
...@@ -77,171 +121,44 @@ public class PaddleServingClientExample { ...@@ -77,171 +121,44 @@ public class PaddleServingClientExample {
INDArray im_size = Nd4j.createFromArray(new int[]{height, width}); INDArray im_size = Nd4j.createFromArray(new int[]{height, width});
long[] batch_size_shape = {1,2}; long[] batch_size_shape = {1,2};
INDArray batch_im_size = im_size.reshape(batch_size_shape); INDArray batch_im_size = im_size.reshape(batch_size_shape);
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("image", batch_image); put("image", batch_image);
put("im_size", batch_im_size); put("im_size", batch_im_size);
}}; }};
List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0"); List<String> fetch = Arrays.asList("save_infer_model/scale_0.tmp_0");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
succ = client.setRpcTimeoutMs(20000); // cpu
if (succ != true) {
System.out.println("set timeout failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean batch_predict() {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>() {{
add(feed_data);
add(feed_data);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_batch, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
boolean asyn_predict() { boolean bert(String model_config_path) {
float[] data = {0.0137f, -0.1136f, 0.2553f, -0.0692f,
0.0582f, -0.0727f, -0.1583f, -0.0584f,
0.6283f, 0.4919f, 0.1856f, 0.0795f, -0.0332f};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("x", npdata);
}};
List<String> fetch = Arrays.asList("price");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
PredictFuture future = client.asyn_predict(feed_data, fetch);
Map<String, INDArray> fetch_map = future.get();
if (fetch_map == null) {
System.out.println("Get future reslut failed");
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true;
}
boolean model_ensemble() {
long[] data = {8, 233, 52, 601};
INDArray npdata = Nd4j.createFromArray(data);
HashMap<String, INDArray> feed_data
= new HashMap<String, INDArray>() {{
put("words", npdata);
}};
List<String> fetch = Arrays.asList("prediction");
Client client = new Client();
String target = "localhost:9393";
boolean succ = client.connect(target);
if (succ != true) {
System.out.println("connect failed.");
return false;
}
Map<String, HashMap<String, INDArray>> fetch_map
= client.ensemble_predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, HashMap<String, INDArray>> entry : fetch_map.entrySet()) {
System.out.println("Model = " + entry.getKey());
HashMap<String, INDArray> tt = entry.getValue();
for (Map.Entry<String, INDArray> e : tt.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
}
return true;
}
boolean bert() {
float[] input_mask = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; float[] input_mask = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] position_ids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] input_ids = {101, 6843, 3241, 749, 8024, 7662, 2533, 1391, 2533, 2523, 7676, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; long[] segment_ids = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("input_mask", Nd4j.createFromArray(input_mask)); put("input_mask", Nd4j.createFromArray(input_mask));
put("position_ids", Nd4j.createFromArray(position_ids)); put("position_ids", Nd4j.createFromArray(position_ids));
put("input_ids", Nd4j.createFromArray(input_ids)); put("input_ids", Nd4j.createFromArray(input_ids));
put("segment_ids", Nd4j.createFromArray(segment_ids)); put("segment_ids", Nd4j.createFromArray(segment_ids));
}}; }};
List<String> fetch = Arrays.asList("pooled_output"); List<String> fetch = Arrays.asList("pooled_output");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
boolean cube_local() { boolean cube_local(String model_config_path) {
long[] embedding_14 = {250644}; long[] embedding_14 = {250644};
long[] embedding_2 = {890346}; long[] embedding_2 = {890346};
long[] embedding_10 = {3939}; long[] embedding_10 = {3939};
...@@ -271,8 +188,8 @@ public class PaddleServingClientExample { ...@@ -271,8 +188,8 @@ public class PaddleServingClientExample {
long[] embedding_19 = {537425}; long[] embedding_19 = {537425};
long[] embedding_0 = {737395}; long[] embedding_0 = {737395};
HashMap<String, INDArray> feed_data HashMap<String, Object> feed_data
= new HashMap<String, INDArray>() {{ = new HashMap<String, Object>() {{
put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14)); put("embedding_14.tmp_0", Nd4j.createFromArray(embedding_14));
put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2)); put("embedding_2.tmp_0", Nd4j.createFromArray(embedding_2));
put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10)); put("embedding_10.tmp_0", Nd4j.createFromArray(embedding_10));
...@@ -302,23 +219,12 @@ public class PaddleServingClientExample { ...@@ -302,23 +219,12 @@ public class PaddleServingClientExample {
put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0)); put("embedding_0.tmp_0", Nd4j.createFromArray(embedding_0));
}}; }};
List<String> fetch = Arrays.asList("prob"); List<String> fetch = Arrays.asList("prob");
HttpClient client = new HttpClient();
Client client = new Client(); client.setIP("0.0.0.0");
String target = "localhost:9393"; client.setPort("9393");
boolean succ = client.connect(target); client.loadClientConfig(model_config_path);
if (succ != true) { String result = client.predict(feed_data, fetch, true, 0);
System.out.println("connect failed."); System.out.println(result);
return false;
}
Map<String, INDArray> fetch_map = client.predict(feed_data, fetch);
if (fetch_map == null) {
return false;
}
for (Map.Entry<String, INDArray> e : fetch_map.entrySet()) {
System.out.println("Key = " + e.getKey() + ", Value = " + e.getValue());
}
return true; return true;
} }
...@@ -328,33 +234,33 @@ public class PaddleServingClientExample { ...@@ -328,33 +234,33 @@ public class PaddleServingClientExample {
PaddleServingClientExample e = new PaddleServingClientExample(); PaddleServingClientExample e = new PaddleServingClientExample();
boolean succ = false; boolean succ = false;
if (args.length < 1) { if (args.length < 2) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type>."); System.out.println("Usage: java -cp <jar> PaddleServingClientExample <test-type> <configPath>.");
System.out.println("<test-type>: fit_a_line bert model_ensemble asyn_predict batch_predict cube_local cube_quant yolov4"); System.out.println("<test-type>: fit_a_line bert compress cube_local yolov4 encrypt");
return; return;
} }
String testType = args[0]; String testType = args[0];
System.out.format("[Example] %s\n", testType); System.out.format("[Example] %s\n", testType);
if ("fit_a_line".equals(testType)) { if ("fit_a_line".equals(testType)) {
succ = e.fit_a_line(); succ = e.fit_a_line(args[1]);
} else if ("compress".equals(testType)) {
succ = e.compress(args[1]);
} else if ("bert".equals(testType)) { } else if ("bert".equals(testType)) {
succ = e.bert(); succ = e.bert(args[1]);
} else if ("model_ensemble".equals(testType)) {
succ = e.model_ensemble();
} else if ("asyn_predict".equals(testType)) {
succ = e.asyn_predict();
} else if ("batch_predict".equals(testType)) {
succ = e.batch_predict();
} else if ("cube_local".equals(testType)) { } else if ("cube_local".equals(testType)) {
succ = e.cube_local(); succ = e.cube_local(args[1]);
} else if ("cube_quant".equals(testType)) {
succ = e.cube_local();
} else if ("yolov4".equals(testType)) { } else if ("yolov4".equals(testType)) {
if (args.length < 2) { if (args.length < 3) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <image-filepath>."); System.out.println("Usage: java -cp <jar> PaddleServingClientExample yolov4 <configPath> <image-filepath>.");
return;
}
succ = e.yolov4(args[1],args[2]);
} else if ("encrypt".equals(testType)) {
if (args.length < 3) {
System.out.println("Usage: java -cp <jar> PaddleServingClientExample encrypt <configPath> <keyPath>.");
return; return;
} }
succ = e.yolov4(args[1]); succ = e.encrypt(args[1],args[2]);
} else { } else {
System.out.format("test-type(%s) not match.\n", testType); System.out.format("test-type(%s) not match.\n", testType);
return; return;
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -145,6 +145,11 @@ ...@@ -145,6 +145,11 @@
<artifactId>json</artifactId> <artifactId>json</artifactId>
<version>20190722</version> <version>20190722</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.12</version>
</dependency>
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId> <artifactId>slf4j-api</artifactId>
......
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.util.List;
import java.util.ArrayList;
import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;
import io.grpc.StatusRuntimeException;
import com.google.protobuf.ByteString;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import io.paddle.serving.grpc.*;
import io.paddle.serving.configure.*;
import io.paddle.serving.client.PredictFuture;
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
String profile_str = String.join("", time_record_);
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class Client {
private ManagedChannel channel_;
private MultiLangGeneralModelServiceGrpc.MultiLangGeneralModelServiceBlockingStub blockingStub_;
private MultiLangGeneralModelServiceGrpc.MultiLangGeneralModelServiceFutureStub futureStub_;
private double rpcTimeoutS_;
private List<String> feedNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
public Client() {
channel_ = null;
blockingStub_ = null;
futureStub_ = null;
rpcTimeoutS_ = 2;
feedNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public boolean setRpcTimeoutMs(int rpc_timeout) {
if (futureStub_ == null || blockingStub_ == null) {
System.out.println("set timeout must be set after connect.");
return false;
}
rpcTimeoutS_ = rpc_timeout / 1000.0;
SetTimeoutRequest timeout_req = SetTimeoutRequest.newBuilder()
.setTimeoutMs(rpc_timeout)
.build();
SimpleResponse resp;
try {
resp = blockingStub_.setTimeout(timeout_req);
} catch (StatusRuntimeException e) {
System.out.format("Set RPC timeout failed: %s\n", e.toString());
return false;
}
return resp.getErrCode() == 0;
}
public boolean connect(String target) {
// TODO: target must be NameResolver-compliant URI
// https://grpc.github.io/grpc-java/javadoc/io/grpc/ManagedChannelBuilder.html
try {
channel_ = ManagedChannelBuilder.forTarget(target)
.defaultLoadBalancingPolicy("round_robin")
.maxInboundMessageSize(Integer.MAX_VALUE)
.usePlaintext()
.build();
blockingStub_ = MultiLangGeneralModelServiceGrpc.newBlockingStub(channel_);
futureStub_ = MultiLangGeneralModelServiceGrpc.newFutureStub(channel_);
} catch (Exception e) {
System.out.format("Connect failed: %s\n", e.toString());
return false;
}
GetClientConfigRequest get_client_config_req = GetClientConfigRequest.newBuilder().build();
GetClientConfigResponse resp;
try {
resp = blockingStub_.getClientConfig(get_client_config_req);
} catch (Exception e) {
System.out.format("Get Client config failed: %s\n", e.toString());
return false;
}
String model_config_str = resp.getClientConfigStr();
_parseModelConfig(model_config_str);
return true;
}
private void _parseModelConfig(String model_config_str) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
fetchNames_ = new ArrayList<String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
fetchTypes_ = new HashMap<String, Integer>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (FeedVar feed_var : feed_var_list) {
feedNames_.add(feed_var.getAliasName());
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (FetchVar fetch_var : fetch_var_list) {
fetchNames_.add(fetch_var.getAliasName());
}
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchTypes_.put(var_name, fetch_var.getFetchType());
if (fetch_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
}
}
}
private InferenceRequest _packInferenceRequest(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) throws IllegalArgumentException {
List<String> feed_var_names = new ArrayList<String>();
feed_var_names.addAll(feed_batch.get(0).keySet());
InferenceRequest.Builder req_builder = InferenceRequest.newBuilder()
.addAllFeedVarNames(feed_var_names)
.addAllFetchVarNames(fetch)
.setIsPython(false)
.setLogId(log_id);
for (HashMap<String, INDArray> feed_data: feed_batch) {
FeedInst.Builder inst_builder = FeedInst.newBuilder();
for (String name: feed_var_names) {
Tensor.Builder tensor_builder = Tensor.newBuilder();
INDArray variable = feed_data.get(name);
long[] flattened_shape = {-1};
INDArray flattened_list = variable.reshape(flattened_shape);
int v_type = feedTypes_.get(name);
NdIndexIterator iter = new NdIndexIterator(flattened_list.shape());
if (v_type == 0) { // int64
while (iter.hasNext()) {
long[] next_index = iter.next();
long x = flattened_list.getLong(next_index);
tensor_builder.addInt64Data(x);
}
} else if (v_type == 1) { // float32
while (iter.hasNext()) {
long[] next_index = iter.next();
float x = flattened_list.getFloat(next_index);
tensor_builder.addFloatData(x);
}
} else if (v_type == 2) { // int32
while (iter.hasNext()) {
long[] next_index = iter.next();
// the interface of INDArray is strange:
// https://deeplearning4j.org/api/latest/org/nd4j/linalg/api/ndarray/INDArray.html
int[] int_next_index = new int[next_index.length];
for(int i = 0; i < next_index.length; i++) {
int_next_index[i] = (int)next_index[i];
}
int x = flattened_list.getInt(int_next_index);
tensor_builder.addIntData(x);
}
} else {
throw new IllegalArgumentException("error tensor value type.");
}
long[] longArray = variable.shape();
int[] intArray = Arrays.stream(longArray).mapToInt(i -> (int) i).toArray();
List<Integer> indarrayShapeList = Arrays.stream(intArray).boxed().collect(Collectors.toList());
//tensor_builder.addAllShape(feedShapes_.get(name));
tensor_builder.addAllShape(indarrayShapeList);
inst_builder.addTensorArray(tensor_builder.build());
}
req_builder.addInsts(inst_builder.build());
}
return req_builder.build();
}
private Map<String, HashMap<String, INDArray>>
_unpackInferenceResponse(
InferenceResponse resp,
Iterable<String> fetch,
Boolean need_variant_tag) throws IllegalArgumentException {
return Client._staticUnpackInferenceResponse(
resp, fetch, fetchTypes_, lodTensorSet_, need_variant_tag);
}
private static Map<String, HashMap<String, INDArray>>
_staticUnpackInferenceResponse(
InferenceResponse resp,
Iterable<String> fetch,
Map<String, Integer> fetchTypes,
Set<String> lodTensorSet,
Boolean need_variant_tag) throws IllegalArgumentException {
if (resp.getErrCode() != 0) {
return null;
}
String tag = resp.getTag();
HashMap<String, HashMap<String, INDArray>> multi_result_map
= new HashMap<String, HashMap<String, INDArray>>();
for (ModelOutput model_result: resp.getOutputsList()) {
String engine_name = model_result.getEngineName();
FetchInst inst = model_result.getInsts(0);
HashMap<String, INDArray> result_map
= new HashMap<String, INDArray>();
int index = 0;
for (String name: fetch) {
Tensor variable = inst.getTensorArray(index);
int v_type = fetchTypes.get(name);
INDArray data = null;
if (v_type == 0) { // int64
List<Long> list = variable.getInt64DataList();
long[] array = new long[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else if (v_type == 1) { // float32
List<Float> list = variable.getFloatDataList();
float[] array = new float[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else if (v_type == 2) { // int32
List<Integer> list = variable.getIntDataList();
int[] array = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
data = Nd4j.createFromArray(array);
} else {
throw new IllegalArgumentException("error tensor value type.");
}
// shape
List<Integer> shape_list = variable.getShapeList();
int[] shape_array = new int[shape_list.size()];
for (int i = 0; i < shape_list.size(); ++i) {
shape_array[i] = shape_list.get(i);
}
data = data.reshape(shape_array);
// put data to result_map
result_map.put(name, data);
// lod
if (lodTensorSet.contains(name)) {
List<Integer> list = variable.getLodList();
int[] array = new int[list.size()];
for (int i = 0; i < list.size(); i++) {
array[i] = list.get(i);
}
result_map.put(name + ".lod", Nd4j.createFromArray(array));
}
index += 1;
}
multi_result_map.put(engine_name, result_map);
}
// TODO: tag(ABtest not support now)
return multi_result_map;
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return predict(feed, fetch, false, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return predict(feed, fetch, false, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return ensemble_predict(feed, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed, fetch, false, log_id);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch) {
return asyn_predict(feed, fetch, false, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed, fetch, false, log_id);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return predict(feed_batch, fetch, need_variant_tag, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return ensemble_predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return ensemble_predict(feed_batch, fetch, need_variant_tag, log_id);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag) {
return asyn_predict(feed, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed);
return asyn_predict(feed_batch, fetch, need_variant_tag, log_id);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return predict(feed_batch, fetch, false, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return predict(feed_batch, fetch, false, log_id);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return ensemble_predict(feed_batch, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed_batch, fetch, false, log_id);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) {
return asyn_predict(feed_batch, fetch, false, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed_batch, fetch, false, log_id);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try {
profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0");
InferenceResponse resp = blockingStub_.inference(req);
profiler_.record("java_client_infer_1");
profiler_.record("java_postpro_0");
Map<String, HashMap<String, INDArray>> ensemble_result
= _unpackInferenceResponse(resp, fetch, need_variant_tag);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("Failed to predict: please use ensemble_predict impl.\n");
return null;
}
profiler_.record("java_postpro_1");
profiler_.printProfile();
return list.get(0).getValue();
} catch (StatusRuntimeException e) {
System.out.format("Failed to predict: %s\n", e.toString());
return null;
}
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return ensemble_predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try {
profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0");
InferenceResponse resp = blockingStub_.inference(req);
profiler_.record("java_client_infer_1");
profiler_.record("java_postpro_0");
Map<String, HashMap<String, INDArray>> ensemble_result
= _unpackInferenceResponse(resp, fetch, need_variant_tag);
profiler_.record("java_postpro_1");
profiler_.printProfile();
return ensemble_result;
} catch (StatusRuntimeException e) {
System.out.format("Failed to predict: %s\n", e.toString());
return null;
}
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag) {
return asyn_predict(feed_batch, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
ListenableFuture<InferenceResponse> future = futureStub_.inference(req);
PredictFuture predict_future = new PredictFuture(future,
(InferenceResponse resp) -> {
return Client._staticUnpackInferenceResponse(
resp, fetch, fetchTypes_, lodTensorSet_, need_variant_tag);
}
);
return predict_future;
}
}
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.stream.Collectors;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Map.Entry;
import java.nio.file.*;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.api.iter.NdIndexIterator;
import org.nd4j.linalg.factory.Nd4j;
import java.lang.reflect.*;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.entity.StringEntity;
import org.apache.http.client.entity.GzipDecompressingEntity;
import org.apache.http.Header;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.http.entity.InputStreamEntity;
import org.json.*;
import io.paddle.serving.configure.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
enum ElementType
{
Int64_type, Float32_type, Int32_type, Bytes_type;
}
class Profiler {
int pid_;
String print_head_ = null;
List<String> time_record_ = null;
boolean enable_ = false;
Profiler() {
RuntimeMXBean runtimeMXBean = ManagementFactory.getRuntimeMXBean();
pid_ = Integer.valueOf(runtimeMXBean.getName().split("@")[0]).intValue();
print_head_ = "\nPROFILE\tpid:" + pid_ + "\t";
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
void record(String name) {
if (enable_) {
long ctime = System.currentTimeMillis() * 1000;
time_record_.add(name + ":" + String.valueOf(ctime) + " ");
}
}
void printProfile() {
if (enable_) {
String profile_str = String.join("", time_record_);
time_record_ = new ArrayList<String>();
time_record_.add(print_head_);
}
}
void enable(boolean flag) {
enable_ = flag;
}
}
public class HttpClient {
private int httpTimeoutS_;
private List<String> feedNames_;
private Map<String, String> feedRealNames_;
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
private Map<String, Integer> feedTensorLen_;
private Profiler profiler_;
private String ip;
private String serverPort;
private String port;
private String serviceName;
private boolean request_compress_flag;
private boolean response_compress_flag;
private String GLOG_v;
public HttpClient() {
feedNames_ = null;
feedRealNames_ = null;
feedTypes_ = null;
feedShapes_ = null;
fetchNames_ = null;
fetchTypes_ = null;
lodTensorSet_ = null;
feedTensorLen_ = null;
feedNameToIndex_ = null;
httpTimeoutS_ = 200000;
ip = "0.0.0.0";
port = "9393";
serverPort = "9393";
serviceName = "/GeneralModelService/inference";
request_compress_flag = false;
response_compress_flag = false;
GLOG_v = System.getenv("GLOG_v");
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
if (FLAGS_profile_client != null && FLAGS_profile_client.equals("1")) {
is_profile = true;
}
profiler_.enable(is_profile);
}
public void setTimeOut(int httpTimeoutS_) {
this.httpTimeoutS_ = httpTimeoutS_;
}
public void setIP(String ip) {
this.ip = ip;
}
public void setPort(String port) {
this.port = port;
this.serverPort = port;
}
public void setServiceName(String serviceName){
this.serviceName = serviceName;
}
public void loadClientConfig(String model_config_path) {
GeneralModelConfig.Builder model_conf_builder = GeneralModelConfig.newBuilder();
try {
byte[] data = Files.readAllBytes(Paths.get(model_config_path));
String model_config_str = new String(data, "utf-8");
com.google.protobuf.TextFormat.getParser().merge(model_config_str, model_conf_builder);
} catch (com.google.protobuf.TextFormat.ParseException e) {
System.out.format("Parse client config failed: %s\n", e.toString());
} catch (Exception e) {
System.out.format("Open client config failed: %s\n", e.toString());
}
GeneralModelConfig model_conf = model_conf_builder.build();
feedNames_ = new ArrayList<String>();
feedRealNames_ = new HashMap<String, String>();
feedTypes_ = new HashMap<String, Integer>();
feedShapes_ = new HashMap<String, List<Integer>>();
lodTensorSet_ = new HashSet<String>();
feedTensorLen_ = new HashMap<String, Integer>();
feedNameToIndex_ = new HashMap<String, Integer>();
fetchNames_ = new ArrayList<String>();
fetchTypes_ = new HashMap<String, Integer>();
List<FeedVar> feed_var_list = model_conf.getFeedVarList();
for (int i = 0; i < feed_var_list.size(); ++i) {
FeedVar feed_var = feed_var_list.get(i);
String var_name = feed_var.getAliasName();
feedNames_.add(var_name);
feedRealNames_.put(var_name, feed_var.getName());
feedTypes_.put(var_name, feed_var.getFeedType());
feedShapes_.put(var_name, feed_var.getShapeList());
feedNameToIndex_.put(var_name, i);
if (feed_var.getIsLodTensor()) {
lodTensorSet_.add(var_name);
} else {
int counter = 1;
for (int dim : feedShapes_.get(var_name)) {
counter *= dim;
}
feedTensorLen_.put(var_name, counter);
}
}
List<FetchVar> fetch_var_list = model_conf.getFetchVarList();
for (int i = 0; i < fetch_var_list.size(); i++) {
FetchVar fetch_var = fetch_var_list.get(i);
String var_name = fetch_var.getAliasName();
fetchNames_.add(var_name);
fetchTypes_.put(var_name, fetch_var.getFetchType());
}
}
public void use_key(String keyFilePath) {
String key_str = null;
String encrypt_url = "http://" + this.ip + ":" +this.port;
try {
byte[] data = Files.readAllBytes(Paths.get(keyFilePath));
key_str = Base64.getEncoder().encodeToString(data);
} catch (Exception e) {
System.out.format("Open key file failed: %s\n", e.toString());
}
JSONObject jsonKey = new JSONObject();
if( key_str != null) {
jsonKey.put("key", key_str);
}else{
jsonKey.put("key", "");
}
String result = doPost(encrypt_url, jsonKey.toString());
try {
JSONObject jsonObject = new JSONObject(result);
JSONArray jsonArray = jsonObject.getJSONArray("endpoint_list");
this.serverPort = jsonArray.getString(0);
System.out.format("Real ServerPort is: %s\n", this.serverPort);
}catch (JSONException err) {
System.out.format("Parse serverPort failed: %s\n", err.toString());
}
}
public void set_request_compress(boolean request_compress_flag) {
// need to be done.
this.request_compress_flag = request_compress_flag;
}
public void set_response_compress(boolean response_compress_flag) {
// need to be done.
this.response_compress_flag = response_compress_flag;
}
public byte[] compress(String str) {
if (str == null || str.length() == 0) {
return null;
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip;
try {
gzip = new GZIPOutputStream(out);
gzip.write(str.getBytes("UTF-8"));
gzip.close();
} catch (Exception e) {
e.printStackTrace();
}
return out.toByteArray();
}
// Helper interface that wraps the HTTP request for the user: only feedData, lod and
// the fetch list need to be passed in. Assembling the JSON body according to the proto
// is done by this method, and the interface is largely consistent with the Python client.
// Four overloads are provided, taking at minimum feedData and fetch, and optionally
// lod and batchFlag. A usage sketch is given after this class.
public String predict(Map<String, Object> feedData,
List<String> fetch,
int log_id) {
return predict(feedData,null,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
List<String> fetch,
boolean batchFlag,
int log_id) {
return predict(feedData,null,fetch,batchFlag,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
int log_id) {
return predict(feedData,feedLod,fetch,false,log_id);
}
public String predict(Map<String, Object> feedData,
Map<String, Object> feedLod,
List<String> fetch,
boolean batchFlag,
int log_id) {
String server_url = "http://" + this.ip + ":" + this.serverPort + this.serviceName;
// Build the fetch list.
JSONArray jsonFetchList = new JSONArray();
Iterator<String> fetchIterator = fetch.iterator();
while (fetchIterator.hasNext()) {
jsonFetchList.put(fetchIterator.next());
}
// Build the tensor array.
JSONArray jsonTensorArray = new JSONArray();
try{
if (null != feedData && feedData.size() > 0) {
// Iterate over the entries of the feed map.
Set<Entry<String, Object>> entrySet = feedData.entrySet();
Iterator<Entry<String, Object>> iterator = entrySet.iterator();
while (iterator.hasNext()) {
JSONObject jsonTensor = new JSONObject();
Entry<String, Object> mapEntry = iterator.next();
Object objectValue = mapEntry.getValue();
String feed_alias_name = mapEntry.getKey();
String feed_real_name = feedRealNames_.get(feed_alias_name);
List<Integer> shape = new ArrayList<Integer>(feedShapes_.get(feed_alias_name));
int element_type = feedTypes_.get(feed_alias_name);
jsonTensor.put("alias_name", feed_alias_name);
jsonTensor.put("name", feed_real_name);
jsonTensor.put("elem_type", element_type);
// Fill in the data and shape fields.
String protoDataKey = feedTypeToDataKey_.get(element_type);
// If the value is an INDArray, flatten it to 1-D first;
// in that case the shape is taken from the INDArray itself.
if(objectValue instanceof INDArray){
INDArray tempIndArray = (INDArray)objectValue;
long[] indarrayShape = tempIndArray.shape();
shape.clear();
for(long dim:indarrayShape){
shape.add((int)dim);
}
objectValue = tempIndArray.data().asDouble();
}else if(objectValue.getClass().isArray()){
// Plain arrays are used as-is. Arrays are not nested here, so the batch size
// cannot be derived from the data: the batch dimension defaults to 1, or is
// assumed to be already contained in the feedVar shape.
}else if(objectValue instanceof List){
// Lists may be nested and need to be flattened. If batchFlag is true, the value
// is treated as a nested list and the outermost level is the batch dimension.
if (batchFlag) {
List<?> list = new ArrayList<>((Collection<?>)objectValue);
// Insert the batch size at index 0 of the shape.
shape.add(0, list.size());
}
objectValue = recursiveExtract(objectValue);
}else{
// Here the value is a single String, Integer, etc. No batch information can be
// derived, so the shape is left untouched. Because the proto field is repeated,
// the value is wrapped in a list.
if(objectValue instanceof String){
if(element_type != ElementType.Bytes_type.ordinal()){
throw new Exception("feedvar is not string type, feed cannot be a single string.");
}
}else{
if(element_type == ElementType.Bytes_type.ordinal()){
throw new Exception("feedvar is string type, feed cannot be a single int or other scalar.");
}
}
List<Object> list = new ArrayList<>();
list.add(objectValue);
objectValue = list;
}
jsonTensor.put(protoDataKey,objectValue);
if(!batchFlag){
// Insert batch = 1 at index 0 of the shape.
shape.add(0, 1);
}
jsonTensor.put("shape", shape);
// Fill in the lod info; INDArray, plain arrays and Iterables are supported.
Object feedLodValue = null;
if(feedLod != null){
feedLodValue = feedLod.get(feed_alias_name);
if(feedLodValue != null) {
if(feedLodValue instanceof INDArray){
INDArray tempIndArray = (INDArray)feedLodValue;
feedLodValue = tempIndArray.data().asInt();
}else if(feedLodValue.getClass().isArray()){
// Plain arrays are used as-is.
}else if(feedLodValue instanceof Iterable){
// Lists may be nested, so flatten them.
feedLodValue = recursiveExtract(feedLodValue);
}else{
throw new Exception("Lod must be INDArray or Array or Iterable.");
}
jsonTensor.put("lod", feedLodValue);
}
}
jsonTensorArray.put(jsonTensor);
}
}
}catch (Exception e) {
e.printStackTrace();
}
JSONObject jsonRequest = new JSONObject();
jsonRequest.put("log_id",log_id);
jsonRequest.put("fetch_var_names", jsonFetchList);
jsonRequest.put("tensor",jsonTensorArray);
if(GLOG_v != null){
System.out.format("------- Final jsonRequest: %s\n", jsonRequest.toString());
}
return doPost(server_url, jsonRequest.toString());
}
public String doPost(String url, String strPostData) {
CloseableHttpClient httpClient = null;
CloseableHttpResponse httpResponse = null;
String result = "";
// Create the HttpClient instance.
httpClient = HttpClients.createDefault();
// Create the HttpPost request.
HttpPost httpPost = new HttpPost(url);
// Configure the request timeouts.
RequestConfig requestConfig = RequestConfig.custom().setConnectTimeout(httpTimeoutS_)// connect timeout
.setConnectionRequestTimeout(httpTimeoutS_)// connection request timeout
.setSocketTimeout(httpTimeoutS_)// socket (read) timeout
.build();
// Apply the config to the HttpPost instance.
httpPost.setConfig(requestConfig);
httpPost.setHeader("Content-Type", "application/json;charset=utf-8");
// Set the request headers.
if(response_compress_flag){
httpPost.addHeader("Accept-encoding", "gzip");
if(GLOG_v != null){
System.out.format("------- Accept-encoding gzip: \n");
}
}
try {
if(request_compress_flag && strPostData.length()>1024){
try{
byte[] gzipEncrypt = compress(strPostData);
httpPost.setEntity(new InputStreamEntity(new ByteArrayInputStream(gzipEncrypt), gzipEncrypt.length));
httpPost.addHeader("Content-Encoding", "gzip");
} catch (Exception e) {
e.printStackTrace();
}
}else{
httpPost.setEntity(new StringEntity(strPostData, "UTF-8"));
}
// Execute the POST request and get the response.
httpResponse = httpClient.execute(httpPost);
// Read the response entity.
HttpEntity entity = httpResponse.getEntity();
Header header = entity.getContentEncoding();
if(GLOG_v != null){
System.out.format("------- response header: %s\n", header);
}
if(header != null && header.getValue().equalsIgnoreCase("gzip")){ // the response body is gzip-compressed
GzipDecompressingEntity gzipEntity = new GzipDecompressingEntity(entity);
result = EntityUtils.toString(gzipEntity);
if(GLOG_v != null){
System.out.format("------- degzip response: %s\n", result);
}
}else{
result = EntityUtils.toString(entity);
}
} catch (ClientProtocolException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
// Release resources.
if (null != httpResponse) {
try {
httpResponse.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if (null != httpClient) {
try {
httpClient.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}
public List<Object> recursiveExtract(Object stuff) {
List<Object> mylist = new ArrayList<Object>();
if(stuff instanceof Iterable) {
for(Object o : (Iterable< ? >)stuff) {
mylist.addAll(recursiveExtract(o));
}
} else if(stuff instanceof Map) {
for(Object o : ((Map<?, ? extends Object>) stuff).values()) {
mylist.addAll(recursiveExtract(o));
}
} else {
mylist.add(stuff);
}
return mylist;
}
}
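For reference, a minimal usage sketch of the HttpClient above (not part of the original file). It assumes the Java client package `io.paddle.serving.client` is on the classpath and reuses the feed/fetch names of the fit_a_line (uci_housing) example later in this change — feed variable `x` with shape [13] and fetch variable `price`; the config path is illustrative and must point at your own `serving_client_conf.prototxt`.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import io.paddle.serving.client.HttpClient;  // package name assumed

public class HttpClientDemo {
    public static void main(String[] args) {
        HttpClient client = new HttpClient();
        client.setIP("127.0.0.1");   // serving address, adjust as needed
        client.setPort("9393");      // serving port, adjust as needed
        // Client-side prototxt generated when the model was saved (illustrative path).
        client.loadClientConfig("uci_housing_client/serving_client_conf.prototxt");

        // One feed variable "x" holding a float tensor of shape [1, 13]; the batch
        // dimension is already present, so batchFlag is set to true below.
        INDArray x = Nd4j.rand(1, 13);
        Map<String, Object> feedData = new HashMap<String, Object>();
        feedData.put("x", x);
        List<String> fetch = Arrays.asList("price");

        // predict(feedData, fetch, batchFlag, log_id) returns the raw JSON response body.
        String result = client.predict(feedData, fetch, true, 0);
        System.out.println(result);
    }
}
```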
package io.paddle.serving.client;
import java.util.*;
import java.util.function.Function;
import io.grpc.StatusRuntimeException;
import com.google.common.util.concurrent.ListenableFuture;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.Client;
import io.paddle.serving.grpc.*;
public class PredictFuture {
private ListenableFuture<InferenceResponse> callFuture_;
private Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> callBackFunc_;
PredictFuture(ListenableFuture<InferenceResponse> call_future,
Function<InferenceResponse,
Map<String, HashMap<String, INDArray>>> call_back_func) {
callFuture_ = call_future;
callBackFunc_ = call_back_func;
}
public Map<String, INDArray> get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
Map<String, HashMap<String, INDArray>> ensemble_result
= callBackFunc_.apply(resp);
List<Map.Entry<String, HashMap<String, INDArray>>> list
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet());
if (list.size() != 1) {
System.out.format("predict failed: please use get_ensemble impl.\n");
return null;
}
return list.get(0).getValue();
}
public Map<String, HashMap<String, INDArray>> ensemble_get() {
InferenceResponse resp = null;
try {
resp = callFuture_.get();
} catch (Exception e) {
System.out.format("predict failed: %s\n", e.toString());
return null;
}
return callBackFunc_.apply(resp);
}
}
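As a hedged illustration (not part of the original file), this is how a caller might consume a `PredictFuture`; how the future itself is obtained from the gRPC client's asynchronous predict call is intentionally left out, since that part of the Client API is not shown here.

```java
import java.util.Map;
import org.nd4j.linalg.api.ndarray.INDArray;
import io.paddle.serving.client.PredictFuture;

public class PredictFutureDemo {
    // Blocks on the future and prints every fetched variable.
    static void printResult(PredictFuture future) {
        Map<String, INDArray> fetchMap = future.get();  // returns null on failure or ensemble results
        if (fetchMap == null) {
            System.out.println("predict failed, or the result is an ensemble: use ensemble_get()");
            return;
        }
        for (Map.Entry<String, INDArray> entry : fetchMap.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}
```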
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto";
message Tensor {
optional bytes data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type = 5;
repeated int32 shape = 6;
repeated int32 lod = 7; // only for fetch tensor currently
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst { repeated Tensor tensor_array = 1; };
message InferenceRequest {
repeated FeedInst insts = 1;
repeated string feed_var_names = 2;
repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
};
message InferenceResponse {
repeated ModelOutput outputs = 1;
optional string tag = 2;
required int32 err_code = 3;
};
message ModelOutput {
repeated FetchInst insts = 1;
optional string engine_name = 2;
}
message SetTimeoutRequest { required int32 timeout_ms = 1; }
message SimpleResponse { required int32 err_code = 1; }
message GetClientConfigRequest {}
message GetClientConfigResponse { required string client_config_str = 1; }
service MultiLangGeneralModelService {
rpc Inference(InferenceRequest) returns (InferenceResponse) {}
rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {}
rpc GetClientConfig(GetClientConfigRequest)
returns (GetClientConfigResponse) {}
};
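For reference, a sketch (not part of the original file) of how a request for `MultiLangGeneralModelService.Inference` could be assembled with the Java classes that protoc generates from this file (package `io.paddle.serving.grpc`, per the options above). `elem_type = 1` denotes float32, matching the type comments in the Python client; the variable names `x` and `price` are taken from the fit_a_line example.

```java
import io.paddle.serving.grpc.FeedInst;
import io.paddle.serving.grpc.InferenceRequest;
import io.paddle.serving.grpc.Tensor;

public class InferenceRequestDemo {
    public static void main(String[] args) {
        // One float32 tensor of shape [1, 13] for the feed variable "x".
        Tensor.Builder tensor = Tensor.newBuilder()
                .setElemType(1)   // 1 == float32
                .addShape(1)
                .addShape(13);
        for (int i = 0; i < 13; ++i) {
            tensor.addFloatData(0.0f);  // placeholder values
        }
        InferenceRequest request = InferenceRequest.newBuilder()
                .addFeedVarNames("x")
                .addFetchVarNames("price")
                .addInsts(FeedInst.newBuilder().addTensorArray(tensor))
                .setIsPython(false)  // required; false because data is sent via the repeated float_data field
                .setLogId(0)         // required
                .build();
        System.out.println(request);
    }
}
```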
File mode changed from 100755 to 100644
...@@ -81,7 +81,6 @@ if (SERVER) ...@@ -81,7 +81,6 @@ if (SERVER)
if(WITH_LITE) if(WITH_LITE)
set(VERSION_SUFFIX 2) set(VERSION_SUFFIX 2)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r COMMAND cp -r
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
## Criteo CTR with Sparse Parameter Indexing Service
([简体中文](./README_CN.md)|English)
### Get Sample Dataset
Go to the directory `python/examples/criteo_ctr_with_cube` and run:
```
sh get_data.sh
```
### Download Model and Sparse Parameter Sequence Files
```
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
```
After extraction, the model and client config are in `./ctr_serving_model_kv` and `./ctr_client_conf`.
### Start Sparse Parameter Indexing Service
```
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
Here, the sparse parameters of the model are served by Cube, the sparse parameter indexing service.
### Start RPC Predictor (the number of serving threads is 4, configurable in test_server.py)
```
python test_server.py ctr_serving_model_kv
```
### Run Prediction
```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
### Benchmark
CPU: Intel(R) Xeon(R) CPU 6148 @ 2.40GHz
Model: [Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr_with_cube/network_conf.py)
Server core/thread num: 4/8
Run:
```
bash benchmark.sh
```
1000 batches will be sent by every client
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- |
| 1 | 0.035 | 1.596 | 0.021 | 0.518 | 0.0024 | 0.0025 | 6.774 | 147.7 |
| 2 | 0.034 | 1.780 | 0.027 | 0.463 | 0.0020 | 0.0023 | 6.931 | 288.3 |
| 4 | 0.038 | 2.954 | 0.025 | 0.455 | 0.0019 | 0.0027 | 8.378 | 477.5 |
| 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 |
| 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 |
The average latency per client thread:
![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png)
The QPS per client thread:
![qps](../../../doc/criteo-cube-benchmark-qps.png)
## 带稀疏参数索引服务的CTR预测服务
(简体中文|[English](./README.md))
### 获取样例数据
进入目录 `python/examples/criteo_ctr_with_cube`
```
sh get_data.sh
```
### 下载模型和稀疏参数序列文件
```
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
```
执行脚本后会在当前目录有ctr_serving_model_kv和ctr_client_conf文件夹。
### 启动稀疏参数索引服务
```
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh &
```
此处,模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中。
### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置)
```
python test_server.py ctr_serving_model_kv
```
### 执行预测
```
python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
```
### Benchmark
设备 :Intel(R) Xeon(R) CPU 6148 @ 2.40GHz
模型 :[Criteo CTR](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/criteo_ctr_with_cube/network_conf.py)
server core/thread num : 4/8
执行
```
bash benchmark.sh
```
客户端每个线程会发送1000个batch
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | avg_latency | qps |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- | ----- |
| 1 | 0.035 | 1.596 | 0.021 | 0.518 | 0.0024 | 0.0025 | 6.774 | 147.7 |
| 2 | 0.034 | 1.780 | 0.027 | 0.463 | 0.0020 | 0.0023 | 6.931 | 288.3 |
| 4 | 0.038 | 2.954 | 0.025 | 0.455 | 0.0019 | 0.0027 | 8.378 | 477.5 |
| 8 | 0.044 | 8.230 | 0.028 | 0.464 | 0.0023 | 0.0034 | 14.191 | 563.8 |
| 16 | 0.048 | 21.037 | 0.028 | 0.455 | 0.0025 | 0.0041 | 27.236 | 587.5 |
平均每个线程耗时图如下
![avg cost](../../../doc/criteo-cube-benchmark-avgcost.png)
每个线程QPS耗时如下
![qps](../../../doc/criteo-cube-benchmark-qps.png)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,58 +14,49 @@ ...@@ -14,58 +14,49 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import sys import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class CriteoDataset(dg.MultiSlotDataGenerator):
class IMDBDataset(dg.MultiSlotDataGenerator): def setup(self, sparse_feature_dim):
def load_resource(self, dictfile): self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
self._vocab = {} self.cont_max_ = [
wid = 0 20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
if py_version == 2: ]
with open(dictfile) as f: self.cont_diff_ = [
for line in f: 20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
] ]
return feas self.hash_dim_ = sparse_feature_dim
# here, training data are lines with line_index < train_idx_
self.train_idx_ = 41256555
self.continuous_range_ = range(1, 14)
self.categorical_range_ = range(14, 40)
def get_words_and_label(self, line): def _process_line(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />", features = line.rstrip('\n').split('\t')
" ").strip() dense_feature = []
label = [int(line.split('|')[-1])] sparse_feature = []
for idx in self.continuous_range_:
if features[idx] == '':
dense_feature.append(0.0)
else:
dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \
self.cont_diff_[idx - 1])
for idx in self.categorical_range_:
sparse_feature.append(
[hash(str(idx) + features[idx]) % self.hash_dim_])
words = [x for x in self._pattern.split(send) if x and x != " "] return dense_feature, sparse_feature, [int(features[0])]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size): def infer_reader(self, filelist, batch, buf_size):
def local_iter(): def local_iter():
for fname in infer_filelist: for fname in filelist:
with open(fname, "r") as fin: with open(fname.strip(), "r") as fin:
for line in fin: for line in fin:
feas, label = self.get_words_and_label(line) dense_feature, sparse_feature, label = self._process_line(
yield feas, label line)
#yield dense_feature, sparse_feature, label
yield [dense_feature] + sparse_feature + [label]
import paddle import paddle
batch_iter = paddle.batch( batch_iter = paddle.batch(
...@@ -75,18 +66,18 @@ class IMDBDataset(dg.MultiSlotDataGenerator): ...@@ -75,18 +66,18 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
return batch_iter return batch_iter
def generate_sample(self, line): def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter(): def data_iter():
feas, label = self.get_words_and_label(line) dense_feature, sparse_feature, label = self._process_line(line)
yield ("words", feas), ("label", label) feature_name = ["dense_input"]
for idx in self.categorical_range_:
feature_name.append("C" + str(idx - 13))
feature_name.append("label")
yield zip(feature_name, [dense_feature] + sparse_feature + [label])
return data_iter return data_iter
if __name__ == "__main__": if __name__ == "__main__":
imdb = IMDBDataset() criteo_dataset = CriteoDataset()
imdb.load_resource("imdb.vocab") criteo_dataset.setup(int(sys.argv[1]))
imdb.run_from_stdin() criteo_dataset.run_from_stdin()
...@@ -12,25 +12,9 @@ ...@@ -12,25 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client #! /bin/bash
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
batch_size = 2 mkdir -p cube_model
x = [ mkdir -p cube/data
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, ./cube/cube-builder -dict_name=test_dict -job_mode=base -last_version=0 -cur_version=0 -depend_version=0 -input_path=./cube_model -output_path=${PWD}/cube/data -shard_num=1 -only_build=false
0.4919, 0.1856, 0.0795, -0.0332 cd cube && ./cube
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 1, 13))
batch_data = np.concatenate([new_data, new_data, new_data], axis=0)
print(batch_data.shape)
fetch_map = client.predict(
feed={"x": batch_data}, fetch=["price"], batch=True)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz
tar -xzf uci_housing.tar.gz tar -zxvf ctr_data.tar.gz
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import print_function
from args import parse_args
import os
import paddle.fluid as fluid
import paddle
import sys
from network_conf import dnn_model
dense_feature_dim = 13
paddle.enable_static()
def train():
args = parse_args()
sparse_only = args.sparse_only
if not os.path.isdir(args.model_output_dir):
os.mkdir(args.model_output_dir)
dense_input = fluid.layers.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32')
sparse_input_ids = [
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)
]
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
#nn_input = None if sparse_only else dense_input
nn_input = dense_input
predict_y, loss, auc_var, batch_auc_var, infer_vars = dnn_model(
nn_input, sparse_input_ids, label, args.embedding_size,
args.sparse_feature_dim)
optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
optimizer.minimize(loss)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_use_var([dense_input] + sparse_input_ids + [label])
python_executable = "python3.6"
pipe_command = "{} criteo_reader.py {}".format(python_executable,
args.sparse_feature_dim)
dataset.set_pipe_command(pipe_command)
dataset.set_batch_size(128)
thread_num = 10
dataset.set_thread(thread_num)
whole_filelist = [
"raw_data/part-%d" % x for x in range(len(os.listdir("raw_data")))
]
print(whole_filelist)
dataset.set_filelist(whole_filelist[:100])
dataset.load_into_memory()
fluid.layers.Print(auc_var)
epochs = 1
for i in range(epochs):
exe.train_from_dataset(
program=fluid.default_main_program(), dataset=dataset, debug=True)
print("epoch {} finished".format(i))
import paddle_serving_client.io as server_io
feed_var_dict = {}
feed_var_dict['dense_input'] = dense_input
for i, sparse in enumerate(sparse_input_ids):
feed_var_dict["embedding_{}.tmp_0".format(i)] = sparse
fetch_var_dict = {"prob": predict_y}
feed_kv_dict = {}
feed_kv_dict['dense_input'] = dense_input
for i, emb in enumerate(infer_vars):
feed_kv_dict["embedding_{}.tmp_0".format(i)] = emb
fetch_var_dict = {"prob": predict_y}
server_io.save_model("ctr_serving_model", "ctr_client_conf", feed_var_dict,
fetch_var_dict, fluid.default_main_program())
server_io.save_model("ctr_serving_model_kv", "ctr_client_conf_kv",
feed_kv_dict, fetch_var_dict,
fluid.default_main_program())
if __name__ == '__main__':
train()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddle.fluid as fluid
import math
def dnn_model(dense_input, sparse_inputs, label, embedding_size,
sparse_feature_dim):
def embedding_layer(input):
emb = fluid.layers.embedding(
input=input,
is_sparse=True,
is_distributed=False,
size=[sparse_feature_dim, embedding_size],
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()))
x = fluid.layers.sequence_pool(input=emb, pool_type='sum')
return emb, x
def mlp_input_tensor(emb_sums, dense_tensor):
#if isinstance(dense_tensor, fluid.Variable):
# return fluid.layers.concat(emb_sums, axis=1)
#else:
return fluid.layers.concat(emb_sums + [dense_tensor], axis=1)
def mlp(mlp_input):
fc1 = fluid.layers.fc(input=mlp_input,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(mlp_input.shape[1]))))
fc2 = fluid.layers.fc(input=fc1,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))))
pre = fluid.layers.fc(input=fc3,
size=2,
act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))))
return pre
emb_pair_sums = list(map(embedding_layer, sparse_inputs))
emb_sums = [x[1] for x in emb_pair_sums]
infer_vars = [x[0] for x in emb_pair_sums]
mlp_in = mlp_input_tensor(emb_sums, dense_input)
predict = mlp(mlp_in)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.reduce_sum(cost)
accuracy = fluid.layers.accuracy(input=predict, label=label)
auc_var, batch_auc_var, auc_states = \
fluid.layers.auc(input=predict, label=label, num_thresholds=2 ** 12, slide_steps=20)
return predict, avg_cost, auc_var, batch_auc_var, infer_vars
...@@ -12,31 +12,45 @@ ...@@ -12,31 +12,45 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader.imdb_reader import IMDBDataset from paddle_serving_client import Client
import sys import sys
import os
import criteo as criteo
import time
from paddle_serving_client.metric import auc
import numpy as np import numpy as np
py_version = sys.version_info[0]
client = Client() client = Client()
client.connect(["127.0.0.1:9393"]) client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9292"])
# you can define any english sentence or dataset here
# This example reuses imdb reader in training, you
# can define your own data preprocessing easily.
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource('imdb.vocab')
for line in sys.stdin: batch = 1
word_ids, label = imdb_dataset.get_words_and_label(line) buf_size = 100
word_len = len(word_ids) dataset = criteo.CriteoDataset()
feed = { dataset.setup(1000001)
"words": np.array(word_ids).reshape(word_len, 1), test_filelists = ["{}/part-0".format(sys.argv[2])]
"words.lod": [0, word_len] reader = dataset.infer_reader(test_filelists, batch, buf_size)
} label_list = []
fetch = ["prediction"] prob_list = []
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True) start = time.time()
if fetch_map["serving_status_code"] == 0: for ei in range(10000):
print(fetch_map) if py_version == 2:
data = reader().next()
else: else:
print(fetch_map["serving_status_code"]) data = reader().__next__()
#print("{} {}".format(fetch_map["prediction"][0], label[0])) feed_dict = {}
feed_dict['dense_input'] = data[0][0]
for i in range(1, 27):
feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(-1)
feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, len(data[0][i])]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
print(fetch_map)
prob_list.append(fetch_map['prob'][0][1])
label_list.append(data[0][-1][0])
print(auc(label_list, prob_list))
end = time.time()
print(end - start)
...@@ -17,20 +17,25 @@ import os ...@@ -17,20 +17,25 @@ import os
import sys import sys
from paddle_serving_server import OpMaker from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server from paddle_serving_server import Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_dist_kv_infer_op = op_maker.create('general_dist_kv_infer')
response_op = op_maker.create('general_response') response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker() op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_dist_kv_infer_op)
op_seq_maker.add_op(response_op) op_seq_maker.add_op(response_op)
server = Server() server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.load_model_config(sys.argv[1]) server.load_model_config(sys.argv[1])
server.prepare_server(workdir="work_dir1", port=9393, device="cpu") server.prepare_server(
workdir="work_dir1",
port=9292,
device="cpu",
cube_conf="./cube/conf/cube.conf")
server.run_server() server.run_server()
...@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory. ...@@ -35,11 +35,11 @@ client-side configuration file are stored in the `encrypt_client` directory.
## Start Encryption Service ## Start Encryption Service
CPU Service CPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU Service GPU Service
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## Prediction ## Prediction
......
...@@ -36,14 +36,14 @@ def serving_encryption(): ...@@ -36,14 +36,14 @@ def serving_encryption():
## 启动加密预测服务 ## 启动加密预测服务
CPU预测服务 CPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model
``` ```
GPU预测服务 GPU预测服务
``` ```
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9393 --use_encryption_model --gpu_ids 0
``` ```
## 预测 ## 预测
``` ```
python test_client.py encrypt_client/ python test_client.py encrypt_client/serving_client_conf.prototxt
``` ```
...@@ -19,7 +19,8 @@ import sys ...@@ -19,7 +19,8 @@ import sys
client = Client() client = Client()
client.load_client_config(sys.argv[1]) client.load_client_config(sys.argv[1])
client.use_key("./key") client.use_key("./key")
client.connect(["127.0.0.1:9300"], encryption=True) client.connect(["0.0.0.0:9393"], encryption=True)
fetch_list = client.get_fetch_names()
import paddle import paddle
test_reader = paddle.batch( test_reader = paddle.batch(
...@@ -28,5 +29,5 @@ test_reader = paddle.batch( ...@@ -28,5 +29,5 @@ test_reader = paddle.batch(
batch_size=1) batch_size=1)
for data in test_reader(): for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"]) fetch_map = client.predict(feed={"x": data[0][0]}, fetch=fetch_list)
print("{} {}".format(fetch_map["price"][0], data[0][1][0])) print(fetch_map)
...@@ -18,30 +18,21 @@ sh get_data.sh ...@@ -18,30 +18,21 @@ sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
### Client prediction ## Client prediction
### RPC Client
The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`). The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`).
``` shell ``` shell
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### Http Client
## HTTP service
### Start server
Start a web service with default web service hosting modules:
``` shell ``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci python test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
### Client prediction
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## Benchmark ## Benchmark
``` shell ``` shell
......
...@@ -10,15 +10,16 @@ sh get_data.sh ...@@ -10,15 +10,16 @@ sh get_data.sh
## RPC服务
### 开启服务端 ## 开启服务端
```shell ```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393
``` ```
### 客户端预测 ## 客户端预测
### 客户端RPC
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。 `test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
...@@ -26,23 +27,12 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po ...@@ -26,23 +27,12 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
python test_client.py uci_housing_client/serving_client_conf.prototxt python test_client.py uci_housing_client/serving_client_conf.prototxt
``` ```
### 客户端Http预测
## HTTP服务
### 开启服务端
通过下面的一行代码开启默认web服务:
``` shell ``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --name uci python test_httpclient.py uci_housing_client/serving_client_conf.prototxt
``` ```
### 客户端预测
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
## 性能测试 ## 性能测试
``` shell ``` shell
......
File mode changed from 100755 to 100644
...@@ -20,7 +20,7 @@ import numpy as np ...@@ -20,7 +20,7 @@ import numpy as np
client = Client() client = Client()
client.load_client_config(sys.argv[1]) client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"]) client.connect(["127.0.0.1:9393"])
fetch_list = client.get_fetch_names()
import paddle import paddle
test_reader = paddle.batch( test_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -31,6 +31,5 @@ for data in test_reader(): ...@@ -31,6 +31,5 @@ for data in test_reader():
new_data = np.zeros((1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
fetch_map = client.predict( fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True) feed={"x": new_data}, fetch=fetch_list, batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map) print(fetch_map)
...@@ -13,29 +13,28 @@ ...@@ -13,29 +13,28 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client from paddle_serving_client.httpclient import HttpClient
import sys
import numpy as np import numpy as np
client = Client() import time
client.connect(["127.0.0.1:9393"])
""" client = HttpClient()
client.load_client_config(sys.argv[1])
# if you want to enable Encrypt Module,uncommenting the following line
# client.use_key("./key")
client.set_response_compress(True)
client.set_request_compress(True)
fetch_list = client.get_fetch_names()
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
for data in test_reader(): for data in test_reader():
new_data = np.zeros((1, 1, 13)).astype("float32") new_data = np.zeros((1, 13)).astype("float32")
new_data[0] = data[0][0] new_data[0] = data[0][0]
fetch_map = client.predict( fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True) feed={"x": new_data}, fetch=fetch_list, batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map) print(fetch_map)
""" break
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
else:
print(fetch_map["serving_status_code"])
# Linear regression prediction service example
## Get data
```shell
sh get_data.sh
```
## Start the gRPC server
``` shell
python test_server.py uci_housing_model/
```
You can also start the default gRPC service with a single command:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```
## Client prediction
### Synchronous prediction
``` shell
python test_sync_client.py
```
### Asynchronous prediction
``` shell
python test_asyn_client.py
```
### Batch prediction
``` shell
python test_batch_client.py
```
### Prediction with timeout
``` shell
python test_timeout_client.py
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import functools
import time
import threading
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
complete_task_count = [0]
lock = threading.Lock()
def call_back(call_future):
try:
fetch_map = call_future.result()
print(fetch_map)
except grpc.RpcError as e:
print(e.code())
finally:
with lock:
complete_task_count[0] += 1
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
task_count = 0
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
future = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False, asyn=True)
task_count += 1
future.add_done_callback(functools.partial(call_back))
while complete_task_count[0] != task_count:
time.sleep(0.1)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config(sys.argv[1])
server.set_gpuid("0")
server.prepare_server(workdir="work_dir1", port=9393, device="gpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import MultiLangClient as Client
import grpc
import numpy as np
client = Client()
client.connect(["127.0.0.1:9393"])
client.set_rpc_timeout_ms(40)
x = [
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283,
0.4919, 0.1856, 0.0795, -0.0332
]
for i in range(3):
new_data = np.array(x).astype("float32").reshape((1, 13))
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=False)
if fetch_map["serving_status_code"] == 0:
print(fetch_map)
elif fetch_map["serving_status_code"] == grpc.StatusCode.DEADLINE_EXCEEDED:
print('timeout')
else:
print(fetch_map["serving_status_code"])
## IMDB comment sentiment inference service
([简体中文](./README_CN.md)|English)
### Get model files and sample data
```
sh get_data.sh
```
The downloaded package contains the CNN, LSTM and BOW model configs along with their test_data and train_data.
### Start RPC inference service
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### RPC Infer
The `paddlepaddle` package is used in `test_client.py`, and you may need to download the corresponding package(`pip install paddlepaddle`).
```
head test_data/part-0 | python test_client.py
```
It will print the prediction results of the first 10 test cases.
## IMDB评论情绪预测服务
(简体中文|[English](./README.md))
### 获取模型文件和样例数据
```
sh get_data.sh
```
脚本会下载和解压出cnn、lstm和bow三种模型的配置文文件以及test_data和train_data。
### 启动RPC预测服务
```
python -m paddle_serving_server.serve --model imdb_cnn_model/ --thread 10 --port 9393 --use_multilang
```
### 执行预测
`test_client.py`中使用了`paddlepaddle`包,需要进行下载(`pip install paddlepaddle`)。
```
head test_data/part-0 | python test_client.py
```
预测test_data/part-0的前十个样例。
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
tar -zxvf imdb_model.tar.gz
# Yolov4 Detection Service
([简体中文](README_CN.md)|English)
## Get Model
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## Start RPC Service
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
```
python test_client.py 000000570688.jpg
```
After the prediction is completed, a JSON file with the prediction results and a picture with the detection boxes drawn will be generated in the `./output` folder.
# Yolov4 检测服务
(简体中文|[English](README.md))
## 获取模型
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
```
## 启动RPC服务
```
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## 预测
```
python test_client.py 000000570688.jpg
```
预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
from paddle_serving_client import MultiLangClient as Client
from paddle_serving_app.reader import *
import cv2
preprocess = Sequential([
File2Image(), BGR2RGB(), Resize(
(608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
(2, 0, 1))
])
postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
client = Client()
client.connect(['127.0.0.1:9393'])
client.set_rpc_timeout_ms(100000)
im = preprocess(sys.argv[1])
fetch_map = client.predict(
feed={
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
print(fetch_map)
fetch_map.pop("serving_status_code")
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -16,5 +16,6 @@ ...@@ -16,5 +16,6 @@
from . import version from . import version
from . import client from . import client
from .client import * from .client import *
from .httpclient import *
__version__ = version.version_tag __version__ = version.version_tag
...@@ -25,11 +25,8 @@ import base64 ...@@ -25,11 +25,8 @@ import base64
import time import time
import sys import sys
import grpc
from .proto import multi_lang_general_model_service_pb2
sys.path.append( sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
...@@ -307,30 +304,40 @@ class Client(object): ...@@ -307,30 +304,40 @@ class Client(object):
if isinstance(feed, dict): if isinstance(feed, dict):
feed_batch.append(feed) feed_batch.append(feed)
elif isinstance(feed, list): elif isinstance(feed, list):
# batch_size must be 1, cause batch is already in Tensor. # if input is a list and the number of feed_var is 1.
if len(feed) != 1: # create a temp_dict { key = feed_var_name, value = list}
raise ValueError("Feed only list = [dict]") # put the temp_dict into the feed_batch.
feed_batch = feed if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else: else:
raise ValueError("Feed only accepts dict and list of dict") raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = [] # batch_size must be 1, cause batch is already in Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
int_slot = []
int_feed_names = [] int_feed_names = []
int_shape = [] int_shape = []
int_lod_slot_batch = [] int_lod_slot_batch = []
float_slot_batch = []
float_slot = []
float_feed_names = [] float_feed_names = []
float_lod_slot_batch = [] float_lod_slot_batch = []
float_shape = [] float_shape = []
string_slot_batch = []
string_slot = []
string_feed_names = [] string_feed_names = []
string_lod_slot_batch = [] string_lod_slot_batch = []
string_shape = [] string_shape = []
fetch_names = [] fetch_names = []
counter = 0 counter = 0
# batch_size must be 1, cause batch is already in Tensor.
batch_size = len(feed_batch)
for key in fetch_list: for key in fetch_list:
if key in self.fetch_names_: if key in self.fetch_names_:
...@@ -340,87 +347,69 @@ class Client(object): ...@@ -340,87 +347,69 @@ class Client(object):
raise ValueError( raise ValueError(
"Fetch names should not be empty or out of saved fetch list.") "Fetch names should not be empty or out of saved fetch list.")
for i, feed_i in enumerate(feed_batch): feed_i = feed_batch[0]
int_slot = [] for key in feed_i:
int_lod_slot = [] if ".lod" not in key and key not in self.feed_names_:
float_slot = [] raise ValueError("Wrong feed name: {}.".format(key))
float_lod_slot = [] if ".lod" in key:
string_slot = [] continue
string_lod_slot = []
for key in feed_i: self.shape_check(feed_i, key)
if ".lod" not in key and key not in self.feed_names_: if self.feed_types_[key] in int_type:
raise ValueError("Wrong feed name: {}.".format(key)) int_feed_names.append(key)
if ".lod" in key: shape_lst = []
continue if batch == False:
#if not isinstance(feed_i[key], np.ndarray): feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
self.shape_check(feed_i, key) 1, axis=0)
if self.feed_types_[key] in int_type: if isinstance(feed_i[key], np.ndarray):
if i == 0: shape_lst.extend(list(feed_i[key].shape))
int_feed_names.append(key) int_shape.append(shape_lst)
shape_lst = [] else:
if batch == False: int_shape.append(self.feed_shapes_[key])
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat( if "{}.lod".format(key) in feed_i:
1, axis=0) int_lod_slot_batch.append(feed_i["{}.lod".format(key)])
if isinstance(feed_i[key], np.ndarray): else:
shape_lst.extend(list(feed_i[key].shape)) int_lod_slot_batch.append([])
int_shape.append(shape_lst)
else: if isinstance(feed_i[key], np.ndarray):
int_shape.append(self.feed_shapes_[key]) int_slot.append(np.ascontiguousarray(feed_i[key]))
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
int_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
1, axis=0)
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
#if input is string, feed is not numpy.
elif self.feed_types_[key] in string_type:
if i == 0:
string_feed_names.append(key)
string_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
string_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_i[key])
self.has_numpy_input = True self.has_numpy_input = True
int_slot_batch.append(int_slot) else:
int_lod_slot_batch.append(int_lod_slot) int_slot.append(np.ascontiguousarray(feed_i[key]))
float_slot_batch.append(float_slot) self.all_numpy_input = False
float_lod_slot_batch.append(float_lod_slot)
string_slot_batch.append(string_slot) elif self.feed_types_[key] in float_type:
string_lod_slot_batch.append(string_lod_slot) float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
1, axis=0)
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.has_numpy_input = True
else:
float_slot.append(np.ascontiguousarray(feed_i[key]))
self.all_numpy_input = False
#if input is string, feed is not numpy.
elif self.feed_types_[key] in string_type:
string_feed_names.append(key)
string_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
string_lod_slot_batch.append(feed_i["{}.lod".format(key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_i[key])
self.has_numpy_input = True
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0') self.profile_.record('py_client_infer_0')
...@@ -428,11 +417,11 @@ class Client(object): ...@@ -428,11 +417,11 @@ class Client(object):
result_batch_handle = self.predictorres_constructor() result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input: if self.all_numpy_input:
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, float_slot, float_feed_names, float_shape, float_lod_slot_batch,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape, int_slot, int_feed_names, int_shape, int_lod_slot_batch,
int_lod_slot_batch, string_slot_batch, string_feed_names, string_slot, string_feed_names, string_shape,
string_shape, string_lod_slot_batch, fetch_names, string_lod_slot_batch, fetch_names, result_batch_handle,
result_batch_handle, self.pid, log_id) self.pid, log_id)
elif self.has_numpy_input == False: elif self.has_numpy_input == False:
raise ValueError( raise ValueError(
"Please make sure all of your inputs are numpy array") "Please make sure all of your inputs are numpy array")
...@@ -520,243 +509,3 @@ class Client(object): ...@@ -520,243 +509,3 @@ class Client(object):
def release(self): def release(self):
self.client_handle_.destroy_predictor() self.client_handle_.destroy_predictor()
self.client_handle_ = None self.client_handle_ = None
class MultiLangClient(object):
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
# TODO
raise Exception("cannot support ABtest yet")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weighted round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
)
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
data = np.ascontiguousarray(data)
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
if is_python:
if v_type == 0: # int64
result_map[name] = np.frombuffer(
var.data, dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.frombuffer(
var.data, dtype="float32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
result_map[name] = np.array(
list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
result_map[name] = np.array(
list(var.float_data), dtype="float32")
else:
raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
if isinstance(feed, dict) is False:
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = np.expand_dims(feed[key], 0).repeat(1, axis=0)
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
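# A minimal usage sketch of MultiLangClient (illustrative only; the endpoint,
# the feed name "x" and the fetch name "price" are placeholders, not taken
# from this repository):
#     client = MultiLangClient()
#     client.connect(["127.0.0.1:9393"])
#     ret = client.predict(feed={"x": np.ones((1, 13), dtype="float32")},
#                          fetch=["price"], batch=True)
#     if ret.get("serving_status_code", 0) == 0:
#         print(ret["price"])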
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import requests
import json
import numpy as np
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import gzip
# Iterable moved to collections.abc (it was removed from collections in Python 3.10).
from collections.abc import Iterable
import base64
# param 'type' (in feed_var or fetch_var) = 0 means dataType is int64
# param 'type' (in feed_var or fetch_var) = 1 means dataType is float32
# param 'type' (in feed_var or fetch_var) = 2 means dataType is int32
# param 'type' (in feed_var or fetch_var) = 3 means dataType is string (called bytes in the proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
# these keys correspond to the data fields in the proto
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
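# Illustrative note (added for clarity, not in the original source): a feedvar's
# elem_type code doubles as an index into proto_data_key_list, selecting which
# field of the HTTP request body carries the flattened values, e.g.
#     proto_data_key_list[float32_type]   # -> "float_data"
#     proto_data_key_list[bytes_type]     # -> "data"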
def list_flatten(items, ignore_types=(str, bytes)):
for x in items:
if isinstance(x, Iterable) and not isinstance(x, ignore_types):
yield from list_flatten(x)
else:
yield x
def data_bytes_number(datalist):
total_bytes_number = 0
if isinstance(datalist, list):
if len(datalist) == 0:
return total_bytes_number
else:
for data in datalist:
if isinstance(data, str):
total_bytes_number = total_bytes_number + len(data)
else:
total_bytes_number = total_bytes_number + 4 * len(datalist)
break
else:
raise ValueError(
"In the Function data_bytes_number(), data must be list.")
return total_bytes_number
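# Hypothetical usage sketch (added for clarity, not in the original source):
# data_bytes_number() gives a rough payload size so predict() can decide whether
# gzip compression is worthwhile; non-string elements are counted as 4 bytes each.
#     assert data_bytes_number([0.1, 0.2, 0.3]) == 12
#     assert data_bytes_number(["abc", "de"]) == 5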
class HttpClient(object):
def __init__(self,
ip="0.0.0.0",
port="9393",
service_name="/GeneralModelService/inference"):
self.feed_names_ = []
self.feed_real_names = []
self.fetch_names_ = []
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.http_timeout_ms = 200000
self.ip = ip
self.port = port
self.server_port = port
self.service_name = service_name
self.key = None
self.try_request_gzip = False
self.try_response_gzip = False
def load_client_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
# load configuration here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_real_names = [var.name for var in model_conf.feed_var]
self.feed_names_to_idx_ = {}  # maps alias_name to its index in feed_var
self.lod_tensor_set = set()
self.feed_tensor_len = {} #this is only used for shape check
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = [dim for dim in var.shape]
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def set_http_timeout_ms(self, http_timeout_ms):
if not isinstance(http_timeout_ms, int):
raise ValueError("http_timeout_ms must be int type.")
else:
self.http_timeout_ms = http_timeout_ms
def set_ip(self, ip):
self.ip = ip
def set_service_name(self, service_name):
self.service_name = service_name
def set_port(self, port):
self.port = port
def set_request_compress(self, try_request_gzip):
self.try_request_gzip = try_request_gzip
def set_response_compress(self, try_response_gzip):
self.try_response_gzip = try_response_gzip
# use_key enables the encryption feature by loading the client-side key.
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
self.get_serving_port()
def get_serving_port(self):
encrypt_url = "http://" + str(self.ip) + ":" + str(self.port)
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post(encrypt_url, req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
self.server_port = str(result["endpoint_list"][0])
print("rpc port is ", self.server_port)
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
# feed supports numpy arrays as well as plain list/tuple values.
# A bare str is not supported, because the field is repeated in the proto.
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, (list, tuple)):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, (list, str, tuple)):
# if input is a list or str or tuple, and the number of feed_var is 1.
# create a temp_dict { key = feed_var_name, value = list}
# put the temp_dict into the feed_batch.
if len(self.feed_names_) != 1:
raise ValueError(
"input is a list, but we got 0 or 2+ feed_var, don`t know how to divide the feed list"
)
temp_dict = {}
temp_dict[self.feed_names_[0]] = feed
feed_batch.append(temp_dict)
else:
raise ValueError("Feed only accepts dict and list of dict")
# batch_size must be 1, because the batch is already packed inside the Tensor.
if len(feed_batch) != 1:
raise ValueError("len of feed_batch can only be 1.")
fetch_names = []
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
feed_i = feed_batch[0]
Request = {}
Request["fetch_var_names"] = fetch_list
Request["log_id"] = int(log_id)
Request["tensor"] = []
index = 0
total_data_number = 0
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
Request["tensor"].append('')
Request["tensor"][index] = {}
lod = []
if "{}.lod".format(key) in feed_i:
lod = feed_i["{}.lod".format(key)]
shape = self.feed_shapes_[key].copy()
elem_type = self.feed_types_[key]
data_value = feed_i[key]
data_key = proto_data_key_list[elem_type]
# feed_i[key] may be an np.ndarray, or a list/tuple.
# An np.ndarray has to be converted into a flat list.
if isinstance(feed_i[key], np.ndarray):
shape_lst = []
# a 0-dim numpy value needs an extra enclosing []
if feed_i[key].ndim == 0:
data_value = [feed_i[key].tolist()]
shape_lst.append(1)
else:
shape_lst.extend(list(feed_i[key].shape))
shape = shape_lst
data_value = feed_i[key].flatten().tolist()
# When batch is False, insert a leading 1 into shape as the batch dim.
# When batch is True, numpy's own shape already carries the batch dim.
if batch == False:
shape.insert(0, 1)
# For list/tuple input, flatten any nested structure.
elif isinstance(feed_i[key], (list, tuple)):
# When batch is False, insert a leading 1 into shape as the batch dim.
# When batch is True, a list is not as regular as a numpy array, so its
# shape cannot be derived; use the length of the first dimension as the
# batch dim and insert it in front of feedVar.shape.
if batch == False:
shape.insert(0, 1)
else:
shape.insert(0, len(feed_i[key]))
feed_i[key] = [x for x in list_flatten(feed_i[key])]
data_value = feed_i[key]
else:
# The input may be a single str or int value, etc.
# Normalize it into a one-element list first.
# Since this input is special, keep the shape from the original feedvar.
data_value = []
data_value.append(feed_i[key])
if isinstance(feed_i[key], str):
if self.feed_types_[key] != bytes_type:
raise ValueError(
"feedvar is not string-type,feed can`t be a single string."
)
else:
if self.feed_types_[key] == bytes_type:
raise ValueError(
"feedvar is string-type,feed, feed can`t be a single int or others."
)
# If compression is disabled, there is no need to count the payload size.
if self.try_request_gzip:
total_data_number = total_data_number + data_bytes_number(
data_value)
Request["tensor"][index]["elem_type"] = elem_type
Request["tensor"][index]["shape"] = shape
Request["tensor"][index][data_key] = data_value
proto_index = self.feed_names_to_idx_[key]
Request["tensor"][index]["name"] = self.feed_real_names[proto_index]
Request["tensor"][index]["alias_name"] = key
if len(lod) > 0:
Request["tensor"][index]["lod"] = lod
index = index + 1
result = None
# request
web_url = "http://" + self.ip + ":" + self.server_port + self.service_name
postData = json.dumps(Request)
headers = {}
# Compress only when the data payload is larger than 512 bytes.
if self.try_request_gzip and total_data_number > 512:
postData = gzip.compress(bytes(postData, 'utf-8'))
headers["Content-Encoding"] = "gzip"
if self.try_response_gzip:
headers["Accept-encoding"] = "gzip"
# requests detects and decompresses gzip responses automatically
result = requests.post(url=web_url, headers=headers, data=postData)
if result is None:
return None
if result.status_code == 200:
return result.json()
return result
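# A minimal usage sketch of HttpClient (illustrative only; the config path,
# the feed name "x" and the fetch name "price" are placeholders, not taken
# from this repository):
#     client = HttpClient(ip="127.0.0.1", port="9393")
#     client.load_client_config("serving_client/serving_client_conf.prototxt")
#     client.set_request_compress(True)
#     ret = client.predict(feed={"x": np.ones((13, ), dtype="float32")},
#                          fetch=["price"], batch=False)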
...@@ -14,18 +14,16 @@ ...@@ -14,18 +14,16 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from . import monitor from . import monitor
from . import rpc_service
from . import serve from . import serve
from . import version from . import version
__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"] __all__ = ["version", "server", "serve", "monitor", "dag"]
from paddle_serving_server import ( from paddle_serving_server import (
version, version,
server, server,
serve, serve,
monitor, monitor,
rpc_service,
dag, ) dag, )
from .dag import * from .dag import *
......
File mode changed from 100755 to 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import numpy as np
import google.protobuf.text_format
from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer):
def __init__(self, model_config_path_list, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model
self.model_config_path_list = model_config_path_list
self.endpoints_ = endpoints
self._init_bclient(self.model_config_path_list, self.endpoints_)
self._parse_model_config(self.model_config_path_list)
def _init_bclient(self, model_config_path_list, endpoints, timeout_ms=None):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
from paddle_serving_client import Client
self.bclient_ = Client()
if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(file_path_list)
self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_path_list):
if isinstance(model_config_path_list, str):
model_config_path_list = [model_config_path_list]
elif isinstance(model_config_path_list, list):
pass
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
if len(file_path_list) > 1:
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[-1], 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _unpack_inference_request(self, request):
feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names)
is_python = request.is_python
log_id = request.log_id
feed_batch = []
for feed_inst in request.insts:
feed_dict = {}
for idx, name in enumerate(feed_names):
var = feed_inst.tensor_array[idx]
v_type = self.feed_types_[name]
data = None
if is_python:
if v_type == 0: # int64
data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1: # float32
data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2: # int32
data = np.frombuffer(var.data, dtype="int32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2: # int32
data = np.array(list(var.int_data), dtype="int32")
else:
raise Exception("error type.")
data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = np.ascontiguousarray(data)
if len(var.lod) > 0:
feed_dict["{}.lod".format(name)] = var.lod
feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse()
if ret is None:
resp.err_code = 1
return resp
results, tag = ret
resp.tag = tag
resp.err_code = 0
if not self.is_multi_model_:
results = {'general_infer_0': results}
for model_name, model_result in results.items():
model_output = multi_lang_general_model_service_pb2.ModelOutput()
inst = multi_lang_general_model_service_pb2.FetchInst()
for idx, name in enumerate(fetch_names):
tensor = multi_lang_general_model_service_pb2.Tensor()
v_type = self.fetch_types_[name]
if is_python:
tensor.data = model_result[name].tobytes()
else:
if v_type == 0: # int64
tensor.int64_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 1: # float32
tensor.float_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 2: # int32
tensor.int_data.extend(model_result[name].reshape(-1)
.tolist())
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
model_output.insts.append(inst)
model_output.engine_name = model_name
resp.outputs.append(model_output)
return resp
def SetTimeout(self, request, context):
# This process and the Inference process cannot run at the same time.
# For performance reasons, no thread lock is added for now.
timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_list, self.endpoints_,
timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0
return resp
def Inference(self, request, context):
feed_batch, fetch_names, is_python, log_id \
= self._unpack_inference_request(request)
ret = self.bclient_.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context):
# model_config_path_list is a list right now.
# dict support should be added when graphMaker is used.
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
model_config_str = []
for single_model_config in self.model_config_path_list:
if os.path.isdir(single_model_config):
with open("{}/serving_server_conf.prototxt".format(
single_model_config)) as f:
model_config_str.append(str(f.read()))
elif os.path.isfile(single_model_config):
with open(single_model_config) as f:
model_config_str.append(str(f.read()))
resp.client_config_str = model_config_str[0]
return resp
...@@ -23,13 +23,26 @@ import json ...@@ -23,13 +23,26 @@ import json
import base64 import base64
import time import time
from multiprocessing import Process from multiprocessing import Process
from flask import Flask, request
import sys import sys
if sys.version_info.major == 2: if sys.version_info.major == 2:
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
elif sys.version_info.major == 3: elif sys.version_info.major == 3:
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
from contextlib import closing
import socket
# web_service.py is still used by Pipeline.
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
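# Brief illustration (added note, not in the original file): port_is_available()
# returns True when nothing is listening on the port, so callers can guard startup:
#     if not port_is_available(9393):
#         raise SystemExit("Port 9393 is already used")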
def format_gpu_to_strlist(unformatted_gpus): def format_gpu_to_strlist(unformatted_gpus):
gpus_strlist = [] gpus_strlist = []
...@@ -117,8 +130,6 @@ def serve_args(): ...@@ -117,8 +130,6 @@ def serve_args():
type=str, type=str,
default="workdir", default="workdir",
help="Working dir of current service") help="Working dir of current service")
parser.add_argument(
"--name", type=str, default="None", help="Default service name")
parser.add_argument( parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL") "--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument( parser.add_argument(
...@@ -148,11 +159,6 @@ def serve_args(): ...@@ -148,11 +159,6 @@ def serve_args():
default=False, default=False,
action="store_true", action="store_true",
help="Use encryption model") help="Use encryption model")
parser.add_argument(
"--use_multilang",
default=False,
action="store_true",
help="Use Multi-language-service")
parser.add_argument( parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT") "--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument( parser.add_argument(
...@@ -189,7 +195,6 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi ...@@ -189,7 +195,6 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
ir_optim = args.ir_optim ir_optim = args.ir_optim
use_mkl = args.use_mkl use_mkl = args.use_mkl
max_body_size = args.max_body_size max_body_size = args.max_body_size
use_multilang = args.use_multilang
workdir = "{}_{}".format(args.workdir, port) workdir = "{}_{}".format(args.workdir, port)
if model == "": if model == "":
...@@ -222,10 +227,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi ...@@ -222,10 +227,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
general_response_op = op_maker.create('general_response') general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
if use_multilang: server = serving.Server()
server = serving.MultiLangServer()
else:
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num) server.set_num_threads(thread_num)
server.use_mkl(use_mkl) server.use_mkl(use_mkl)
...@@ -372,54 +374,14 @@ if __name__ == "__main__": ...@@ -372,54 +374,14 @@ if __name__ == "__main__":
elif os.path.isfile(single_model_config): elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.") raise ValueError("The input of --model should be a dir not file.")
if args.name == "None": if args.use_encryption_model:
from .web_service import port_is_available p_flag = False
if args.use_encryption_model: p = None
p_flag = False serving_port = 0
p = None server = HTTPServer(('0.0.0.0', int(args.port)), MainService)
serving_port = 0 print(
server = HTTPServer(('localhost', int(args.port)), MainService) 'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop'
print( )
'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop' server.serve_forever()
)
server.serve_forever()
else:
start_multi_card(args)
else: else:
from .web_service import WebService start_multi_card(args)
web_service = WebService(name=args.name)
web_service.load_model_config(args.model)
workdir = "{}_{}".format(args.workdir, args.port)
web_service.prepare_server(
workdir=workdir,
port=args.port,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim,
thread_num=args.thread,
precision=args.precision,
use_calib=args.use_calib,
use_trt=args.use_trt,
gpu_multi_stream=args.gpu_multi_stream,
op_num=args.op_num,
op_max_batch=args.op_max_batch,
gpuid=args.gpu_ids)
web_service.run_rpc_service()
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
web_service._launch_web_service()
service_name = "/" + web_service.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return web_service.get_prediction(request)
app_instance.run(host="0.0.0.0",
port=web_service.port,
threaded=False,
processes=4)
...@@ -16,11 +16,9 @@ import os ...@@ -16,11 +16,9 @@ import os
import tarfile import tarfile
import socket import socket
import paddle_serving_server as paddle_serving_server import paddle_serving_server as paddle_serving_server
from paddle_serving_server.rpc_service import MultiLangServerServiceServicer
from paddle_serving_server.serve import format_gpu_to_strlist from paddle_serving_server.serve import format_gpu_to_strlist
from .proto import server_configure_pb2 as server_sdk from .proto import server_configure_pb2 as server_sdk
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2_grpc
import google.protobuf.text_format import google.protobuf.text_format
import time import time
from .version import version_tag, version_suffix, device_type from .version import version_tag, version_suffix, device_type
...@@ -33,7 +31,6 @@ if sys.platform.startswith('win') is False: ...@@ -33,7 +31,6 @@ if sys.platform.startswith('win') is False:
import shutil import shutil
import platform import platform
import numpy as np import numpy as np
import grpc
import sys import sys
import collections import collections
import subprocess import subprocess
...@@ -595,195 +592,3 @@ class Server(object): ...@@ -595,195 +592,3 @@ class Server(object):
print(command) print(command)
os.system(command) os.system(command)
class MultiLangServer(object):
def __init__(self):
self.bserver_ = Server()
self.worker_num_ = 4
self.body_size_ = 64 * 1024 * 1024
self.concurrency_ = 100000
self.is_multi_model_ = False # for model ensemble, which is not supported right now.
self.device = "cpu" # this is the default value for multilang `device`.
def set_max_concurrency(self, concurrency):
self.concurrency_ = concurrency
self.bserver_.set_max_concurrency(concurrency)
def set_device(self, device="cpu"):
self.device = device
self.bserver_.set_device(device)
def set_num_threads(self, threads):
self.worker_num_ = threads
self.bserver_.set_num_threads(threads)
def set_max_body_size(self, body_size):
self.bserver_.set_max_body_size(body_size)
if body_size >= self.body_size_:
self.body_size_ = body_size
else:
print(
"max_body_size is less than default value, will use default value in service."
)
def use_encryption_model(self, flag=False):
self.encryption_model = flag
def set_port(self, port):
self.gport_ = port
def set_precision(self, precision="fp32"):
self.precision = precision
def set_use_calib(self, use_calib=False):
self.use_calib = use_calib
def set_reload_interval(self, interval):
self.bserver_.set_reload_interval(interval)
def set_op_sequence(self, op_seq):
self.bserver_.set_op_sequence(op_seq)
def set_op_graph(self, op_graph):
self.bserver_.set_op_graph(op_graph)
def use_mkl(self, flag):
self.bserver_.use_mkl(flag)
def set_memory_optimize(self, flag=False):
self.bserver_.set_memory_optimize(flag)
def set_ir_optimize(self, flag=False):
self.bserver_.set_ir_optimize(flag)
def set_gpuid(self, gpuid):
self.bserver_.set_gpuid(gpuid)
def set_op_num(self, op_num):
self.bserver_.set_op_num(op_num)
def set_op_max_batch(self, op_max_batch):
self.bserver_.set_op_max_batch(op_max_batch)
def set_trt(self):
self.bserver_.set_trt()
def set_gpu_multi_stream(self):
self.bserver_.set_gpu_multi_stream()
def set_lite(self):
self.bserver_.set_lite()
def set_xpu(self):
self.bserver_.set_xpu()
def load_model_config(self,
server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
pass
else:
raise Exception("The type of model_config_paths must be str or list"
", not {}.".format(type(server_config_dir_paths)))
for single_model_config in server_config_dir_paths:
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError(
"The input of --model should be a dir not file.")
self.bserver_.load_model_config(server_config_dir_paths)
if client_config_path is None:
# dict is not supported right now.
if isinstance(server_config_dir_paths, dict):
self.is_multi_model_ = True
client_config_path = []
for server_config_path_items in list(
server_config_dir_paths.items()):
client_config_path.append(server_config_path_items[1])
elif isinstance(server_config_dir_paths, list):
self.is_multi_model_ = False
client_config_path = server_config_dir_paths
else:
raise Exception(
"The type of model_config_paths must be str or list or "
"dict({op: model_path}), not {}.".format(
type(server_config_dir_paths)))
if isinstance(client_config_path, str):
client_config_path = [client_config_path]
elif isinstance(client_config_path, list):
pass
else: # dict is not support right now.
raise Exception(
"The type of client_config_path must be str or list or "
"dict({op: model_path}), not {}.".format(
type(client_config_path)))
if len(client_config_path) != len(server_config_dir_paths):
raise Warning(
"The len(client_config_path) is {}, != len(server_config_dir_paths) {}."
.format(len(client_config_path), len(server_config_dir_paths)))
self.bclient_config_path_list = client_config_path
def prepare_server(self,
workdir=None,
port=9292,
device=None,
use_encryption_model=False,
cube_conf=None):
# If `device` is not given, fall back to self.device, which is either the
# default value or a value previously set via set_device().
if device == None:
device = self.device
# if `device` is set, let self.device = device.
else:
self.device = device
if not self._port_is_available(port):
raise SystemExit("Port {} is already used".format(port))
default_port = 12000
self.port_list_ = []
for i in range(1000):
if default_port + i != port and self._port_is_available(default_port
+ i):
self.port_list_.append(default_port + i)
break
self.bserver_.prepare_server(
workdir=workdir,
port=self.port_list_[0],
device=device,
use_encryption_model=use_encryption_model,
cube_conf=cube_conf)
self.set_port(port)
def _launch_brpc_service(self, bserver):
bserver.run_server()
def _port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
return result != 0
def run_server(self):
p_bserver = Process(
target=self._launch_brpc_service, args=(self.bserver_, ))
p_bserver.start()
options = [('grpc.max_send_message_length', self.body_size_),
('grpc.max_receive_message_length', self.body_size_)]
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=self.worker_num_),
options=options,
maximum_concurrent_rpcs=self.concurrency_)
multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server(
MultiLangServerServiceServicer(
self.bclient_config_path_list, self.is_multi_model_,
["0.0.0.0:{}".format(self.port_list_[0])]), server)
server.add_insecure_port('[::]:{}'.format(self.gport_))
server.start()
p_bserver.join()
server.wait_for_termination()
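# A minimal usage sketch of MultiLangServer (illustrative only; the model dir,
# workdir and port are placeholders, and an OpSeqMaker is assumed to have been
# built elsewhere, as in start_gpu_card_model above):
#     server = MultiLangServer()
#     server.set_op_sequence(op_seq_maker.get_op_sequence())
#     server.load_model_config("uci_housing_model")
#     server.prepare_server(workdir="workdir", port=9393, device="cpu")
#     server.run_server()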
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#!flask/bin/python #!flask/bin/python
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
# Now, this is only for Pipeline.
from flask import Flask, request, abort from flask import Flask, request, abort
from contextlib import closing from contextlib import closing
from multiprocessing import Pool, Process, Queue from multiprocessing import Pool, Process, Queue
......
...@@ -16,7 +16,7 @@ from time import time as _time ...@@ -16,7 +16,7 @@ from time import time as _time
import time import time
import threading import threading
import multiprocessing import multiprocessing
from paddle_serving_client import MultiLangClient, Client from paddle_serving_client import Client
from concurrent import futures from concurrent import futures
import logging import logging
import func_timeout import func_timeout
...@@ -330,8 +330,9 @@ class Op(object): ...@@ -330,8 +330,9 @@ class Op(object):
if self.client_type == 'brpc': if self.client_type == 'brpc':
client = Client() client = Client()
client.load_client_config(client_config) client.load_client_config(client_config)
elif self.client_type == 'grpc': # After testing is complete, replace this with brpc-http.
client = MultiLangClient() # elif self.client_type == 'grpc':
# client = MultiLangClient()
elif self.client_type == 'local_predictor': elif self.client_type == 'local_predictor':
if self.local_predictor is None: if self.local_predictor is None:
raise ValueError("local predictor not yet created") raise ValueError("local predictor not yet created")
...@@ -474,10 +475,13 @@ class Op(object): ...@@ -474,10 +475,13 @@ class Op(object):
fetch=self._fetch_names, fetch=self._fetch_names,
batch=True, batch=True,
log_id=typical_logid) log_id=typical_logid)
# To be replaced with HttpClient later
'''
if isinstance(self.client, MultiLangClient): if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0: if call_result is None or call_result["serving_status_code"] != 0:
return None return None
call_result.pop("serving_status_code") call_result.pop("serving_status_code")
'''
return call_result return call_result
def postprocess(self, input_data, fetch_data, log_id=0): def postprocess(self, input_data, fetch_data, log_id=0):
......
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
...@@ -21,17 +21,33 @@ option cc_generic_services = true; ...@@ -21,17 +21,33 @@ option cc_generic_services = true;
message Tensor { message Tensor {
repeated bytes data = 1; repeated bytes data = 1;
optional int32 elem_type = 2; repeated int32 int_data = 2;
repeated int32 shape = 3; repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means bytes(string)
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
}; };
message FeedInst { repeated Tensor tensor_array = 1; }; message Request {
repeated Tensor tensor = 1;
message FetchInst { repeated Tensor tensor_array = 1; }; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
};
message Request { repeated FeedInst insts = 1; }; message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
};
message Response { repeated FetchInst insts = 1; }; message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
}
service GeneralModelService { service GeneralModelService {
rpc inference(Request) returns (Response); rpc inference(Request) returns (Response);
......
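For orientation, the JSON body that HttpClient.predict assembles maps onto this Request message roughly as follows (a hand-written sketch; the tensor name, shape, and values are invented for illustration):

    Request = {
        "fetch_var_names": ["price"],
        "log_id": 0,
        "tensor": [{
            "name": "x", "alias_name": "x",
            "elem_type": 1,  # float32
            "shape": [1, 13],
            "float_data": [0.1] * 13,
        }],
    }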
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644