diff --git a/core/configure/proto/general_model_service.proto b/core/configure/proto/general_model_service.proto index 89ac489f8ae3b90b74c94a3f9f3c82711086cd64..c2deab2f69ea6f6ca5e77354ec955bf679f9a3d6 100644 --- a/core/configure/proto/general_model_service.proto +++ b/core/configure/proto/general_model_service.proto @@ -12,41 +12,97 @@ // See the License for the specific language governing permissions and // limitations under the License. -syntax = "proto2"; +syntax = "proto3"; package baidu.paddle_serving.predictor.general_model; option java_multiple_files = true; +option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
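+  // (Illustrative values only, not part of the schema: a variable might be
+  //  name = "fc_0.tmp_1" in the model file while clients address it by
+  //  alias_name = "price".)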
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { - rpc inference(Request) returns (Response) {} - rpc debug(Request) returns (Response) {} + rpc inference(Request) returns (Response); + rpc debug(Request) returns (Response); }; diff --git a/core/general-client/include/client.h b/core/general-client/include/client.h index 689732c512fcb7612cbd3af025a470f4cbfc84fe..11c6a2b7aa324cd09d9895f7ba1c2f8b990aad29 100644 --- a/core/general-client/include/client.h +++ b/core/general-client/include/client.h @@ -88,7 +88,7 @@ class PredictorData { const std::string& name, const std::vector& shape, const std::vector& lod, - const int datatype = 3); + const int datatype = 20); const std::map>& float_data_map() const { return _float_data_map; @@ -140,6 +140,8 @@ class PredictorData { int get_datatype(std::string name) const; + void set_datatype(std::string name, int type); + std::string print(); private: @@ -159,6 +161,7 @@ class PredictorData { oss << "{"; oss << it->first << key_seg; const std::vector& v = it->second; + oss << v.size() << key_seg; for (size_t i = 0; i < v.size(); ++i) { if (i != v.size() - 1) { oss << v[i] << val_seg; @@ -184,7 +187,9 @@ class PredictorData { typename std::map::const_iterator itEnd = map.end(); for (; it != itEnd; it++) { oss << "{"; - oss << it->first << key_seg << it->second; + oss << it->first << key_seg + << "size=" << it->second.size() << key_seg + << "type=" << this->get_datatype(it->first); oss << "}"; } return oss.str(); diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index 88ec7a59f1181eec32e2da800a9a1b71e3cdc084..4d16637a0eb7152b39cf125ae359b2ca3361ad60 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -51,6 +51,8 @@ class ModelRes { res._float_value_map.end()); _int32_value_map.insert(res._int32_value_map.begin(), res._int32_value_map.end()); + _string_value_map.insert(res._string_value_map.begin(), + res._string_value_map.end()); _shape_map.insert(res._shape_map.begin(), res._shape_map.end()); _lod_map.insert(res._lod_map.begin(), res._lod_map.end()); _tensor_alias_names.insert(_tensor_alias_names.end(), @@ -68,6 +70,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -96,6 +101,12 @@ class ModelRes { std::vector&& get_int32_by_name_with_rv(const std::string& name) { return std::move(_int32_value_map[name]); } + const std::string& get_string_by_name(const std::string& name) { + 
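+    // Note: like the sibling accessors above, this uses map operator[],
+    // which default-constructs an empty entry for an unknown name instead
+    // of reporting an error; callers should pass names from the fetch list.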
return _string_value_map[name]; + } + std::string&& get_string_by_name_with_rv(const std::string& name) { + return std::move(_string_value_map[name]); + } const std::vector& get_shape_by_name(const std::string& name) { return _shape_map[name]; } @@ -128,6 +139,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -145,6 +159,7 @@ class ModelRes { std::map> _int64_value_map; std::map> _float_value_map; std::map> _int32_value_map; + std::map _string_value_map; std::map> _shape_map; std::map> _lod_map; std::vector _tensor_alias_names; @@ -184,6 +199,14 @@ class PredictorRes { const std::string& name) { return std::move(_models[model_idx].get_int32_by_name_with_rv(name)); } + const std::string& get_string_by_name(const int model_idx, + const std::string& name) { + return _models[model_idx].get_string_by_name(name); + } + std::string&& get_string_by_name_with_rv(const int model_idx, + const std::string& name) { + return std::move(_models[model_idx].get_string_by_name_with_rv(name)); + } const std::vector& get_shape_by_name(const int model_idx, const std::string& name) { return _models[model_idx].get_shape_by_name(name); diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp index 56fb1cd1d53ba04d9d071e778594635e5e3cba6d..4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5 100644 --- a/core/general-client/src/client.cpp +++ b/core/general-client/src/client.cpp @@ -23,7 +23,23 @@ using configure::GeneralModelConfig; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; int ServingClient::init(const std::vector& client_conf, const std::string server_port) { @@ -156,6 +172,10 @@ int PredictorData::get_datatype(std::string name) const { return 0; } +void PredictorData::set_datatype(std::string name, int type) { + _datatype_map[name] = type; +} + std::string PredictorData::print() { std::string res; res.append(map2string(_float_data_map)); @@ -309,20 +329,25 @@ int PredictorInputs::GenProto(const PredictorInputs& inputs, tensor->set_name(feed_name[idx]); tensor->set_alias_name(name); - const int string_shape_size = string_shape.size(); - // string_shape[vec_idx] = [1];cause numpy has no datatype of string. - // we pass string via vector >. - if (string_shape_size != 1) { - LOG(ERROR) << "string_shape_size should be 1-D, but received is : " - << string_shape_size; - return -1; - } - switch (string_shape_size) { - case 1: { - tensor->add_data(string_data); - break; + if (datatype == P_STRING) { + const int string_shape_size = string_shape.size(); + // string_shape[vec_idx] = [1];cause numpy has no datatype of string. + // we pass string via vector >. 
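+      // (Hypothetical feed for illustration: a single string arrives with
+      //  string_shape = {1}; any other rank fails the check below.)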
+ if (string_shape_size != 1) { + LOG(ERROR) << "string_shape_size should be 1-D, but received is : " + << string_shape_size; + return -1; + } + switch (string_shape_size) { + case 1: { + tensor->add_data(string_data); + break; + } } + } else { + tensor->set_tensor_content(string_data); } + } return 0; } @@ -355,6 +380,8 @@ int PredictorOutputs::ParseProto(const Response& res, std::shared_ptr predictor_output = std::make_shared(); predictor_output->engine_name = output.engine_name(); + + PredictorData& predictor_data = predictor_output->data; std::map>& float_data_map = *predictor_output->data.mutable_float_data_map(); std::map>& int64_data_map = *predictor_output->data.mutable_int64_data_map(); std::map>& int32_data_map = *predictor_output->data.mutable_int_data_map(); @@ -403,7 +430,13 @@ int PredictorOutputs::ParseProto(const Response& res, int32_data_map[name] = std::vector( output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin() + size); + } else if (fetch_name_to_type[name] == P_UINT8 + || fetch_name_to_type[name] == P_INT8) { + VLOG(2) << "fetch var [" << name << "]type=" + << fetch_name_to_type[name]; + string_data_map[name] = output.tensor(idx).tensor_content(); } + predictor_data.set_datatype(name, output.tensor(idx).elem_type()); idx += 1; } outputs.add_data(predictor_output); diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index d04ab89ae31d048e5a38ada7abec5f27d46ab62f..b8e8630b801f0777224d8c11c23578bc7049989c 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -25,7 +25,23 @@ using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; std::once_flag gflags_init_flag; namespace py = pybind11; @@ -262,6 +278,8 @@ int PredictorClient::numpy_predict( vec_idx++; } + // Add !P_STRING feed data of string_input to tensor_content + // UINT8 INT8 FLOAT16 vec_idx = 0; for (auto &name : string_feed_name) { int idx = _feed_name_to_idx[name]; @@ -277,22 +295,27 @@ int PredictorClient::numpy_predict( for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) { tensor->add_lod(string_lod_slot_batch[vec_idx][j]); } - tensor->set_elem_type(P_STRING); tensor->set_name(_feed_name[idx]); tensor->set_alias_name(name); - const int string_shape_size = string_shape[vec_idx].size(); - // string_shape[vec_idx] = [1];cause numpy has no datatype of string. - // we pass string via vector >. - if (string_shape_size != 1) { - LOG(ERROR) << "string_shape_size should be 1-D, but received is : " - << string_shape_size; - return -1; - } - switch (string_shape_size) { - case 1: { - tensor->add_data(string_feed[vec_idx]); - break; + if (_type[idx] != P_STRING) { + tensor->set_elem_type(_type[idx]); + tensor->set_tensor_content(string_feed[vec_idx]); + } else { + tensor->set_elem_type(P_STRING); + const int string_shape_size = string_shape[vec_idx].size(); + // string_shape[vec_idx] = [1];cause numpy has no datatype of string. + // we pass string via vector >. 
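+        // (Non-string feeds such as UINT8/INT8/FP16 were already packed
+        //  into tensor_content above; only true string feeds reach here.)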
+ if (string_shape_size != 1) { + LOG(ERROR) << "string_shape_size should be 1-D, but received is : " + << string_shape_size; + return -1; + } + switch (string_shape_size) { + case 1: { + tensor->add_data(string_feed[vec_idx]); + break; + } } } vec_idx++; @@ -366,6 +389,15 @@ int PredictorClient::numpy_predict( model._int32_value_map[name] = std::vector( output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin() + size); + } else if (_fetch_name_to_type[name] == P_UINT8) { + VLOG(2) << "fetch var " << name << "type uint8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_INT8) { + VLOG(2) << "fetch var " << name << "type int8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_FP16) { + VLOG(2) << "fetch var " << name << "type float16"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); } } predict_res_batch.add_model_res(std::move(model)); diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp index ad26bb7d3c175f08438ee22a5a42425fd5147117..6a29d3313ed14601b2a520b32f810a596aafdd8a 100644 --- a/core/general-client/src/pybind_general_model.cpp +++ b/core/general-client/src/pybind_general_model.cpp @@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) { }); return py::array(ptr->size(), ptr->data(), capsule); }) + .def("get_int32_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + std::vector *ptr = new std::vector( + std::move(self.get_int32_by_name_with_rv(model_idx, name))); + auto capsule = py::capsule(ptr, [](void *p) { + delete reinterpret_cast *>(p); + }); + return py::array(ptr->size(), ptr->data(), capsule); + }) + .def("get_string_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + return self.get_string_by_name_with_rv(model_idx, name); + }) .def("get_shape", [](PredictorRes &self, int model_idx, std::string &name) { std::vector *ptr = new std::vector( diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index af77df553837c594789b0e9943790fc37fc01c95..482097d3e1fa1c7f7369573b1b1a0a5fde57ae58 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -31,7 +31,23 @@ using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; int GeneralReaderOp::inference() { // read request from client @@ -78,6 +94,7 @@ int GeneralReaderOp::inference() { int64_t elem_type = 0; int64_t elem_size = 0; int64_t databuf_size = 0; + const void* src_ptr = nullptr; for (int i = 0; i < var_num; ++i) { paddle::PaddleTensor paddleTensor; const Tensor &tensor = req->tensor(i); @@ -86,19 +103,38 @@ int GeneralReaderOp::inference() { elem_size = 0; databuf_size = 0; elem_type = tensor.elem_type(); - VLOG(2) << "var[" << i << "] has elem type: " << elem_type; + src_ptr = nullptr ; if (elem_type == P_INT64) { // int64 
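+      // Pattern shared by every branch in this chain: set elem_size,
+      // paddleTensor.dtype, data_len and src_ptr; the actual copy from
+      // src_ptr happens later (one memcpy for fixed-width types, while
+      // strings are copied element by element).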
elem_size = sizeof(int64_t); paddleTensor.dtype = paddle::PaddleDType::INT64; data_len = tensor.int64_data_size(); + src_ptr = tensor.int64_data().data(); } else if (elem_type == P_FLOAT32) { elem_size = sizeof(float); paddleTensor.dtype = paddle::PaddleDType::FLOAT32; data_len = tensor.float_data_size(); + src_ptr = tensor.float_data().data(); } else if (elem_type == P_INT32) { elem_size = sizeof(int32_t); paddleTensor.dtype = paddle::PaddleDType::INT32; data_len = tensor.int_data_size(); + src_ptr = tensor.int_data().data(); + } else if (elem_type == P_UINT8) { + elem_size = sizeof(uint8_t); + paddleTensor.dtype = paddle::PaddleDType::UINT8; + data_len = tensor.tensor_content().size(); + src_ptr = tensor.tensor_content().data(); + } else if (elem_type == P_INT8) { + elem_size = sizeof(int8_t); + paddleTensor.dtype = paddle::PaddleDType::INT8; + data_len = tensor.tensor_content().size(); + src_ptr = tensor.tensor_content().data(); + } else if (elem_type == P_FP16) { + // paddle inference will support FLOAT16 + // elem_size = 1; + // paddleTensor.dtype = paddle::PaddleDType::FLOAT16; + // data_len = tensor.tensor_content().size(); + // src_ptr = tensor.tensor_content().data(); } else if (elem_type == P_STRING) { // use paddle::PaddleDType::UINT8 as for String. elem_size = sizeof(char); @@ -109,8 +145,18 @@ int GeneralReaderOp::inference() { // now only support single string for (int idx = 0; idx < tensor.data_size(); idx++) { data_len += tensor.data()[idx].length() + 1; + src_ptr = tensor.data()[idx].data(); } } + VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";" + << "elem_size=" << elem_size << ";" + << "dtype=" << paddleTensor.dtype << ";" + << "data_len=" << data_len; + if (src_ptr == nullptr) { + LOG(ERROR) << "Not support var[" << i << "] with elem_type[" + << elem_type << "]"; + continue; + } // implement lod tensor here // only support 1-D lod // TODO(HexToString): support 2-D lod @@ -141,44 +187,17 @@ int GeneralReaderOp::inference() { VLOG(2) << "(logid=" << log_id << ") var[" << i << "] has lod_tensor and len=" << out->at(i).lod[0].back(); } - if (elem_type == P_INT64) { - int64_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.int64_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.int64_data().data(), databuf_size); - /* - int elem_num = tensor.int64_data_size(); - for (int k = 0; k < elem_num; ++k) { - dst_ptr[k] = tensor.int64_data(k); - } - */ - } else if (elem_type == P_FLOAT32) { - float *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.float_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.float_data().data(), databuf_size); - /*int elem_num = tensor.float_data_size(); - for (int k = 0; k < elem_num; ++k) { - dst_ptr[k] = tensor.float_data(k); - }*/ - } else if (elem_type == P_INT32) { - int32_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.int_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.int_data().data(), databuf_size); - } else if (elem_type == P_STRING) { + void* dst_ptr = out->at(i).data.data(); + if (!dst_ptr) { + LOG(ERROR) << "dst_ptr is nullptr"; + return -1; + } + + // For common data, we 
just copy from src to dst + // For string data, we need to iterate through all str + if (elem_type != P_STRING) { + memcpy(dst_ptr, src_ptr, databuf_size); + } else { char *dst_ptr = static_cast(out->at(i).data.data()); VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i << "] is " << tensor.data(0); diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 9f6c8aabd72c7e1e9b8ff933c807ee7fcdc0662f..e944c8d82d8aa2ad540455200cf835ce26eb366e 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -168,7 +168,24 @@ int GeneralResponseOp::inference() { google::protobuf::RepeatedField tmp_data(data_ptr, data_ptr + cap); output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data); - } + } else if (dtype == paddle::PaddleDType::UINT8) { + tensor->set_elem_type(7); + VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var [" + << model_config->_fetch_name[idx] << "]."; + tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + } else if (dtype == paddle::PaddleDType::INT8) { + tensor->set_elem_type(8); + VLOG(2) << "(logid=" << log_id << ")Prepare int8 var [" + << model_config->_fetch_name[idx] << "]."; + tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + } + // inference will support fp16 + // else if (dtype == paddle::PaddleDType::FLOAT16) { + // tensor->set_elem_type(5); + // VLOG(2) << "(logid=" << log_id << ")Prepare float16 var [" + // << model_config->_fetch_name[idx] << "]."; + // tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + // } VLOG(2) << "(logid=" << log_id << ") fetch var [" << model_config->_fetch_name[idx] << "] ready"; diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 8fedb60e97ec5b81263687b47ff0794880da8671..4b6282637ca6ea0617096a18bbbc3268067906bc 100755 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
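// proto3 migration note (editorial, not patch content): 'optional' and
// 'required' labels and [ default = ... ] options are not legal in proto3,
// which is why they are dropped below; unset scalar fields simply read
// back as 0 / "" / false.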
-syntax = "proto2"; +syntax = "proto3"; import "pds_option.proto"; import "builtin_format.proto"; package baidu.paddle_serving.predictor.general_model; @@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp index a99828ee3466a32d45dcabb61a2700f9362539d4..b41ccc8077a0a4e88c474a124e000df1c85697d3 100644 --- a/core/pdcodegen/src/pdcodegen.cpp +++ b/core/pdcodegen/src/pdcodegen.cpp @@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator { const FieldDescriptor* fd = in_shared_fields[si]; std::string field_name = fd->name(); printer->Print("\n/////$field_name$\n", "field_name", field_name); - if (fd->is_optional()) { - printer->Print( - "if (req->has_$field_name$()) {\n", "field_name", field_name); - printer->Indent(); - } if (fd->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE || fd->is_repeated()) { @@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator { "field_name", field_name); } - if (fd->is_optional()) { - printer->Outdent(); - printer->Print("}\n"); - } } printer->Print( diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index 45014d28d0034ec402bbd9b21eac3e832da7c1f9..67a7cf2f6396ec1b5b47c23f87b78ae77c178427 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -533,7 +533,30 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { paddle::PaddleDType::INT32) { int32_t* data = static_cast(origin_data); lod_tensor_in->CopyFromCpu(data); + } else if ((*tensorVector_in_pointer)[i].dtype == + paddle::PaddleDType::UINT8) { + uint8_t* data = static_cast(origin_data); + lod_tensor_in->CopyFromCpu(data); + } else if ((*tensorVector_in_pointer)[i].dtype == + paddle::PaddleDType::INT8) { + int8_t* data = static_cast(origin_data); + lod_tensor_in->CopyFromCpu(data); + } else { + LOG(ERROR) << "Inference not support type[" + << (*tensorVector_in_pointer)[i].dtype + << "],name[" << (*tensorVector_in_pointer)[i].name + << "]" << " copy into core failed!"; } + // Paddle inference will support FP16 in next version. + // else if ((*tensorVector_in_pointer)[i].dtype == + // paddle::PaddleDType::FLOAT16) { + // paddle::platform::float16* data = + // static_cast(origin_data); + // lod_tensor_in->CopyFromCpu(data); + // } + VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name + << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype + << ";tensor_dtype=" << lod_tensor_in->type(); } // After the input data is passed in, // call 'core->Run()' perform the prediction process. 
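The hunk above and the hunk below extend the same dtype dispatch, once for CopyFromCpu on the input side and once for CopyToCpu on the output side. Here is a minimal sketch of the element-size bookkeeping both sides depend on, using the elem_type codes from general_model_service.proto; the helper and its name are ours, not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>

// elem_type codes mirrored from general_model_service.proto.
enum ProtoDataType : int {
  P_INT64 = 0, P_FLOAT32, P_INT32, P_FP64, P_INT16, P_FP16, P_BF16,
  P_UINT8, P_INT8, P_BOOL, P_COMPLEX64, P_COMPLEX128, P_STRING = 20,
};

// Bytes per element for the fixed-width types this patch wires up;
// returns 0 for anything the server does not support yet.
inline size_t ElemSize(int elem_type) {
  switch (elem_type) {
    case P_INT64:   return sizeof(int64_t);
    case P_FLOAT32: return sizeof(float);
    case P_INT32:   return sizeof(int32_t);
    case P_UINT8:   return sizeof(uint8_t);
    case P_INT8:    return sizeof(int8_t);
    case P_FP16:    return 2;  // pending FLOAT16 support; see commented-out branches
    default:        return 0;
  }
}
```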
@@ -598,7 +621,41 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { int32_t* data_out = reinterpret_cast(databuf_data); lod_tensor_out->CopyToCpu(data_out); databuf_char = reinterpret_cast(data_out); + } else if (dataType == paddle::PaddleDType::UINT8) { + databuf_size = out_num * sizeof(uint8_t); + databuf_data = MempoolWrapper::instance().malloc(databuf_size); + if (!databuf_data) { + LOG(ERROR) << "Malloc failed, size: " << databuf_size; + return -1; + } + uint8_t* data_out = reinterpret_cast(databuf_data); + lod_tensor_out->CopyToCpu(data_out); + databuf_char = reinterpret_cast(data_out); + } else if (dataType == paddle::PaddleDType::INT8) { + databuf_size = out_num * sizeof(int8_t); + databuf_data = MempoolWrapper::instance().malloc(databuf_size); + if (!databuf_data) { + LOG(ERROR) << "Malloc failed, size: " << databuf_size; + return -1; + } + int8_t* data_out = reinterpret_cast(databuf_data); + lod_tensor_out->CopyToCpu(data_out); + databuf_char = reinterpret_cast(data_out); } + // Inference will support FP16 in next version + // else if (dataType == paddle::PaddleDType::FLOAT16) { + // using float16 = paddle::platform::float16; + // databuf_size = out_num * sizeof(float16); + // databuf_data = MempoolWrapper::instance().malloc(databuf_size); + // if (!databuf_data) { + // LOG(ERROR) << "Malloc failed, size: " << databuf_size; + // return -1; + // } + // float16* data_out = reinterpret_cast(databuf_data); + // lod_tensor_out->CopyToCpu(data_out); + // databuf_char = reinterpret_cast(data_out); + // } + // Because task scheduling requires OPs to use 'Channel' // (which is a data structure) to transfer data between OPs. // We need to copy the processed data to the 'Channel' for the next OP. diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index 92032ab77e88a515c48db312e20b8acb13c9cddc..5340f4226e12b0b99147bc2972928b7d7c733057 100755 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-syntax = "proto2"; +syntax = "proto3"; import "pds_option.proto"; import "builtin_format.proto"; package baidu.paddle_serving.predictor.general_model; @@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { diff --git a/doc/HTTP_SERVICE_CN.md b/doc/HTTP_SERVICE_CN.md old mode 100644 new mode 100755 index e8050a6d48275224b2dabe2298b5d8eb9ddccc80..ef35eff2f3d9cd259a7d66800dc6866605d4cf6d --- a/doc/HTTP_SERVICE_CN.md +++ b/doc/HTTP_SERVICE_CN.md @@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 - 为了方便用户快速的使用Http方式请求Server端预测服务,我们已经将常用的Http请求的数据体封装、压缩、请求加密等功能封装为一个HttpClient类提供给用户,方便用户使用。 -使用HttpClient最简单只需要三步,1、创建一个HttpClient对象。2、加载Client端的prototxt配置文件(本例中为python/examples/fit_a_line/目录下的uci_housing_client/serving_client_conf.prototxt),3、调用Predict函数,通过Http方式请求预测服务。 +使用HttpClient最简单只需要四步,1、创建一个HttpClient对象。2、加载Client端的prototxt配置文件(本例中为python/examples/fit_a_line/目录下的uci_housing_client/serving_client_conf.prototxt)。3、调用connect函数。4、调用Predict函数,通过Http方式请求预测服务。 此外,您可以根据自己的需要配置Server端IP、Port、服务名称(此服务名称需要与[`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto)文件中的Service服务名和rpc方法名对应,即`GeneralModelService`字段和`inference`字段),设置Request数据体压缩,设置Response支持压缩传输,模型加密预测(需要配置Server端使用模型加密)、设置响应超时时间等功能。 @@ -103,7 +103,7 @@ repeated int32 numbers = 1; ``` #### elem_type -表示数据类型,0 means int64, 1 means float32, 2 means int32, 3 means bytes(string) +表示数据类型,0 means int64, 1 means float32, 2 means int32, 20 means bytes(string) #### fetch_var_names diff --git a/java/src/main/java/io/paddle/serving/client/Client.java b/java/src/main/java/io/paddle/serving/client/Client.java index 63e861ba6199c7a56129c4d3b0cb03a77d26f6b7..af4ccc5246262336ef9df05aa65beb5b91de33fd 100755 --- a/java/src/main/java/io/paddle/serving/client/Client.java +++ b/java/src/main/java/io/paddle/serving/client/Client.java @@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; -enum ElementType -{ - Int64_type, Float32_type, Int32_type, Bytes_type; +class ElementType { + public static final int Int64_type = 0; + public static final int Float32_type = 1; + public static final int Int32_type = 2; + public static final int String_type = 20; + public static final Map feedTypeToDataKey_; + static + { + feedTypeToDataKey_ = new HashMap(); + feedTypeToDataKey_.put(ElementType.Int64_type, "int64_data"); + feedTypeToDataKey_.put(ElementType.Float32_type, "float_data"); + feedTypeToDataKey_.put(ElementType.Int32_type, "int_data"); + feedTypeToDataKey_.put(ElementType.String_type, "data"); + } } class Profiler { @@ -104,7 +115,6 @@ public class Client { private Map feedTypes_; private Map> feedShapes_; private Map feedNameToIndex_; - private Map feedTypeToDataKey_; private List fetchNames_; private Map fetchTypes_; private Set lodTensorSet_; @@ -147,12 +157,6 @@ public class Client { channel_ = null; blockingStub_ = null; - feedTypeToDataKey_ = new HashMap(); - feedTypeToDataKey_.put(0, "int64_data"); - feedTypeToDataKey_.put(1, "float_data"); - 
feedTypeToDataKey_.put(2, "int_data"); - feedTypeToDataKey_.put(3, "data"); - profiler_ = new Profiler(); boolean is_profile = false; String FLAGS_profile_client = System.getenv("FLAGS_profile_client"); @@ -525,7 +529,7 @@ public class Client { jsonTensor.put("elem_type", element_type); // 处理数据与shape - String protoDataKey = feedTypeToDataKey_.get(element_type); + String protoDataKey = ElementType.feedTypeToDataKey_.get(element_type); // 如果是INDArray类型,先转为一维. // 此时shape为INDArray的shape if(objectValue instanceof INDArray){ @@ -535,11 +539,11 @@ public class Client { for(long dim:indarrayShape){ shape.add((int)dim); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ objectValue = tempIndArray.data().asLong(); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ objectValue = tempIndArray.data().asInt(); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ objectValue = tempIndArray.data().asFloat(); }else{ throw new Exception("INDArray 类型不支持"); @@ -564,11 +568,11 @@ public class Client { // 此时无法获取batch信息,故对shape不处理 // 由于Proto中为Repeated,需要把数据包装成list if(objectValue instanceof String){ - if(feedTypes_.get(protoDataKey)!= ElementType.Bytes_type.ordinal()){ + if(feedTypes_.get(protoDataKey)!= ElementType.String_type){ throw new Exception("feedvar is not string-type,feed can`t be a single string."); } }else{ - if(feedTypes_.get(protoDataKey)== ElementType.Bytes_type.ordinal()){ + if(feedTypes_.get(protoDataKey)== ElementType.String_type){ throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others."); } } @@ -662,17 +666,17 @@ public class Client { for(long dim:indarrayShape){ shape.add((int)dim); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ List iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList()); tensor_builder.addAllInt64Data(iter); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ List iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList()); tensor_builder.addAllIntData(iter); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ List iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat())); tensor_builder.addAllFloatData(iter); @@ -684,13 +688,13 @@ public class Client { // 如果是数组类型,则无须处理,直接使用即可。 // 且数组无法嵌套,此时batch无法从数据中获取 // 默认batch维度为1,或者feedVar的shape信息中已包含batch - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ List iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList()); tensor_builder.addAllInt64Data(iter); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ List iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList()); tensor_builder.addAllIntData(iter); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ List iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue)); tensor_builder.addAllFloatData(iter); }else{ @@ -707,11 +711,11 @@ public class Client { // 在index=0处,加上batch shape.add(0, list.size()); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == 
ElementType.Int64_type){ tensor_builder.addAllInt64Data((List)(List)recursiveExtract(objectValue)); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ tensor_builder.addAllIntData((List)(List)recursiveExtract(objectValue)); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ tensor_builder.addAllFloatData((List)(List)recursiveExtract(objectValue)); }else{ // 看接口是String还是Bytes @@ -723,11 +727,11 @@ public class Client { // 由于Proto中为Repeated,需要把数据包装成list List tempList = new ArrayList<>(); tempList.add(objectValue); - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ tensor_builder.addAllInt64Data((List)(List)tempList); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ tensor_builder.addAllIntData((List)(List)tempList); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ tensor_builder.addAllFloatData((List)(List)tempList); }else{ // 看接口是String还是Bytes diff --git a/java/src/main/proto/general_model_service.proto b/java/src/main/proto/general_model_service.proto index 89ac489f8ae3b90b74c94a3f9f3c82711086cd64..aa06d388a468d71e968aa53b19f25c55f8c42ee1 100644 --- a/java/src/main/proto/general_model_service.proto +++ b/java/src/main/proto/general_model_service.proto @@ -12,41 +12,96 @@ // See the License for the specific language governing permissions and // limitations under the License. -syntax = "proto2"; +syntax = "proto3"; package baidu.paddle_serving.predictor.general_model; option java_multiple_files = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { - rpc inference(Request) returns (Response) {} - rpc debug(Request) returns (Response) {} + rpc inference(Request) returns (Response); + rpc debug(Request) returns (Response); }; diff --git a/python/examples/ocr/README.md b/python/examples/ocr/README.md old mode 100644 new mode 100755 index 630f01d999943b9948e153430b30d80fbabd0549..95cc210a7e68d5582e68460f2eec89419bf7fd7c --- a/python/examples/ocr/README.md +++ b/python/examples/ocr/README.md @@ -119,7 +119,7 @@ The pre-processing and post-processing is in the C + + server part, the image's so the value of parameter `feed_var` which is in the file `ocr_det_client/serving_client_conf.prototxt` should be changed. -for this case, `feed_type` should be 3(which means the data type is string),`shape` should be 1. +for this case, `feed_type` should be 20(which means the data type is string),`shape` should be 1. By passing in multiple client folder paths, the client can be started for multi model prediction. ``` diff --git a/python/examples/ocr/README_CN.md b/python/examples/ocr/README_CN.md old mode 100644 new mode 100755 index 421a4b930507abd3d36ef6db737f85a060647ced..5c0734c94aa6d61e1fdb9e8f87d5ee187c805ff0 --- a/python/examples/ocr/README_CN.md +++ b/python/examples/ocr/README_CN.md @@ -118,7 +118,7 @@ python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --por 即`ocr_det_client/serving_client_conf.prototxt`中`feed_var`字段 -对于本示例而言,`feed_type`应修改为3(数据类型为string),`shape`为1. +对于本示例而言,`feed_type`应修改为20(数据类型为string),`shape`为1. 通过在客户端启动后加入多个client模型的client配置文件夹路径,启动client进行预测。 ``` diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py index c64254bf312e46d6159ba63ea159b01f5d0c3cbc..826a2edb5d2434b5937e7ba7e6bb92708b8225d4 100755 --- a/python/paddle_serving_client/client.py +++ b/python/paddle_serving_client/client.py @@ -31,15 +31,21 @@ sys.path.append( #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16 +#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8 +#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8 +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +float16_type = 5 +uint8_type = 7 +int8_type = 8 +bytes_type = 20 #int_type,float_type,string_type are the set of each subdivision classes. 
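#Note: float16/uint8/int8 deliberately join string_type below, because their
#raw bytes travel through the same string path (tensor_content) as bytes_type.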
int_type = set([int64_type, int32_type]) float_type = set([float32_type]) -string_type = set([bytes_type]) +string_type = set([bytes_type, float16_type, uint8_type, int8_type]) class _NOPProfiler(object): @@ -411,7 +417,10 @@ class Client(object): key)]) else: string_lod_slot_batch.append([]) - string_slot.append(feed_dict[key]) + if type(feed_dict[key]) is np.ndarray: + string_slot.append(feed_dict[key].tostring()) + else: + string_slot.append(feed_dict[key]) self.has_numpy_input = True self.profile_.record('py_prepro_1') @@ -492,6 +501,38 @@ class Client(object): tmp_lod = result_batch_handle.get_lod(mi, name) if np.size(tmp_lod) > 0: result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == uint8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.uint8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == int8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.int8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod multi_result_map.append(result_map) ret = None if len(model_engine_names) == 1: diff --git a/python/paddle_serving_client/httpclient.py b/python/paddle_serving_client/httpclient.py index 27ed269db0cccc1856e963a7b02c702d845c7ca6..bb056a99732aeb1fa855b6ce1e020ada82072ed0 100755 --- a/python/paddle_serving_client/httpclient.py +++ b/python/paddle_serving_client/httpclient.py @@ -32,13 +32,18 @@ from .proto import general_model_service_pb2_grpc #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +bytes_type = 20 # this is corresponding to the proto -proto_data_key_list = ["int64_data", "float_data", "int_data", "data"] +proto_data_key_list = { + 0: "int64_data", + 1: "float_data", + 2: "int_data", + 20: "data" +} def list_flatten(items, ignore_types=(str, bytes)):
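Taken together, the fetch path for the new fixed-width types is: the server packs raw bytes into tensor_content, and the client reinterprets them by elem_type (the Python client above does this with np.fromstring). Below is a self-contained sketch of that decoding step for a C++ caller; the function name and usage are hypothetical, since the patch's C++ API returns the raw string from get_string_by_name and leaves the cast to the caller:

```cpp
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>

// Reinterpret a fetched tensor_content buffer as a typed array: the C++
// analogue of the Python client's np.fromstring(tmp_str, dtype=np.int8).
template <typename T>
std::vector<T> DecodeTensorContent(const std::string& content) {
  if (content.size() % sizeof(T) != 0) {
    throw std::runtime_error("tensor_content size not a multiple of dtype");
  }
  std::vector<T> out(content.size() / sizeof(T));
  if (!content.empty()) {
    std::memcpy(out.data(), content.data(), content.size());
  }
  return out;
}

// Hypothetical usage against the PredictorRes API added in this patch:
//   std::string raw = res.get_string_by_name(/*model_idx=*/0, "score");
//   std::vector<int8_t> score = DecodeTensorContent<int8_t>(raw);
```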