diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp
index 4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5..cc55dd30a5649afac98810fb83f98a837932a523 100644
--- a/core/general-client/src/client.cpp
+++ b/core/general-client/src/client.cpp
@@ -23,8 +23,7 @@ using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
          output.tensor(idx).int_data().begin(),
          output.tensor(idx).int_data().begin() + size);
    } else if (fetch_name_to_type[name] == P_UINT8
-              || fetch_name_to_type[name] == P_INT8) {
+              || fetch_name_to_type[name] == P_INT8
+              || fetch_name_to_type[name] == P_FP16) {
      VLOG(2) << "fetch var [" << name << "]type="
              << fetch_name_to_type[name];
      string_data_map[name] = output.tensor(idx).tensor_content();
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index fb71c0c9fc6e3680b8b51bad9ca891e41ef3a849..403119594c759a35d5dfd6251174627f367d9c65 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@ using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 482097d3e1fa1c7f7369573b1b1a0a5fde57ae58..2ad3e4cab6b77b305494c3833f0e3781ed0fd0b7 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
       data_len = tensor.tensor_content().size();
       src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_FP16) {
-      // paddle inference will support FLOAT16
-      // elem_size = 1;
-      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
-      // data_len = tensor.tensor_content().size();
-      // src_ptr = tensor.tensor_content().data();
+      // copy bytes from tensor content to TensorVector
+      elem_size = 1;
+      paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index e944c8d82d8aa2ad540455200cf835ce26eb366e..07d3473ec6ce12373114bfc50a67890ac2757634 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
        VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
                << model_config->_fetch_name[idx] << "].";
        tensor->set_tensor_content(in->at(idx).data.data(),
                                   in->at(idx).data.length());
-      }
-      // inference will support fp16
-      // else if (dtype == paddle::PaddleDType::FLOAT16) {
-      //   tensor->set_elem_type(5);
-      //   VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
-      //           << model_config->_fetch_name[idx] << "].";
-      //   tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      // }
+      } else if (dtype == paddle::PaddleDType::FLOAT16) {
+        tensor->set_elem_type(5);
+        VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
+                << model_config->_fetch_name[idx] << "].";
+        tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
+      }
       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index a824acaff2417dcb5e885c0ae9e1acd6c17e7def..a6815d4939edfb2a0d6dcebaa602b545b770d52f 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
 #include "paddle_inference_api.h"  // NOLINT
+#include "experimental/float16.h"
 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
                      paddle::PaddleDType::INT8) {
         int8_t* data = static_cast<int8_t*>(origin_data);
         lod_tensor_in->CopyFromCpu(data);
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::FLOAT16) {
+        paddle::platform::float16* data =
+            static_cast<paddle::platform::float16*>(origin_data);
+        lod_tensor_in->CopyFromCpu(data);
       } else {
         LOG(ERROR) << "Inference not support type["
                    << (*tensorVector_in_pointer)[i].dtype << "],name["
                    << (*tensorVector_in_pointer)[i].name << "]"
                    << " copy into core failed!";
       }
-      // Paddle inference will support FP16 in next version.
-      // else if ((*tensorVector_in_pointer)[i].dtype ==
-      //   paddle::PaddleDType::FLOAT16) {
-      //   paddle::platform::float16* data =
-      //     static_cast<paddle::platform::float16*>(origin_data);
-      //   lod_tensor_in->CopyFromCpu(data);
-      // }
       VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
               << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
               << ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
         int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
         lod_tensor_out->CopyToCpu(data_out);
         databuf_char = reinterpret_cast<char*>(data_out);
+      } else if (dataType == paddle::PaddleDType::FLOAT16) {
+        databuf_size = out_num * sizeof(paddle::platform::float16);
+        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        if (!databuf_data) {
+          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+          return -1;
+        }
+        paddle::platform::float16* data_out =
+            reinterpret_cast<paddle::platform::float16*>(databuf_data);
+        lod_tensor_out->CopyToCpu(data_out);
+        databuf_char = reinterpret_cast<char*>(data_out);
       }
-      // Inference will support FP16 in next version
-      // else if (dataType == paddle::PaddleDType::FLOAT16) {
-      //   using float16 = paddle::platform::float16;
-      //   databuf_size = out_num * sizeof(float16);
-      //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
-      //   if (!databuf_data) {
-      //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
-      //     return -1;
-      //   }
-      //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
-      //   lod_tensor_out->CopyToCpu(data_out);
-      //   databuf_char = reinterpret_cast<char*>(data_out);
-      // }
 
       // Because task scheduling requires OPs to use 'Channel'
       // (which is a data structure) to transfer data between OPs.
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 9a8bb3db0180b8ce4617aeee6c7462da490884d8..648678f3afd9ffdc0af4c505779fc5eca0c42a37 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ class Client(object):
                         tmp_lod = result_batch_handle.get_lod(mi, name)
                         if np.size(tmp_lod) > 0:
                             result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == float16_type:
+                    # result_map[name] will be py::array(numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
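
Usage note (not part of the patch): a minimal sketch of how a deployment could exercise the FP16 fetch path added above. The client config path, endpoint, and the feed/fetch variable names ("x", "score") are placeholder assumptions, not names taken from this diff; only the float16 decoding behavior comes from the change in client.py.

    # Hypothetical sketch: fetch an FP16 output with the patched paddle_serving_client.
    # Paths, endpoint, and variable names below are placeholders.
    import numpy as np
    from paddle_serving_client import Client

    client = Client()
    client.load_client_config("serving_client/serving_client_conf.prototxt")
    client.connect(["127.0.0.1:9393"])

    x = np.random.rand(1, 13).astype("float32")
    # With this patch, an FP16 fetch variable travels as raw bytes in
    # tensor_content and is decoded client-side into a np.float16 array.
    fetch_map = client.predict(feed={"x": x}, fetch=["score"], batch=True)
    print(fetch_map["score"].dtype)  # expected: float16 when the model emits FP16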