From 9a61d07115050106150ad411251f80ccb3b3a342 Mon Sep 17 00:00:00 2001
From: Thomas Young <35565423+HexToString@users.noreply.github.com>
Date: Wed, 20 Oct 2021 16:38:26 +0800
Subject: [PATCH] Merge pull request #1420 from ShiningZhang/dev-fp16

support datatype of fp16
---
 core/general-client/src/client.cpp             |  6 +--
 core/general-client/src/general_model.cpp      |  3 +-
 core/general-server/op/general_reader_op.cpp   | 13 +++----
 .../general-server/op/general_response_op.cpp  | 14 +++----
 core/predictor/framework/infer.h               | 37 +++++++++----------
 python/paddle_serving_client/client.py         | 16 ++++++++
 6 files changed, 49 insertions(+), 40 deletions(-)

diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp
index 4d3b99f2..cc55dd30 100644
--- a/core/general-client/src/client.cpp
+++ b/core/general-client/src/client.cpp
@@ -23,8 +23,7 @@ using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
           output.tensor(idx).int_data().begin(),
           output.tensor(idx).int_data().begin() + size);
     } else if (fetch_name_to_type[name] == P_UINT8
-              || fetch_name_to_type[name] == P_INT8) {
+              || fetch_name_to_type[name] == P_INT8
+              || fetch_name_to_type[name] == P_FP16) {
       VLOG(2) << "fetch var [" << name << "]type="
               << fetch_name_to_type[name];
       string_data_map[name] = output.tensor(idx).tensor_content();
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index fb71c0c9..40311959 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@ using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 482097d3..2ad3e4ca 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
       data_len = tensor.tensor_content().size();
       src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_FP16) {
-      // paddle inference will support FLOAT16
-      // elem_size = 1;
-      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
-      // data_len = tensor.tensor_content().size();
-      // src_ptr = tensor.tensor_content().data();
+      // copy bytes from tensor content to TensorVector
+      elem_size = 1;
+      paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index e944c8d8..07d3473e 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
         VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
                 << model_config->_fetch_name[idx] << "].";
         tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      }
-      // inference will support fp16
-      // else if (dtype == paddle::PaddleDType::FLOAT16) {
-      //   tensor->set_elem_type(5);
-      //   VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
-      //           << model_config->_fetch_name[idx] << "].";
-      //   tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      // }
+      } else if (dtype == paddle::PaddleDType::FLOAT16) {
+        tensor->set_elem_type(5);
+        VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
+                << model_config->_fetch_name[idx] << "].";
+        tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
+      }

       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index a824acaf..a6815d49 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
 #include "paddle_inference_api.h"  // NOLINT
+#include "experimental/float16.h"
 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
                 paddle::PaddleDType::INT8) {
         int8_t* data = static_cast<int8_t*>(origin_data);
         lod_tensor_in->CopyFromCpu(data);
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::FLOAT16) {
+        paddle::platform::float16* data =
+            static_cast<paddle::platform::float16*>(origin_data);
+        lod_tensor_in->CopyFromCpu(data);
       } else {
         LOG(ERROR) << "Inference not support type["
                    << (*tensorVector_in_pointer)[i].dtype << "],name["
                    << (*tensorVector_in_pointer)[i].name << "]"
                    << " copy into core failed!";
       }
-      // Paddle inference will support FP16 in next version.
-      // else if ((*tensorVector_in_pointer)[i].dtype ==
-      //   paddle::PaddleDType::FLOAT16) {
-      //   paddle::platform::float16* data =
-      //     static_cast<paddle::platform::float16*>(origin_data);
-      //   lod_tensor_in->CopyFromCpu(data);
-      // }
       VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
               << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
               << ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
         int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
         lod_tensor_out->CopyToCpu(data_out);
         databuf_char = reinterpret_cast<char*>(data_out);
+      } else if (dataType == paddle::PaddleDType::FLOAT16) {
+        databuf_size = out_num * sizeof(paddle::platform::float16);
+        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        if (!databuf_data) {
+          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+          return -1;
+        }
+        paddle::platform::float16* data_out =
+            reinterpret_cast<paddle::platform::float16*>(databuf_data);
+        lod_tensor_out->CopyToCpu(data_out);
+        databuf_char = reinterpret_cast<char*>(data_out);
       }
-      // Inference will support FP16 in next version
-      // else if (dataType == paddle::PaddleDType::FLOAT16) {
-      //   using float16 = paddle::platform::float16;
-      //   databuf_size = out_num * sizeof(float16);
-      //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
-      //   if (!databuf_data) {
-      //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
-      //     return -1;
-      //   }
-      //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
-      //   lod_tensor_out->CopyToCpu(data_out);
-      //   databuf_char = reinterpret_cast<char*>(data_out);
-      // }

       // Because task scheduling requires OPs to use 'Channel'
       // (which is a data structure) to transfer data between OPs.
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 9a8bb3db..648678f3 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ class Client(object):
                     tmp_lod = result_batch_handle.get_lod(mi, name)
                     if np.size(tmp_lod) > 0:
                         result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == float16_type:
+                    # result_map[name] will be py::array(numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
--
GitLab
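
Note on the data convention this patch relies on (a minimal sketch, not part of the patch itself): an fp16 tensor travels in tensor_content as raw float16 bytes (elem_type 5 / P_FP16), and the Python client reinterprets the fetched byte string as np.float16 and restores the shape reported by get_shape. The snippet below round-trips a float16 array through that byte convention with plain numpy; encode_fp16/decode_fp16 are illustrative helpers, not PaddleServing APIs, and np.frombuffer is the non-deprecated equivalent of the np.fromstring call added in client.py.

    # Sketch: round-trip a float16 tensor through the raw-byte convention
    # used above (tensor_content carries the fp16 bytes, shape is carried
    # separately). Helper names are hypothetical, not PaddleServing APIs.
    import numpy as np

    def encode_fp16(arr):
        # feed side: ship the float16 array as raw bytes plus its shape
        assert arr.dtype == np.float16
        return arr.tobytes(), list(arr.shape)

    def decode_fp16(raw_bytes, shape):
        # fetch side: reinterpret the byte string as float16 and restore the
        # shape, mirroring np.fromstring(..., dtype=np.float16) + reshape in
        # client.py (np.frombuffer is the non-deprecated equivalent)
        return np.frombuffer(raw_bytes, dtype=np.float16).reshape(shape)

    if __name__ == "__main__":
        x = np.random.rand(2, 3).astype(np.float16)
        raw, shape = encode_fp16(x)
        y = decode_fp16(raw, shape)
        assert np.array_equal(x, y)
        print(y.dtype, y.shape)  # float16 (2, 3)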