diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp
index 4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5..cc55dd30a5649afac98810fb83f98a837932a523 100644
--- a/core/general-client/src/client.cpp
+++ b/core/general-client/src/client.cpp
@@ -23,8 +23,7 @@ using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -431,7 +430,8 @@ int PredictorOutputs::ParseProto(const Response& res,
          output.tensor(idx).int_data().begin(),
          output.tensor(idx).int_data().begin() + size);
    } else if (fetch_name_to_type[name] == P_UINT8
-              || fetch_name_to_type[name] == P_INT8) {
+              || fetch_name_to_type[name] == P_INT8
+              || fetch_name_to_type[name] == P_FP16) {
      VLOG(2) << "fetch var [" << name << "]type="
              << fetch_name_to_type[name];
      string_data_map[name] = output.tensor(idx).tensor_content();
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index fb71c0c9fc6e3680b8b51bad9ca891e41ef3a849..403119594c759a35d5dfd6251174627f367d9c65 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -25,8 +25,7 @@ using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 482097d3e1fa1c7f7369573b1b1a0a5fde57ae58..2ad3e4cab6b77b305494c3833f0e3781ed0fd0b7 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -31,8 +31,7 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
-// will support: FLOAT16
+// support: FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16
 enum ProtoDataType {
   P_INT64 = 0,
   P_FLOAT32,
@@ -130,11 +129,11 @@ int GeneralReaderOp::inference() {
       data_len = tensor.tensor_content().size();
       src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_FP16) {
-      // paddle inference will support FLOAT16
-      // elem_size = 1;
-      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
-      // data_len = tensor.tensor_content().size();
-      // src_ptr = tensor.tensor_content().data();
+      // copy bytes from tensor content to TensorVector
+      elem_size = 1;
+      paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index e944c8d82d8aa2ad540455200cf835ce26eb366e..07d3473ec6ce12373114bfc50a67890ac2757634 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -178,14 +178,12 @@ int GeneralResponseOp::inference() {
        VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
                << model_config->_fetch_name[idx] << "].";
        tensor->set_tensor_content(in->at(idx).data.data(),
                                   in->at(idx).data.length());
-      }
-      // inference will support fp16
-      // else if (dtype == paddle::PaddleDType::FLOAT16) {
-      //   tensor->set_elem_type(5);
-      //   VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
-      //           << model_config->_fetch_name[idx] << "].";
-      //   tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
-      // }
+      } else if (dtype == paddle::PaddleDType::FLOAT16) {
+        tensor->set_elem_type(5);
+        VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
+                << model_config->_fetch_name[idx] << "].";
+        tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
+      }
       VLOG(2) << "(logid=" << log_id << ") fetch var ["
               << model_config->_fetch_name[idx] << "] ready";
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index a824acaff2417dcb5e885c0ae9e1acd6c17e7def..a6815d4939edfb2a0d6dcebaa602b545b770d52f 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -31,6 +31,7 @@
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
 #include "paddle_inference_api.h"  // NOLINT
+#include "experimental/float16.h"
 namespace baidu {
 namespace paddle_serving {
 namespace predictor {
@@ -541,19 +542,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
                      paddle::PaddleDType::INT8) {
         int8_t* data = static_cast<int8_t*>(origin_data);
         lod_tensor_in->CopyFromCpu(data);
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::FLOAT16) {
+        paddle::platform::float16* data =
+            static_cast<paddle::platform::float16*>(origin_data);
+        lod_tensor_in->CopyFromCpu(data);
       } else {
         LOG(ERROR) << "Inference not support type["
                    << (*tensorVector_in_pointer)[i].dtype << "],name["
                    << (*tensorVector_in_pointer)[i].name << "]"
                    << " copy into core failed!";
       }
-      // Paddle inference will support FP16 in next version.
-      // else if ((*tensorVector_in_pointer)[i].dtype ==
-      //   paddle::PaddleDType::FLOAT16) {
-      //   paddle::platform::float16* data =
-      //     static_cast<paddle::platform::float16*>(origin_data);
-      //   lod_tensor_in->CopyFromCpu(data);
-      // }
       VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
               << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
               << ";tensor_dtype=" << lod_tensor_in->type();
@@ -641,20 +640,18 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
         int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
         lod_tensor_out->CopyToCpu(data_out);
         databuf_char = reinterpret_cast<char*>(data_out);
+      } else if (dataType == paddle::PaddleDType::FLOAT16) {
+        databuf_size = out_num * sizeof(paddle::platform::float16);
+        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        if (!databuf_data) {
+          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+          return -1;
+        }
+        paddle::platform::float16* data_out =
+            reinterpret_cast<paddle::platform::float16*>(databuf_data);
+        lod_tensor_out->CopyToCpu(data_out);
+        databuf_char = reinterpret_cast<char*>(data_out);
       }
-      // Inference will support FP16 in next version
-      // else if (dataType == paddle::PaddleDType::FLOAT16) {
-      //   using float16 = paddle::platform::float16;
-      //   databuf_size = out_num * sizeof(float16);
-      //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
-      //   if (!databuf_data) {
-      //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
-      //     return -1;
-      //   }
-      //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
-      //   lod_tensor_out->CopyToCpu(data_out);
-      //   databuf_char = reinterpret_cast<char*>(data_out);
-      // }
 
       // Because task scheduling requires OPs to use 'Channel'
       // (which is a data structure) to transfer data between OPs.
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 9a8bb3db0180b8ce4617aeee6c7462da490884d8..648678f3afd9ffdc0af4c505779fc5eca0c42a37 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -551,6 +551,22 @@ class Client(object):
                         tmp_lod = result_batch_handle.get_lod(mi, name)
                         if np.size(tmp_lod) > 0:
                             result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == float16_type:
+                    # result_map[name] will be py::array(numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype = np.float16)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
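
Usage note (not part of the patch): a minimal sketch of how a deployment could exercise the FP16 fetch path added above. The client config path, endpoint, and the feed/fetch variable names ("x", "score") are placeholder assumptions, not names taken from this diff; only the float16 decoding behavior comes from the change in client.py.

    # Hypothetical sketch: fetch an FP16 output with the patched paddle_serving_client.
    # Paths, endpoint, and variable names below are placeholders.
    import numpy as np
    from paddle_serving_client import Client

    client = Client()
    client.load_client_config("serving_client/serving_client_conf.prototxt")
    client.connect(["127.0.0.1:9393"])

    x = np.random.rand(1, 13).astype("float32")
    # With this patch, an FP16 fetch variable travels as raw bytes in
    # tensor_content and is decoded client-side into a np.float16 array.
    fetch_map = client.predict(feed={"x": x}, fetch=["score"], batch=True)
    print(fetch_map["score"].dtype)  # expected: float16 when the model emits FP16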