Commit ee5a9489 authored by ShiningZhang

server: support dtypes uint8 & int8

Parent 52f2c635
@@ -23,7 +23,23 @@ using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8
// will support: INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING,
};
int ServingClient::init(const std::vector<std::string>& client_conf,
const std::string server_port) {
......
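The widened enum tracks Paddle's full VarType list, but only a subset is dispatched by the server so far (paddle inference 2.1 handles FLOAT32, INT64, INT32, UINT8, INT8). Below is a minimal sketch of the element-size mapping this implies; `ElemSize` is a hypothetical helper, not part of this commit.

```cpp
#include <cstddef>
#include <cstdint>

// Same ordering as the ProtoDataType enum introduced above.
enum ProtoDataType {
  P_INT64 = 0, P_FLOAT32, P_INT32, P_FP64, P_INT16, P_FP16, P_BF16,
  P_UINT8, P_INT8, P_BOOL, P_COMPLEX64, P_COMPLEX128, P_STRING,
};

// Returns the element size in bytes, or 0 for types the server
// does not dispatch yet (FP16, BF16, ...).
inline size_t ElemSize(int elem_type) {
  switch (elem_type) {
    case P_INT64:   return sizeof(int64_t);
    case P_FLOAT32: return sizeof(float);
    case P_INT32:   return sizeof(int32_t);
    case P_UINT8:   return sizeof(uint8_t);
    case P_INT8:    return sizeof(int8_t);
    default:        return 0;
  }
}
```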
@@ -25,7 +25,23 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8
// will support: INT8, FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING,
};
std::once_flag gflags_init_flag;
namespace py = pybind11;
......
@@ -31,7 +31,23 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int GeneralReaderOp::inference() {
// read request from client
@@ -78,6 +94,7 @@ int GeneralReaderOp::inference() {
int64_t elem_type = 0;
int64_t elem_size = 0;
int64_t databuf_size = 0;
const void* src_ptr = nullptr;
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->tensor(i);
@@ -86,19 +103,38 @@ int GeneralReaderOp::inference() {
elem_size = 0;
databuf_size = 0;
elem_type = tensor.elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type;
src_ptr = nullptr;
if (elem_type == P_INT64) { // int64
elem_size = sizeof(int64_t);
paddleTensor.dtype = paddle::PaddleDType::INT64;
data_len = tensor.int64_data_size();
src_ptr = tensor.int64_data().data();
} else if (elem_type == P_FLOAT32) {
elem_size = sizeof(float);
paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
data_len = tensor.float_data_size();
src_ptr = tensor.float_data().data();
} else if (elem_type == P_INT32) {
elem_size = sizeof(int32_t);
paddleTensor.dtype = paddle::PaddleDType::INT32;
data_len = tensor.int_data_size();
src_ptr = tensor.int_data().data();
} else if (elem_type == P_UINT8) {
elem_size = sizeof(uint8_t);
paddleTensor.dtype = paddle::PaddleDType::UINT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_INT8) {
elem_size = sizeof(int8_t);
paddleTensor.dtype = paddle::PaddleDType::INT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// paddle inference will support FLOAT16
// elem_size = 1;
// paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
// data_len = tensor.tensor_content().size();
// src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 to carry string data.
elem_size = sizeof(char);
@@ -109,8 +145,18 @@ int GeneralReaderOp::inference() {
// now only support single string
for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length() + 1;
src_ptr = tensor.data()[idx].data();
}
}
VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
<< "elem_size=" << elem_size << ";"
<< "dtype=" << paddleTensor.dtype << ";"
<< "data_len=" << data_len;
if (src_ptr == nullptr) {
LOG(ERROR) << "Not support var[" << i << "] with elem_type["
<< elem_type << "]";
continue;
}
// implement lod tensor here
// only support 1-D lod
// TODO(HexToString): support 2-D lod
......@@ -141,44 +187,17 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has lod_tensor and len=" << out->at(i).lod[0].back();
}
if (elem_type == P_INT64) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
/*
int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
} else if (elem_type == P_STRING) {
void* dst_ptr = out->at(i).data.data();
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
// For common data, we just copy from src to dst
// For string data, we need to iterate through all str
if (elem_type != P_STRING) {
memcpy(dst_ptr, src_ptr, databuf_size);
} else {
char *dst_ptr = static_cast<char *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0);
......
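Taken together, this hunk reduces the reader op to one generic copy: each dtype branch only records `elem_size`, `data_len`, and `src_ptr`, and the former per-dtype `memcpy` blocks collapse into a single call. A condensed sketch of the resulting control flow, with names taken from the diff and error handling abbreviated:

```cpp
const void* src_ptr = nullptr;
int64_t elem_size = 0, data_len = 0, databuf_size = 0;
// ... one branch per elem_type fills elem_size, data_len, src_ptr ...
if (src_ptr == nullptr) {
  // unsupported elem_type: the op logs an error and skips this var
}
databuf_size = data_len * elem_size;       // bytes to copy
void* dst_ptr = out->at(i).data.data();    // buffer handed to the next OP
if (elem_type != P_STRING) {
  memcpy(dst_ptr, src_ptr, databuf_size);  // one copy for all numeric dtypes
} else {
  // strings are still copied element by element, each '\0'-terminated
}
```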
@@ -168,7 +168,24 @@ int GeneralResponseOp::inference() {
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
}
} else if (dtype == paddle::PaddleDType::UINT8) {
tensor->set_elem_type(7);
VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::INT8) {
tensor->set_elem_type(8);
VLOG(2) << "(logid=" << log_id << ")Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
}
// Paddle inference will support FP16 in the next version.
// else if (dtype == paddle::PaddleDType::FLOAT16) {
// tensor->set_elem_type(5);
// VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
// << model_config->_fetch_name[idx] << "].";
// tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
// }
VLOG(2) << "(logid=" << log_id << ") fetch var ["
<< model_config->_fetch_name[idx] << "] ready";
......
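On the client side, uint8/int8 fetch results now arrive as raw bytes in `tensor_content` rather than in a repeated numeric field. A sketch of decoding such a result, assuming the generated protobuf API for the message defined below; `DecodeUint8` is a hypothetical helper:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>
#include "general_model_service.pb.h"  // generated from the proto below

using baidu::paddle_serving::predictor::general_model::Tensor;

// Reinterpret the raw tensor_content bytes (elem_type 7 => UINT8)
// as a vector of uint8_t; shape() gives the element layout.
std::vector<uint8_t> DecodeUint8(const Tensor& t) {
  const std::string& buf = t.tensor_content();
  std::vector<uint8_t> out(buf.size());  // one byte per element
  std::memcpy(out.data(), buf.data(), buf.size());
  return out;
}
```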
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part,
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part,
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 12 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only
// used for fetch tensors.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
......
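For the feed direction, a request under the new schema carries uint8 data the same way. A sketch of building one with the generated C++ protobuf setters; the shape values are placeholders and `BuildUint8Request` is a hypothetical helper:

```cpp
#include <cstdint>
#include <vector>
#include "general_model_service.pb.h"  // generated from this proto

using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Tensor;

Request BuildUint8Request(const std::vector<uint8_t>& img) {
  Request req;
  req.set_log_id(0);
  Tensor* t = req.add_tensor();
  t->set_elem_type(7);  // 7 => UINT8, per the table above
  t->add_shape(1);      // batch dimension
  t->add_shape(static_cast<int32_t>(img.size()));
  // Raw bytes go into tensor_content instead of a repeated field.
  t->set_tensor_content(img.data(), img.size());
  return req;
}
```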
@@ -443,7 +443,30 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::UINT8) {
uint8_t* data = static_cast<uint8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT8) {
int8_t* data = static_cast<int8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else {
LOG(ERROR) << "Inference does not support type["
<< (*tensorVector_in_pointer)[i].dtype
<< "], name[" << (*tensorVector_in_pointer)[i].name
<< "]; copy into core failed!";
}
// Paddle inference will support FP16 in the next version.
// else if ((*tensorVector_in_pointer)[i].dtype ==
// paddle::PaddleDType::FLOAT16) {
// paddle::platform::float16* data =
// static_cast<paddle::platform::float16*>(origin_data);
// lod_tensor_in->CopyFromCpu(data);
// }
VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
<< ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
<< ";tensor_dtype=" << lod_tensor_in->type();
}
// After the input data is passed in,
// call 'core->Run()' to perform the prediction process.
@@ -508,7 +531,41 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::UINT8) {
databuf_size = out_num * sizeof(uint8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
uint8_t* data_out = reinterpret_cast<uint8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::INT8) {
databuf_size = out_num * sizeof(int8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
// Inference will support FP16 in the next version
// else if (dataType == paddle::PaddleDType::FLOAT16) {
// using float16 = paddle::platform::float16;
// databuf_size = out_num * sizeof(float16);
// databuf_data = MempoolWrapper::instance().malloc(databuf_size);
// if (!databuf_data) {
// LOG(ERROR) << "Malloc failed, size: " << databuf_size;
// return -1;
// }
// float16* data_out = reinterpret_cast<float16*>(databuf_data);
// lod_tensor_out->CopyToCpu(data_out);
// databuf_char = reinterpret_cast<char*>(data_out);
// }
// Task scheduling requires OPs to exchange data through 'Channel'
// (a data structure), so we copy the processed data into the 'Channel'
// for the next OP.
......
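The UINT8/INT8 branches above repeat the FLOAT32/INT32 pattern verbatim; a small template would collapse them. This is a hypothetical refactor, not part of the commit, and it assumes the `paddle_infer::Tensor` output handle and `MempoolWrapper` API used in the diff:

```cpp
// Allocate a host buffer from the mempool and copy one output tensor
// into it; T is the element type of the output.
template <typename T>
int CopyOutputToCpu(paddle_infer::Tensor* lod_tensor_out, int out_num,
                    void** databuf_data, char** databuf_char) {
  size_t databuf_size = out_num * sizeof(T);
  *databuf_data = MempoolWrapper::instance().malloc(databuf_size);
  if (!*databuf_data) {
    LOG(ERROR) << "Malloc failed, size: " << databuf_size;
    return -1;
  }
  T* data_out = reinterpret_cast<T*>(*databuf_data);
  lod_tensor_out->CopyToCpu(data_out);  // device -> host copy
  *databuf_char = reinterpret_cast<char*>(data_out);
  return 0;
}
```

Each dtype branch then reduces to one call, e.g. `CopyOutputToCpu<uint8_t>(lod_tensor_out.get(), out_num, &databuf_data, &databuf_char)`.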
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part,
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part,
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only
// used for fetch tensors.
repeated int32 lod = 12;
// Correspond to the variable 'name' in the model description prototxt.
string name = 13;
// Correspond to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
......