Unverified commit 2793de7c authored by Thomas Young, committed by GitHub

Merge pull request #1378 from ShiningZhang/develop

Server&client support uint8&int8
......@@ -12,41 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only used
// for fetch tensors.
repeated int32 lod = 12;
// Corresponds to the variable 'name' in the model description prototxt.
string name = 13;
// Corresponds to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response) {}
rpc debug(Request) returns (Response) {}
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
};
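For orientation, here is a minimal sketch of how a client could fill the new Tensor fields for a uint8 feed, assuming protoc-generated Python bindings for this file; the module name `general_model_service_pb2` and the variable names are illustrative, not part of this commit:

```python
import numpy as np
# Assumed protoc output for this file; the generated module name may differ.
from general_model_service_pb2 import Tensor, Request

arr = np.arange(6, dtype=np.uint8).reshape(2, 3)

tensor = Tensor()
tensor.elem_type = 7                   # 7 => UINT8, per the table above
tensor.shape.extend(arr.shape)         # shape includes the batch dimension
tensor.name = "image"                  # illustrative variable name
tensor.alias_name = "image"
tensor.tensor_content = arr.tobytes()  # UINT8/INT8/FP16/... travel as raw bytes

req = Request(tensor=[tensor], fetch_var_names=["score"], log_id=1)
```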
......@@ -88,7 +88,7 @@ class PredictorData {
const std::string& name,
const std::vector<int>& shape,
const std::vector<int>& lod,
const int datatype = 3);
const int datatype = 20);
const std::map<std::string, std::vector<float>>& float_data_map() const {
return _float_data_map;
......@@ -140,6 +140,8 @@ class PredictorData {
int get_datatype(std::string name) const;
void set_datatype(std::string name, int type);
std::string print();
private:
......@@ -159,6 +161,7 @@ class PredictorData {
oss << "{";
oss << it->first << key_seg;
const std::vector<T2>& v = it->second;
oss << v.size() << key_seg;
for (size_t i = 0; i < v.size(); ++i) {
if (i != v.size() - 1) {
oss << v[i] << val_seg;
......@@ -184,7 +187,9 @@ class PredictorData {
typename std::map<T1, T2>::const_iterator itEnd = map.end();
for (; it != itEnd; it++) {
oss << "{";
oss << it->first << key_seg << it->second;
oss << it->first << key_seg
<< "size=" << it->second.size() << key_seg
<< "type=" << this->get_datatype(it->first);
oss << "}";
}
return oss.str();
......
......@@ -51,6 +51,8 @@ class ModelRes {
res._float_value_map.end());
_int32_value_map.insert(res._int32_value_map.begin(),
res._int32_value_map.end());
_string_value_map.insert(res._string_value_map.begin(),
res._string_value_map.end());
_shape_map.insert(res._shape_map.begin(), res._shape_map.end());
_lod_map.insert(res._lod_map.begin(), res._lod_map.end());
_tensor_alias_names.insert(_tensor_alias_names.end(),
......@@ -68,6 +70,9 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
......@@ -96,6 +101,12 @@ class ModelRes {
std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
return std::move(_int32_value_map[name]);
}
const std::string& get_string_by_name(const std::string& name) {
return _string_value_map[name];
}
std::string&& get_string_by_name_with_rv(const std::string& name) {
return std::move(_string_value_map[name]);
}
const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
......@@ -128,6 +139,9 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
......@@ -145,6 +159,7 @@ class ModelRes {
std::map<std::string, std::vector<int64_t>> _int64_value_map;
std::map<std::string, std::vector<float>> _float_value_map;
std::map<std::string, std::vector<int32_t>> _int32_value_map;
std::map<std::string, std::string> _string_value_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::vector<std::string> _tensor_alias_names;
......@@ -184,6 +199,14 @@ class PredictorRes {
const std::string& name) {
return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
}
const std::string& get_string_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_string_by_name(name);
}
std::string&& get_string_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_string_by_name_with_rv(name));
}
const std::vector<int>& get_shape_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape_by_name(name);
......
......@@ -23,7 +23,23 @@ using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// Paddle Inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8
// FLOAT16 will be supported later
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
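For reference, the same codes expressed as a numpy-dtype lookup table; a purely illustrative sketch covering only the dtypes this commit wires up (the mapping and helper name are assumptions, not code from this commit):

```python
import numpy as np

# Mirrors the ProtoDataType codes above for the dtypes this commit wires up.
NP_DTYPE_TO_ELEM_TYPE = {
    np.dtype(np.int64): 0,    # P_INT64
    np.dtype(np.float32): 1,  # P_FLOAT32
    np.dtype(np.int32): 2,    # P_INT32
    np.dtype(np.uint8): 7,    # P_UINT8
    np.dtype(np.int8): 8,     # P_INT8
}

def elem_type_of(arr):
    """Return the wire-level elem_type code for a numpy array's dtype."""
    try:
        return NP_DTYPE_TO_ELEM_TYPE[arr.dtype]
    except KeyError:
        raise TypeError("unsupported dtype: %s" % arr.dtype)
```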
int ServingClient::init(const std::vector<std::string>& client_conf,
const std::string server_port) {
......@@ -156,6 +172,10 @@ int PredictorData::get_datatype(std::string name) const {
return 0;
}
void PredictorData::set_datatype(std::string name, int type) {
_datatype_map[name] = type;
}
std::string PredictorData::print() {
std::string res;
res.append(map2string<std::string, float>(_float_data_map));
......@@ -309,20 +329,25 @@ int PredictorInputs::GenProto(const PredictorInputs& inputs,
tensor->set_name(feed_name[idx]);
tensor->set_alias_name(name);
const int string_shape_size = string_shape.size();
// string_shape[vec_idx] = [1] because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape should be 1-D, but received size: "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_data);
break;
if (datatype == P_STRING) {
const int string_shape_size = string_shape.size();
// string_shape[vec_idx] = [1] because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape should be 1-D, but received size: "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_data);
break;
}
}
} else {
tensor->set_tensor_content(string_data);
}
}
return 0;
}
......@@ -355,6 +380,8 @@ int PredictorOutputs::ParseProto(const Response& res,
std::shared_ptr<PredictorOutputs::PredictorOutput> predictor_output =
std::make_shared<PredictorOutputs::PredictorOutput>();
predictor_output->engine_name = output.engine_name();
PredictorData& predictor_data = predictor_output->data;
std::map<std::string, std::vector<float>>& float_data_map = *predictor_output->data.mutable_float_data_map();
std::map<std::string, std::vector<int64_t>>& int64_data_map = *predictor_output->data.mutable_int64_data_map();
std::map<std::string, std::vector<int32_t>>& int32_data_map = *predictor_output->data.mutable_int_data_map();
......@@ -403,7 +430,13 @@ int PredictorOutputs::ParseProto(const Response& res,
int32_data_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (fetch_name_to_type[name] == P_UINT8
|| fetch_name_to_type[name] == P_INT8) {
VLOG(2) << "fetch var [" << name << "]type="
<< fetch_name_to_type[name];
string_data_map[name] = output.tensor(idx).tensor_content();
}
predictor_data.set_datatype(name, output.tensor(idx).elem_type());
idx += 1;
}
outputs.add_data(predictor_output);
......
......@@ -25,7 +25,23 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// Paddle Inference supports: FLOAT32, INT64, INT32, UINT8, INT8
// FLOAT16 will be supported later
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
std::once_flag gflags_init_flag;
namespace py = pybind11;
......@@ -262,6 +278,8 @@ int PredictorClient::numpy_predict(
vec_idx++;
}
// Non-P_STRING feed data carried in string_input goes into tensor_content
// (UINT8, INT8, FLOAT16).
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
......@@ -277,22 +295,27 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(P_STRING);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1] because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape should be 1-D, but received size: "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
if (_type[idx] != P_STRING) {
tensor->set_elem_type(_type[idx]);
tensor->set_tensor_content(string_feed[vec_idx]);
} else {
tensor->set_elem_type(P_STRING);
const int string_shape_size = string_shape[vec_idx].size();
// string_shape[vec_idx] = [1] because numpy has no string dtype;
// strings are passed via vector<vector<string> >.
if (string_shape_size != 1) {
LOG(ERROR) << "string_shape should be 1-D, but received size: "
<< string_shape_size;
return -1;
}
switch (string_shape_size) {
case 1: {
tensor->add_data(string_feed[vec_idx]);
break;
}
}
}
vec_idx++;
......@@ -366,6 +389,15 @@ int PredictorClient::numpy_predict(
model._int32_value_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_UINT8) {
VLOG(2) << "fetch var " << name << " type uint8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_INT8) {
VLOG(2) << "fetch var " << name << " type int8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var " << name << " type float16";
model._string_value_map[name] = output.tensor(idx).tensor_content();
}
}
predict_res_batch.add_model_res(std::move(model));
......
......@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_int32_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int32_t> *ptr = new std::vector<int32_t>(
std::move(self.get_int32_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int32_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_string_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_string_by_name_with_rv(model_idx, name);
})
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int> *ptr = new std::vector<int>(
......
......@@ -31,7 +31,23 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
// Paddle Inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8
// FLOAT16 will be supported later
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
P_INT32,
P_FP64,
P_INT16,
P_FP16,
P_BF16,
P_UINT8,
P_INT8,
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING = 20,
};
int GeneralReaderOp::inference() {
// read request from client
......@@ -78,6 +94,7 @@ int GeneralReaderOp::inference() {
int64_t elem_type = 0;
int64_t elem_size = 0;
int64_t databuf_size = 0;
const void* src_ptr = nullptr;
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor paddleTensor;
const Tensor &tensor = req->tensor(i);
......@@ -86,19 +103,38 @@ int GeneralReaderOp::inference() {
elem_size = 0;
databuf_size = 0;
elem_type = tensor.elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type;
src_ptr = nullptr;
if (elem_type == P_INT64) { // int64
elem_size = sizeof(int64_t);
paddleTensor.dtype = paddle::PaddleDType::INT64;
data_len = tensor.int64_data_size();
src_ptr = tensor.int64_data().data();
} else if (elem_type == P_FLOAT32) {
elem_size = sizeof(float);
paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
data_len = tensor.float_data_size();
src_ptr = tensor.float_data().data();
} else if (elem_type == P_INT32) {
elem_size = sizeof(int32_t);
paddleTensor.dtype = paddle::PaddleDType::INT32;
data_len = tensor.int_data_size();
src_ptr = tensor.int_data().data();
} else if (elem_type == P_UINT8) {
elem_size = sizeof(uint8_t);
paddleTensor.dtype = paddle::PaddleDType::UINT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_INT8) {
elem_size = sizeof(int8_t);
paddleTensor.dtype = paddle::PaddleDType::INT8;
data_len = tensor.tensor_content().size();
src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_FP16) {
// Paddle Inference will support FLOAT16 later
// elem_size = 1;
// paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
// data_len = tensor.tensor_content().size();
// src_ptr = tensor.tensor_content().data();
} else if (elem_type == P_STRING) {
// use paddle::PaddleDType::UINT8 for string data.
elem_size = sizeof(char);
......@@ -109,8 +145,18 @@ int GeneralReaderOp::inference() {
// currently only a single string is supported
for (int idx = 0; idx < tensor.data_size(); idx++) {
data_len += tensor.data()[idx].length() + 1;
src_ptr = tensor.data()[idx].data();
}
}
VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
<< "elem_size=" << elem_size << ";"
<< "dtype=" << paddleTensor.dtype << ";"
<< "data_len=" << data_len;
if (src_ptr == nullptr) {
LOG(ERROR) << "Not support var[" << i << "] with elem_type["
<< elem_type << "]";
continue;
}
// implement lod tensor here
// only support 1-D lod
// TODO(HexToString): support 2-D lod
......@@ -141,44 +187,17 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has lod_tensor and len=" << out->at(i).lod[0].back();
}
if (elem_type == P_INT64) {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int64_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
/*
int elem_num = tensor.int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.int64_data(k);
}
*/
} else if (elem_type == P_FLOAT32) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.float_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
/*int elem_num = tensor.float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[k] = tensor.float_data(k);
}*/
} else if (elem_type == P_INT32) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.int_data(0);
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
} else if (elem_type == P_STRING) {
void* dst_ptr = out->at(i).data.data();
if (!dst_ptr) {
LOG(ERROR) << "dst_ptr is nullptr";
return -1;
}
// For plain data we simply copy from src to dst;
// for string data we must iterate over every string.
if (elem_type != P_STRING) {
memcpy(dst_ptr, src_ptr, databuf_size);
} else {
char *dst_ptr = static_cast<char *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << tensor.data(0);
......
......@@ -168,7 +168,24 @@ int GeneralResponseOp::inference() {
google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
data_ptr + cap);
output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
}
} else if (dtype == paddle::PaddleDType::UINT8) {
tensor->set_elem_type(7);  // 7 => UINT8
VLOG(2) << "(logid=" << log_id << ") Prepare uint8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
} else if (dtype == paddle::PaddleDType::INT8) {
tensor->set_elem_type(8);  // 8 => INT8
VLOG(2) << "(logid=" << log_id << ") Prepare int8 var ["
<< model_config->_fetch_name[idx] << "].";
tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
}
// Paddle Inference will support FP16 later
// else if (dtype == paddle::PaddleDType::FLOAT16) {
// tensor->set_elem_type(5);
// VLOG(2) << "(logid=" << log_id << ")Prepare float16 var ["
// << model_config->_fetch_name[idx] << "].";
// tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length());
// }
VLOG(2) << "(logid=" << log_id << ") fetch var ["
<< model_config->_fetch_name[idx] << "] ready";
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
......@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only used
// for fetch tensors.
repeated int32 lod = 12;
// Corresponds to the variable 'name' in the model description prototxt.
string name = 13;
// Corresponds to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
......
......@@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator {
const FieldDescriptor* fd = in_shared_fields[si];
std::string field_name = fd->name();
printer->Print("\n/////$field_name$\n", "field_name", field_name);
if (fd->is_optional()) {
printer->Print(
"if (req->has_$field_name$()) {\n", "field_name", field_name);
printer->Indent();
}
if (fd->cpp_type() ==
google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
fd->is_repeated()) {
......@@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator {
"field_name",
field_name);
}
if (fd->is_optional()) {
printer->Outdent();
printer->Print("}\n");
}
}
printer->Print(
......
......@@ -533,7 +533,30 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::UINT8) {
uint8_t* data = static_cast<uint8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT8) {
int8_t* data = static_cast<int8_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
} else {
LOG(ERROR) << "Inference not support type["
<< (*tensorVector_in_pointer)[i].dtype
<< "],name[" << (*tensorVector_in_pointer)[i].name
<< "]" << " copy into core failed!";
}
// Paddle inference will support FP16 in next version.
// else if ((*tensorVector_in_pointer)[i].dtype ==
// paddle::PaddleDType::FLOAT16) {
// paddle::platform::float16* data =
// static_cast<paddle::platform::float16*>(origin_data);
// lod_tensor_in->CopyFromCpu(data);
// }
VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
<< ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
<< ";tensor_dtype=" << lod_tensor_in->type();
}
// After the input data is passed in,
// call 'core->Run()' to perform the prediction process.
......@@ -598,7 +621,41 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::UINT8) {
databuf_size = out_num * sizeof(uint8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
uint8_t* data_out = reinterpret_cast<uint8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
} else if (dataType == paddle::PaddleDType::INT8) {
databuf_size = out_num * sizeof(int8_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
LOG(ERROR) << "Malloc failed, size: " << databuf_size;
return -1;
}
int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
// Paddle Inference will support FP16 in the next version
// else if (dataType == paddle::PaddleDType::FLOAT16) {
// using float16 = paddle::platform::float16;
// databuf_size = out_num * sizeof(float16);
// databuf_data = MempoolWrapper::instance().malloc(databuf_size);
// if (!databuf_data) {
// LOG(ERROR) << "Malloc failed, size: " << databuf_size;
// return -1;
// }
// float16* data_out = reinterpret_cast<float16*>(databuf_data);
// lod_tensor_out->CopyToCpu(data_out);
// databuf_char = reinterpret_cast<char*>(data_out);
// }
// Task scheduling requires OPs to use a 'Channel' (a data structure)
// to transfer data between OPs, so we need to copy the processed data
// into the 'Channel' for the next OP.
......
......@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
import "pds_option.proto";
import "builtin_format.proto";
package baidu.paddle_serving.predictor.general_model;
......@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only used
// for fetch tensors.
repeated int32 lod = 12;
// Corresponds to the variable 'name' in the model description prototxt.
string name = 13;
// Corresponds to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
......
......@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
To make it easy for users to quickly request the server's prediction service over HTTP, we have wrapped the common HTTP request features (request body packing, compression, request encryption, and so on) into an HttpClient class provided to users.
At its simplest, using HttpClient takes only three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the Predict function to request the prediction service over HTTP.
At its simplest, using HttpClient takes only four steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the connect function; 4. call the Predict function to request the prediction service over HTTP.
In addition, you can configure the server IP, port, and service name (the service name must match the Service name and rpc method name in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), namely the `GeneralModelService` and `inference` fields), enable Request body compression, enable compressed Response transfer, use encrypted-model prediction (the server must be configured for model encryption), set the response timeout, and more.
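A minimal sketch of those four steps in Python, assuming an `HttpClient` class exposing methods named after the steps; the import path, sample data, and exact signatures are illustrative rather than the verified API of this commit:

```python
# Illustrative only: import path and method signatures are assumptions.
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()                                    # step 1: create the client
client.load_client_config(
    "uci_housing_client/serving_client_conf.prototxt")   # step 2: load the client config
client.connect(["127.0.0.1:9393"])                       # step 3: connect to the server
fetch_map = client.predict(
    feed={"x": [0.0137, -0.1136, 0.2553, -0.0062]},      # truncated sample features
    fetch=["price"])                                     # step 4: request a prediction
print(fetch_map)
```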
......@@ -103,7 +103,7 @@ repeated int32 numbers = 1;
```
#### elem_type
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string)
Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 20 means bytes (string)
#### fetch_var_names
......
......@@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
enum ElementType
{
Int64_type, Float32_type, Int32_type, Bytes_type;
class ElementType {
public static final int Int64_type = 0;
public static final int Float32_type = 1;
public static final int Int32_type = 2;
public static final int String_type = 20;
public static final Map<Integer, String> feedTypeToDataKey_;
static
{
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(ElementType.Int64_type, "int64_data");
feedTypeToDataKey_.put(ElementType.Float32_type, "float_data");
feedTypeToDataKey_.put(ElementType.Int32_type, "int_data");
feedTypeToDataKey_.put(ElementType.String_type, "data");
}
}
class Profiler {
......@@ -104,7 +115,6 @@ public class Client {
private Map<String, Integer> feedTypes_;
private Map<String, List<Integer>> feedShapes_;
private Map<String, Integer> feedNameToIndex_;
private Map<Integer, String> feedTypeToDataKey_;
private List<String> fetchNames_;
private Map<String, Integer> fetchTypes_;
private Set<String> lodTensorSet_;
......@@ -147,12 +157,6 @@ public class Client {
channel_ = null;
blockingStub_ = null;
feedTypeToDataKey_ = new HashMap<Integer, String>();
feedTypeToDataKey_.put(0, "int64_data");
feedTypeToDataKey_.put(1, "float_data");
feedTypeToDataKey_.put(2, "int_data");
feedTypeToDataKey_.put(3, "data");
profiler_ = new Profiler();
boolean is_profile = false;
String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
......@@ -525,7 +529,7 @@ public class Client {
jsonTensor.put("elem_type", element_type);
// Process the data and shape
String protoDataKey = feedTypeToDataKey_.get(element_type);
String protoDataKey = ElementType.feedTypeToDataKey_.get(element_type);
// If the value is an INDArray, flatten it to 1-D first.
// Its shape is then the INDArray's shape.
if(objectValue instanceof INDArray){
......@@ -535,11 +539,11 @@ public class Client {
for(long dim:indarrayShape){
shape.add((int)dim);
}
if(element_type == ElementType.Int64_type.ordinal()){
if(element_type == ElementType.Int64_type){
objectValue = tempIndArray.data().asLong();
}else if(element_type == ElementType.Int32_type.ordinal()){
}else if(element_type == ElementType.Int32_type){
objectValue = tempIndArray.data().asInt();
}else if(element_type == ElementType.Float32_type.ordinal()){
}else if(element_type == ElementType.Float32_type){
objectValue = tempIndArray.data().asFloat();
}else{
throw new Exception("INDArray 类型不支持");
......@@ -564,11 +568,11 @@ public class Client {
// Batch info cannot be derived here, so the shape is left unchanged.
// The proto field is repeated, so the data must be wrapped in a list.
if(objectValue instanceof String){
if(feedTypes_.get(protoDataKey)!= ElementType.Bytes_type.ordinal()){
if(feedTypes_.get(protoDataKey)!= ElementType.String_type){
throw new Exception("feedvar is not string-type,feed can`t be a single string.");
}
}else{
if(feedTypes_.get(protoDataKey)== ElementType.Bytes_type.ordinal()){
if(feedTypes_.get(protoDataKey)== ElementType.String_type){
throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others.");
}
}
......@@ -662,17 +666,17 @@ public class Client {
for(long dim:indarrayShape){
shape.add((int)dim);
}
if(element_type == ElementType.Int64_type.ordinal()){
if(element_type == ElementType.Int64_type){
List<Long> iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList());
tensor_builder.addAllInt64Data(iter);
}else if(element_type == ElementType.Int32_type.ordinal()){
}else if(element_type == ElementType.Int32_type){
List<Integer> iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList());
tensor_builder.addAllIntData(iter);
}else if(element_type == ElementType.Float32_type.ordinal()){
}else if(element_type == ElementType.Float32_type){
List<Float> iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat()));
tensor_builder.addAllFloatData(iter);
......@@ -684,13 +688,13 @@ public class Client {
// If the value is a primitive array, it can be used directly without processing.
// Arrays cannot be nested, so the batch cannot be derived from the data;
// the batch dimension defaults to 1, or is already included in the feed var's shape.
if(element_type == ElementType.Int64_type.ordinal()){
if(element_type == ElementType.Int64_type){
List<Long> iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList());
tensor_builder.addAllInt64Data(iter);
}else if(element_type == ElementType.Int32_type.ordinal()){
}else if(element_type == ElementType.Int32_type){
List<Integer> iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList());
tensor_builder.addAllIntData(iter);
}else if(element_type == ElementType.Float32_type.ordinal()){
}else if(element_type == ElementType.Float32_type){
List<Float> iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue));
tensor_builder.addAllFloatData(iter);
}else{
......@@ -707,11 +711,11 @@ public class Client {
// Insert the batch size at index 0.
shape.add(0, list.size());
}
if(element_type == ElementType.Int64_type.ordinal()){
if(element_type == ElementType.Int64_type){
tensor_builder.addAllInt64Data((List<Long>)(List)recursiveExtract(objectValue));
}else if(element_type == ElementType.Int32_type.ordinal()){
}else if(element_type == ElementType.Int32_type){
tensor_builder.addAllIntData((List<Integer>)(List)recursiveExtract(objectValue));
}else if(element_type == ElementType.Float32_type.ordinal()){
}else if(element_type == ElementType.Float32_type){
tensor_builder.addAllFloatData((List<Float>)(List)recursiveExtract(objectValue));
}else{
// Check whether the interface expects String or Bytes.
......@@ -723,11 +727,11 @@ public class Client {
// The proto field is repeated, so the data must be wrapped in a list.
List<Object> tempList = new ArrayList<>();
tempList.add(objectValue);
if(element_type == ElementType.Int64_type.ordinal()){
if(element_type == ElementType.Int64_type){
tensor_builder.addAllInt64Data((List<Long>)(List)tempList);
}else if(element_type == ElementType.Int32_type.ordinal()){
}else if(element_type == ElementType.Int32_type){
tensor_builder.addAllIntData((List<Integer>)(List)tempList);
}else if(element_type == ElementType.Float32_type.ordinal()){
}else if(element_type == ElementType.Float32_type){
tensor_builder.addAllFloatData((List<Float>)(List)tempList);
}else{
// Check whether the interface expects String or Bytes.
......
......@@ -12,41 +12,96 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of detail (LoD): supports variable-length data; currently only used
// for fetch tensors.
repeated int32 lod = 12;
// Corresponds to the variable 'name' in the model description prototxt.
string name = 13;
// Corresponds to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response) {}
rpc debug(Request) returns (Response) {}
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
};
......@@ -119,7 +119,7 @@ The pre-processing and post-processing is in the C++ server part, the image's
so the value of the `feed_var` parameter in the file `ocr_det_client/serving_client_conf.prototxt` should be changed.
For this case, `feed_type` should be 3 (which means the data type is string) and `shape` should be 1.
For this case, `feed_type` should be 20 (which means the data type is string) and `shape` should be 1.
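For reference, a hedged sketch of what the edited `feed_var` entry in `serving_client_conf.prototxt` might look like after this change (the variable name here is illustrative):

```
feed_var {
  name: "image"
  alias_name: "image"
  is_lod_tensor: false
  feed_type: 20
  shape: 1
}
```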
By passing in multiple client folder paths, the client can be started for multi-model prediction.
```
......
......@@ -118,7 +118,7 @@ python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --por
the `feed_var` field in `ocr_det_client/serving_client_conf.prototxt`
For this example, `feed_type` should be changed to 3 (data type string) and `shape` to 1.
For this example, `feed_type` should be changed to 20 (data type string) and `shape` to 1.
Start the client for prediction by passing in multiple client config folder paths (one per model) when launching the client.
```
......
......@@ -31,15 +31,21 @@ sys.path.append(
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto)
#param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16
#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8
#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8
#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
float16_type = 5
uint8_type = 7
int8_type = 8
bytes_type = 20
#int_type, float_type and string_type are sets grouping the subdivided type codes.
int_type = set([int64_type, int32_type])
float_type = set([float32_type])
string_type = set([bytes_type])
# fp16/uint8/int8 payloads travel as raw bytes, so they share the string path
string_type = set([bytes_type, float16_type, uint8_type, int8_type])
class _NOPProfiler(object):
......@@ -411,7 +417,10 @@ class Client(object):
key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_dict[key])
if type(feed_dict[key]) is np.ndarray:
string_slot.append(feed_dict[key].tostring())
else:
string_slot.append(feed_dict[key])
self.has_numpy_input = True
self.profile_.record('py_prepro_1')
......@@ -492,6 +501,38 @@ class Client(object):
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == uint8_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype=np.uint8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int8_type:
# result_map[name] will be py::array(numpy array)
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype=np.int8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
......
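Putting the feed-side change (numpy arrays in string slots are flattened with `tostring()`) together with the fetch-side branches above, a uint8 tensor now round-trips through raw bytes. A hedged end-to-end sketch from the caller's point of view; the server address, variable names, and shapes are illustrative, and `predict`'s signature may vary across versions:

```python
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # feed/fetch vars typed 7/8
client.connect(["127.0.0.1:9393"])

# The uint8 feed is serialized to bytes and shipped in tensor_content.
img = np.random.randint(0, 256, size=(1, 3, 224, 224), dtype=np.uint8)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])

# uint8/int8 fetch results come back as bytes and are rebuilt with
# np.fromstring into an ndarray carrying the fetched shape.
print(fetch_map["score"].dtype, fetch_map["score"].shape)
```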
......@@ -32,13 +32,18 @@ from .proto import general_model_service_pb2_grpc
#param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto)
#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
bytes_type = 20
# This maps each elem_type code to its proto data field.
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
proto_data_key_list = {
0: "int64_data",
1: "float_data",
2: "int_data",
20: "data"
}
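A small illustration of how this mapping is meant to be used when assembling a request body; the surrounding dict layout is a sketch, not the exact wire format:

```python
# Pick the proto data field that matches the tensor's elem_type code.
elem_type = 20                        # bytes/string
key = proto_data_key_list[elem_type]  # -> "data"
tensor_body = {key: ["hello"], "elem_type": elem_type, "shape": [1]}
```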
def list_flatten(items, ignore_types=(str, bytes)):
......