diff --git a/core/configure/proto/general_model_service.proto b/core/configure/proto/general_model_service.proto index 89ac489f8ae3b90b74c94a3f9f3c82711086cd64..c2deab2f69ea6f6ca5e77354ec955bf679f9a3d6 100644 --- a/core/configure/proto/general_model_service.proto +++ b/core/configure/proto/general_model_service.proto @@ -12,41 +12,97 @@ // See the License for the specific language governing permissions and // limitations under the License. -syntax = "proto2"; +syntax = "proto3"; package baidu.paddle_serving.predictor.general_model; option java_multiple_files = true; +option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
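+  // (Illustrative values only, not part of the schema: a variable might be
+  //  name = "fc_0.tmp_1" in the model file while clients address it by
+  //  alias_name = "price".)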
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { - rpc inference(Request) returns (Response) {} - rpc debug(Request) returns (Response) {} + rpc inference(Request) returns (Response); + rpc debug(Request) returns (Response); }; diff --git a/core/general-client/include/client.h b/core/general-client/include/client.h index 689732c512fcb7612cbd3af025a470f4cbfc84fe..11c6a2b7aa324cd09d9895f7ba1c2f8b990aad29 100644 --- a/core/general-client/include/client.h +++ b/core/general-client/include/client.h @@ -88,7 +88,7 @@ class PredictorData { const std::string& name, const std::vector& shape, const std::vector& lod, - const int datatype = 3); + const int datatype = 20); const std::map>& float_data_map() const { return _float_data_map; @@ -140,6 +140,8 @@ class PredictorData { int get_datatype(std::string name) const; + void set_datatype(std::string name, int type); + std::string print(); private: @@ -159,6 +161,7 @@ class PredictorData { oss << "{"; oss << it->first << key_seg; const std::vector& v = it->second; + oss << v.size() << key_seg; for (size_t i = 0; i < v.size(); ++i) { if (i != v.size() - 1) { oss << v[i] << val_seg; @@ -184,7 +187,9 @@ class PredictorData { typename std::map::const_iterator itEnd = map.end(); for (; it != itEnd; it++) { oss << "{"; - oss << it->first << key_seg << it->second; + oss << it->first << key_seg + << "size=" << it->second.size() << key_seg + << "type=" << this->get_datatype(it->first); oss << "}"; } return oss.str(); diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index 88ec7a59f1181eec32e2da800a9a1b71e3cdc084..4d16637a0eb7152b39cf125ae359b2ca3361ad60 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -51,6 +51,8 @@ class ModelRes { res._float_value_map.end()); _int32_value_map.insert(res._int32_value_map.begin(), res._int32_value_map.end()); + _string_value_map.insert(res._string_value_map.begin(), + res._string_value_map.end()); _shape_map.insert(res._shape_map.begin(), res._shape_map.end()); _lod_map.insert(res._lod_map.begin(), res._lod_map.end()); _tensor_alias_names.insert(_tensor_alias_names.end(), @@ -68,6 +70,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -96,6 +101,12 @@ class ModelRes { std::vector&& get_int32_by_name_with_rv(const std::string& name) { return std::move(_int32_value_map[name]); } + const std::string& get_string_by_name(const std::string& name) { + 
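+    // Note: like the sibling accessors above, this uses map operator[],
+    // which default-constructs an empty entry for an unknown name instead
+    // of reporting an error; callers should pass names from the fetch list.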
return _string_value_map[name]; + } + std::string&& get_string_by_name_with_rv(const std::string& name) { + return std::move(_string_value_map[name]); + } const std::vector& get_shape_by_name(const std::string& name) { return _shape_map[name]; } @@ -128,6 +139,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -145,6 +159,7 @@ class ModelRes { std::map> _int64_value_map; std::map> _float_value_map; std::map> _int32_value_map; + std::map _string_value_map; std::map> _shape_map; std::map> _lod_map; std::vector _tensor_alias_names; @@ -184,6 +199,14 @@ class PredictorRes { const std::string& name) { return std::move(_models[model_idx].get_int32_by_name_with_rv(name)); } + const std::string& get_string_by_name(const int model_idx, + const std::string& name) { + return _models[model_idx].get_string_by_name(name); + } + std::string&& get_string_by_name_with_rv(const int model_idx, + const std::string& name) { + return std::move(_models[model_idx].get_string_by_name_with_rv(name)); + } const std::vector& get_shape_by_name(const int model_idx, const std::string& name) { return _models[model_idx].get_shape_by_name(name); diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp index 56fb1cd1d53ba04d9d071e778594635e5e3cba6d..4d3b99f2d8c00fd8dace85b219ce60b2b7444ff5 100644 --- a/core/general-client/src/client.cpp +++ b/core/general-client/src/client.cpp @@ -23,7 +23,23 @@ using configure::GeneralModelConfig; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; int ServingClient::init(const std::vector& client_conf, const std::string server_port) { @@ -156,6 +172,10 @@ int PredictorData::get_datatype(std::string name) const { return 0; } +void PredictorData::set_datatype(std::string name, int type) { + _datatype_map[name] = type; +} + std::string PredictorData::print() { std::string res; res.append(map2string(_float_data_map)); @@ -309,20 +329,25 @@ int PredictorInputs::GenProto(const PredictorInputs& inputs, tensor->set_name(feed_name[idx]); tensor->set_alias_name(name); - const int string_shape_size = string_shape.size(); - // string_shape[vec_idx] = [1];cause numpy has no datatype of string. - // we pass string via vector >. - if (string_shape_size != 1) { - LOG(ERROR) << "string_shape_size should be 1-D, but received is : " - << string_shape_size; - return -1; - } - switch (string_shape_size) { - case 1: { - tensor->add_data(string_data); - break; + if (datatype == P_STRING) { + const int string_shape_size = string_shape.size(); + // string_shape[vec_idx] = [1];cause numpy has no datatype of string. + // we pass string via vector >. 
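+      // (Hypothetical feed for illustration: a single string arrives with
+      //  string_shape = {1}; any other rank fails the check below.)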
+ if (string_shape_size != 1) { + LOG(ERROR) << "string_shape_size should be 1-D, but received is : " + << string_shape_size; + return -1; + } + switch (string_shape_size) { + case 1: { + tensor->add_data(string_data); + break; + } } + } else { + tensor->set_tensor_content(string_data); } + } return 0; } @@ -355,6 +380,8 @@ int PredictorOutputs::ParseProto(const Response& res, std::shared_ptr predictor_output = std::make_shared(); predictor_output->engine_name = output.engine_name(); + + PredictorData& predictor_data = predictor_output->data; std::map>& float_data_map = *predictor_output->data.mutable_float_data_map(); std::map>& int64_data_map = *predictor_output->data.mutable_int64_data_map(); std::map>& int32_data_map = *predictor_output->data.mutable_int_data_map(); @@ -403,7 +430,13 @@ int PredictorOutputs::ParseProto(const Response& res, int32_data_map[name] = std::vector( output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin() + size); + } else if (fetch_name_to_type[name] == P_UINT8 + || fetch_name_to_type[name] == P_INT8) { + VLOG(2) << "fetch var [" << name << "]type=" + << fetch_name_to_type[name]; + string_data_map[name] = output.tensor(idx).tensor_content(); } + predictor_data.set_datatype(name, output.tensor(idx).elem_type()); idx += 1; } outputs.add_data(predictor_output); diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index d04ab89ae31d048e5a38ada7abec5f27d46ab62f..b8e8630b801f0777224d8c11c23578bc7049989c 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -25,7 +25,23 @@ using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; std::once_flag gflags_init_flag; namespace py = pybind11; @@ -262,6 +278,8 @@ int PredictorClient::numpy_predict( vec_idx++; } + // Add !P_STRING feed data of string_input to tensor_content + // UINT8 INT8 FLOAT16 vec_idx = 0; for (auto &name : string_feed_name) { int idx = _feed_name_to_idx[name]; @@ -277,22 +295,27 @@ int PredictorClient::numpy_predict( for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) { tensor->add_lod(string_lod_slot_batch[vec_idx][j]); } - tensor->set_elem_type(P_STRING); tensor->set_name(_feed_name[idx]); tensor->set_alias_name(name); - const int string_shape_size = string_shape[vec_idx].size(); - // string_shape[vec_idx] = [1];cause numpy has no datatype of string. - // we pass string via vector >. - if (string_shape_size != 1) { - LOG(ERROR) << "string_shape_size should be 1-D, but received is : " - << string_shape_size; - return -1; - } - switch (string_shape_size) { - case 1: { - tensor->add_data(string_feed[vec_idx]); - break; + if (_type[idx] != P_STRING) { + tensor->set_elem_type(_type[idx]); + tensor->set_tensor_content(string_feed[vec_idx]); + } else { + tensor->set_elem_type(P_STRING); + const int string_shape_size = string_shape[vec_idx].size(); + // string_shape[vec_idx] = [1];cause numpy has no datatype of string. + // we pass string via vector >. 
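+        // (Non-string feeds such as UINT8/INT8/FP16 were already packed
+        //  into tensor_content above; only true string feeds reach here.)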
+ if (string_shape_size != 1) { + LOG(ERROR) << "string_shape_size should be 1-D, but received is : " + << string_shape_size; + return -1; + } + switch (string_shape_size) { + case 1: { + tensor->add_data(string_feed[vec_idx]); + break; + } } } vec_idx++; @@ -366,6 +389,15 @@ int PredictorClient::numpy_predict( model._int32_value_map[name] = std::vector( output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin() + size); + } else if (_fetch_name_to_type[name] == P_UINT8) { + VLOG(2) << "fetch var " << name << "type uint8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_INT8) { + VLOG(2) << "fetch var " << name << "type int8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_FP16) { + VLOG(2) << "fetch var " << name << "type float16"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); } } predict_res_batch.add_model_res(std::move(model)); diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp index ad26bb7d3c175f08438ee22a5a42425fd5147117..6a29d3313ed14601b2a520b32f810a596aafdd8a 100644 --- a/core/general-client/src/pybind_general_model.cpp +++ b/core/general-client/src/pybind_general_model.cpp @@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) { }); return py::array(ptr->size(), ptr->data(), capsule); }) + .def("get_int32_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + std::vector *ptr = new std::vector( + std::move(self.get_int32_by_name_with_rv(model_idx, name))); + auto capsule = py::capsule(ptr, [](void *p) { + delete reinterpret_cast *>(p); + }); + return py::array(ptr->size(), ptr->data(), capsule); + }) + .def("get_string_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + return self.get_string_by_name_with_rv(model_idx, name); + }) .def("get_shape", [](PredictorRes &self, int model_idx, std::string &name) { std::vector *ptr = new std::vector( diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index af77df553837c594789b0e9943790fc37fc01c95..482097d3e1fa1c7f7369573b1b1a0a5fde57ae58 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -31,7 +31,23 @@ using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; -enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING }; +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +enum ProtoDataType { + P_INT64 = 0, + P_FLOAT32, + P_INT32, + P_FP64, + P_INT16, + P_FP16, + P_BF16, + P_UINT8, + P_INT8, + P_BOOL, + P_COMPLEX64, + P_COMPLEX128, + P_STRING = 20, +}; int GeneralReaderOp::inference() { // read request from client @@ -78,6 +94,7 @@ int GeneralReaderOp::inference() { int64_t elem_type = 0; int64_t elem_size = 0; int64_t databuf_size = 0; + const void* src_ptr = nullptr; for (int i = 0; i < var_num; ++i) { paddle::PaddleTensor paddleTensor; const Tensor &tensor = req->tensor(i); @@ -86,19 +103,38 @@ int GeneralReaderOp::inference() { elem_size = 0; databuf_size = 0; elem_type = tensor.elem_type(); - VLOG(2) << "var[" << i << "] has elem type: " << elem_type; + src_ptr = nullptr ; if (elem_type == P_INT64) { // int64 
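+      // Pattern shared by every branch in this chain: set elem_size,
+      // paddleTensor.dtype, data_len and src_ptr; the actual copy from
+      // src_ptr happens later (one memcpy for fixed-width types, while
+      // strings are copied element by element).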
elem_size = sizeof(int64_t); paddleTensor.dtype = paddle::PaddleDType::INT64; data_len = tensor.int64_data_size(); + src_ptr = tensor.int64_data().data(); } else if (elem_type == P_FLOAT32) { elem_size = sizeof(float); paddleTensor.dtype = paddle::PaddleDType::FLOAT32; data_len = tensor.float_data_size(); + src_ptr = tensor.float_data().data(); } else if (elem_type == P_INT32) { elem_size = sizeof(int32_t); paddleTensor.dtype = paddle::PaddleDType::INT32; data_len = tensor.int_data_size(); + src_ptr = tensor.int_data().data(); + } else if (elem_type == P_UINT8) { + elem_size = sizeof(uint8_t); + paddleTensor.dtype = paddle::PaddleDType::UINT8; + data_len = tensor.tensor_content().size(); + src_ptr = tensor.tensor_content().data(); + } else if (elem_type == P_INT8) { + elem_size = sizeof(int8_t); + paddleTensor.dtype = paddle::PaddleDType::INT8; + data_len = tensor.tensor_content().size(); + src_ptr = tensor.tensor_content().data(); + } else if (elem_type == P_FP16) { + // paddle inference will support FLOAT16 + // elem_size = 1; + // paddleTensor.dtype = paddle::PaddleDType::FLOAT16; + // data_len = tensor.tensor_content().size(); + // src_ptr = tensor.tensor_content().data(); } else if (elem_type == P_STRING) { // use paddle::PaddleDType::UINT8 as for String. elem_size = sizeof(char); @@ -109,8 +145,18 @@ int GeneralReaderOp::inference() { // now only support single string for (int idx = 0; idx < tensor.data_size(); idx++) { data_len += tensor.data()[idx].length() + 1; + src_ptr = tensor.data()[idx].data(); } } + VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";" + << "elem_size=" << elem_size << ";" + << "dtype=" << paddleTensor.dtype << ";" + << "data_len=" << data_len; + if (src_ptr == nullptr) { + LOG(ERROR) << "Not support var[" << i << "] with elem_type[" + << elem_type << "]"; + continue; + } // implement lod tensor here // only support 1-D lod // TODO(HexToString): support 2-D lod @@ -141,44 +187,17 @@ int GeneralReaderOp::inference() { VLOG(2) << "(logid=" << log_id << ") var[" << i << "] has lod_tensor and len=" << out->at(i).lod[0].back(); } - if (elem_type == P_INT64) { - int64_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.int64_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.int64_data().data(), databuf_size); - /* - int elem_num = tensor.int64_data_size(); - for (int k = 0; k < elem_num; ++k) { - dst_ptr[k] = tensor.int64_data(k); - } - */ - } else if (elem_type == P_FLOAT32) { - float *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.float_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.float_data().data(), databuf_size); - /*int elem_num = tensor.float_data_size(); - for (int k = 0; k < elem_num; ++k) { - dst_ptr[k] = tensor.float_data(k); - }*/ - } else if (elem_type == P_INT32) { - int32_t *dst_ptr = static_cast(out->at(i).data.data()); - VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i - << "] is " << tensor.int_data(0); - if (!dst_ptr) { - LOG(ERROR) << "dst_ptr is nullptr"; - return -1; - } - memcpy(dst_ptr, tensor.int_data().data(), databuf_size); - } else if (elem_type == P_STRING) { + void* dst_ptr = out->at(i).data.data(); + if (!dst_ptr) { + LOG(ERROR) << "dst_ptr is nullptr"; + return -1; + } + + // For common data, we 
just copy from src to dst + // For string data, we need to iterate through all str + if (elem_type != P_STRING) { + memcpy(dst_ptr, src_ptr, databuf_size); + } else { char *dst_ptr = static_cast(out->at(i).data.data()); VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i << "] is " << tensor.data(0); diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 9f6c8aabd72c7e1e9b8ff933c807ee7fcdc0662f..e944c8d82d8aa2ad540455200cf835ce26eb366e 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -168,7 +168,24 @@ int GeneralResponseOp::inference() { google::protobuf::RepeatedField tmp_data(data_ptr, data_ptr + cap); output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data); - } + } else if (dtype == paddle::PaddleDType::UINT8) { + tensor->set_elem_type(7); + VLOG(2) << "(logid=" << log_id << ")Prepare uint8 var [" + << model_config->_fetch_name[idx] << "]."; + tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + } else if (dtype == paddle::PaddleDType::INT8) { + tensor->set_elem_type(8); + VLOG(2) << "(logid=" << log_id << ")Prepare int8 var [" + << model_config->_fetch_name[idx] << "]."; + tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + } + // inference will support fp16 + // else if (dtype == paddle::PaddleDType::FLOAT16) { + // tensor->set_elem_type(5); + // VLOG(2) << "(logid=" << log_id << ")Prepare float16 var [" + // << model_config->_fetch_name[idx] << "]."; + // tensor->set_tensor_content(in->at(idx).data.data(), in->at(idx).data.length()); + // } VLOG(2) << "(logid=" << log_id << ") fetch var [" << model_config->_fetch_name[idx] << "] ready"; diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 8fedb60e97ec5b81263687b47ff0794880da8671..4b6282637ca6ea0617096a18bbbc3268067906bc 100755 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
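// proto3 migration note (editorial, not patch content): 'optional' and
// 'required' labels and [ default = ... ] options are not legal in proto3,
// which is why they are dropped below; unset scalar fields simply read
// back as 0 / "" / false.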
-syntax = "proto2"; +syntax = "proto3"; import "pds_option.proto"; import "builtin_format.proto"; package baidu.paddle_serving.predictor.general_model; @@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp index a99828ee3466a32d45dcabb61a2700f9362539d4..b41ccc8077a0a4e88c474a124e000df1c85697d3 100644 --- a/core/pdcodegen/src/pdcodegen.cpp +++ b/core/pdcodegen/src/pdcodegen.cpp @@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator { const FieldDescriptor* fd = in_shared_fields[si]; std::string field_name = fd->name(); printer->Print("\n/////$field_name$\n", "field_name", field_name); - if (fd->is_optional()) { - printer->Print( - "if (req->has_$field_name$()) {\n", "field_name", field_name); - printer->Indent(); - } if (fd->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE || fd->is_repeated()) { @@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator { "field_name", field_name); } - if (fd->is_optional()) { - printer->Outdent(); - printer->Print("}\n"); - } } printer->Print( diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index 45014d28d0034ec402bbd9b21eac3e832da7c1f9..67a7cf2f6396ec1b5b47c23f87b78ae77c178427 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -533,7 +533,30 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { paddle::PaddleDType::INT32) { int32_t* data = static_cast(origin_data); lod_tensor_in->CopyFromCpu(data); + } else if ((*tensorVector_in_pointer)[i].dtype == + paddle::PaddleDType::UINT8) { + uint8_t* data = static_cast(origin_data); + lod_tensor_in->CopyFromCpu(data); + } else if ((*tensorVector_in_pointer)[i].dtype == + paddle::PaddleDType::INT8) { + int8_t* data = static_cast(origin_data); + lod_tensor_in->CopyFromCpu(data); + } else { + LOG(ERROR) << "Inference not support type[" + << (*tensorVector_in_pointer)[i].dtype + << "],name[" << (*tensorVector_in_pointer)[i].name + << "]" << " copy into core failed!"; } + // Paddle inference will support FP16 in next version. + // else if ((*tensorVector_in_pointer)[i].dtype == + // paddle::PaddleDType::FLOAT16) { + // paddle::platform::float16* data = + // static_cast(origin_data); + // lod_tensor_in->CopyFromCpu(data); + // } + VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name + << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype + << ";tensor_dtype=" << lod_tensor_in->type(); } // After the input data is passed in, // call 'core->Run()' perform the prediction process. 
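The hunk above and the hunk below extend the same dtype dispatch, once for CopyFromCpu on the input side and once for CopyToCpu on the output side. Here is a minimal sketch of the element-size bookkeeping both sides depend on, using the elem_type codes from general_model_service.proto; the helper and its name are ours, not part of the patch:

```cpp
#include <cstddef>
#include <cstdint>

// elem_type codes mirrored from general_model_service.proto.
enum ProtoDataType : int {
  P_INT64 = 0, P_FLOAT32, P_INT32, P_FP64, P_INT16, P_FP16, P_BF16,
  P_UINT8, P_INT8, P_BOOL, P_COMPLEX64, P_COMPLEX128, P_STRING = 20,
};

// Bytes per element for the fixed-width types this patch wires up;
// returns 0 for anything the server does not support yet.
inline size_t ElemSize(int elem_type) {
  switch (elem_type) {
    case P_INT64:   return sizeof(int64_t);
    case P_FLOAT32: return sizeof(float);
    case P_INT32:   return sizeof(int32_t);
    case P_UINT8:   return sizeof(uint8_t);
    case P_INT8:    return sizeof(int8_t);
    case P_FP16:    return 2;  // pending FLOAT16 support; see commented-out branches
    default:        return 0;
  }
}
```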
@@ -598,7 +621,41 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { int32_t* data_out = reinterpret_cast(databuf_data); lod_tensor_out->CopyToCpu(data_out); databuf_char = reinterpret_cast(data_out); + } else if (dataType == paddle::PaddleDType::UINT8) { + databuf_size = out_num * sizeof(uint8_t); + databuf_data = MempoolWrapper::instance().malloc(databuf_size); + if (!databuf_data) { + LOG(ERROR) << "Malloc failed, size: " << databuf_size; + return -1; + } + uint8_t* data_out = reinterpret_cast(databuf_data); + lod_tensor_out->CopyToCpu(data_out); + databuf_char = reinterpret_cast(data_out); + } else if (dataType == paddle::PaddleDType::INT8) { + databuf_size = out_num * sizeof(int8_t); + databuf_data = MempoolWrapper::instance().malloc(databuf_size); + if (!databuf_data) { + LOG(ERROR) << "Malloc failed, size: " << databuf_size; + return -1; + } + int8_t* data_out = reinterpret_cast(databuf_data); + lod_tensor_out->CopyToCpu(data_out); + databuf_char = reinterpret_cast(data_out); } + // Inference will support FP16 in next version + // else if (dataType == paddle::PaddleDType::FLOAT16) { + // using float16 = paddle::platform::float16; + // databuf_size = out_num * sizeof(float16); + // databuf_data = MempoolWrapper::instance().malloc(databuf_size); + // if (!databuf_data) { + // LOG(ERROR) << "Malloc failed, size: " << databuf_size; + // return -1; + // } + // float16* data_out = reinterpret_cast(databuf_data); + // lod_tensor_out->CopyToCpu(data_out); + // databuf_char = reinterpret_cast(data_out); + // } + // Because task scheduling requires OPs to use 'Channel' // (which is a data structure) to transfer data between OPs. // We need to copy the processed data to the 'Channel' for the next OP. diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index 92032ab77e88a515c48db312e20b8acb13c9cddc..5340f4226e12b0b99147bc2972928b7d7c733057 100755 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-syntax = "proto2"; +syntax = "proto3"; import "pds_option.proto"; import "builtin_format.proto"; package baidu.paddle_serving.predictor.general_model; @@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model; option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { diff --git a/doc/HTTP_SERVICE_CN.md b/doc/HTTP_SERVICE_CN.md old mode 100644 new mode 100755 index e8050a6d48275224b2dabe2298b5d8eb9ddccc80..ef35eff2f3d9cd259a7d66800dc6866605d4cf6d --- a/doc/HTTP_SERVICE_CN.md +++ b/doc/HTTP_SERVICE_CN.md @@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 - 为了方便用户快速的使用Http方式请求Server端预测服务,我们已经将常用的Http请求的数据体封装、压缩、请求加密等功能封装为一个HttpClient类提供给用户,方便用户使用。 -使用HttpClient最简单只需要三步,1、创建一个HttpClient对象。2、加载Client端的prototxt配置文件(本例中为python/examples/fit_a_line/目录下的uci_housing_client/serving_client_conf.prototxt),3、调用Predict函数,通过Http方式请求预测服务。 +使用HttpClient最简单只需要四步,1、创建一个HttpClient对象。2、加载Client端的prototxt配置文件(本例中为python/examples/fit_a_line/目录下的uci_housing_client/serving_client_conf.prototxt)。3、调用connect函数。4、调用Predict函数,通过Http方式请求预测服务。 此外,您可以根据自己的需要配置Server端IP、Port、服务名称(此服务名称需要与[`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto)文件中的Service服务名和rpc方法名对应,即`GeneralModelService`字段和`inference`字段),设置Request数据体压缩,设置Response支持压缩传输,模型加密预测(需要配置Server端使用模型加密)、设置响应超时时间等功能。 @@ -103,7 +103,7 @@ repeated int32 numbers = 1; ``` #### elem_type -表示数据类型,0 means int64, 1 means float32, 2 means int32, 3 means bytes(string) +表示数据类型,0 means int64, 1 means float32, 2 means int32, 20 means bytes(string) #### fetch_var_names diff --git a/java/src/main/java/io/paddle/serving/client/Client.java b/java/src/main/java/io/paddle/serving/client/Client.java index 63e861ba6199c7a56129c4d3b0cb03a77d26f6b7..af4ccc5246262336ef9df05aa65beb5b91de33fd 100755 --- a/java/src/main/java/io/paddle/serving/client/Client.java +++ b/java/src/main/java/io/paddle/serving/client/Client.java @@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; -enum ElementType -{ - Int64_type, Float32_type, Int32_type, Bytes_type; +class ElementType { + public static final int Int64_type = 0; + public static final int Float32_type = 1; + public static final int Int32_type = 2; + public static final int String_type = 20; + public static final Map feedTypeToDataKey_; + static + { + feedTypeToDataKey_ = new HashMap(); + feedTypeToDataKey_.put(ElementType.Int64_type, "int64_data"); + feedTypeToDataKey_.put(ElementType.Float32_type, "float_data"); + feedTypeToDataKey_.put(ElementType.Int32_type, "int_data"); + feedTypeToDataKey_.put(ElementType.String_type, "data"); + } } class Profiler { @@ -104,7 +115,6 @@ public class Client { private Map feedTypes_; private Map> feedShapes_; private Map feedNameToIndex_; - private Map feedTypeToDataKey_; private List fetchNames_; private Map fetchTypes_; private Set lodTensorSet_; @@ -147,12 +157,6 @@ public class Client { channel_ = null; blockingStub_ = null; - feedTypeToDataKey_ = new HashMap(); - feedTypeToDataKey_.put(0, "int64_data"); - feedTypeToDataKey_.put(1, "float_data"); - 
feedTypeToDataKey_.put(2, "int_data"); - feedTypeToDataKey_.put(3, "data"); - profiler_ = new Profiler(); boolean is_profile = false; String FLAGS_profile_client = System.getenv("FLAGS_profile_client"); @@ -525,7 +529,7 @@ public class Client { jsonTensor.put("elem_type", element_type); // 处理数据与shape - String protoDataKey = feedTypeToDataKey_.get(element_type); + String protoDataKey = ElementType.feedTypeToDataKey_.get(element_type); // 如果是INDArray类型,先转为一维. // 此时shape为INDArray的shape if(objectValue instanceof INDArray){ @@ -535,11 +539,11 @@ public class Client { for(long dim:indarrayShape){ shape.add((int)dim); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ objectValue = tempIndArray.data().asLong(); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ objectValue = tempIndArray.data().asInt(); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ objectValue = tempIndArray.data().asFloat(); }else{ throw new Exception("INDArray 类型不支持"); @@ -564,11 +568,11 @@ public class Client { // 此时无法获取batch信息,故对shape不处理 // 由于Proto中为Repeated,需要把数据包装成list if(objectValue instanceof String){ - if(feedTypes_.get(protoDataKey)!= ElementType.Bytes_type.ordinal()){ + if(feedTypes_.get(protoDataKey)!= ElementType.String_type){ throw new Exception("feedvar is not string-type,feed can`t be a single string."); } }else{ - if(feedTypes_.get(protoDataKey)== ElementType.Bytes_type.ordinal()){ + if(feedTypes_.get(protoDataKey)== ElementType.String_type){ throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others."); } } @@ -662,17 +666,17 @@ public class Client { for(long dim:indarrayShape){ shape.add((int)dim); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ List iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList()); tensor_builder.addAllInt64Data(iter); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ List iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList()); tensor_builder.addAllIntData(iter); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ List iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat())); tensor_builder.addAllFloatData(iter); @@ -684,13 +688,13 @@ public class Client { // 如果是数组类型,则无须处理,直接使用即可。 // 且数组无法嵌套,此时batch无法从数据中获取 // 默认batch维度为1,或者feedVar的shape信息中已包含batch - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ List iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList()); tensor_builder.addAllInt64Data(iter); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ List iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList()); tensor_builder.addAllIntData(iter); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ List iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue)); tensor_builder.addAllFloatData(iter); }else{ @@ -707,11 +711,11 @@ public class Client { // 在index=0处,加上batch shape.add(0, list.size()); } - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == 
ElementType.Int64_type){ tensor_builder.addAllInt64Data((List)(List)recursiveExtract(objectValue)); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ tensor_builder.addAllIntData((List)(List)recursiveExtract(objectValue)); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ tensor_builder.addAllFloatData((List)(List)recursiveExtract(objectValue)); }else{ // 看接口是String还是Bytes @@ -723,11 +727,11 @@ public class Client { // 由于Proto中为Repeated,需要把数据包装成list List tempList = new ArrayList<>(); tempList.add(objectValue); - if(element_type == ElementType.Int64_type.ordinal()){ + if(element_type == ElementType.Int64_type){ tensor_builder.addAllInt64Data((List)(List)tempList); - }else if(element_type == ElementType.Int32_type.ordinal()){ + }else if(element_type == ElementType.Int32_type){ tensor_builder.addAllIntData((List)(List)tempList); - }else if(element_type == ElementType.Float32_type.ordinal()){ + }else if(element_type == ElementType.Float32_type){ tensor_builder.addAllFloatData((List)(List)tempList); }else{ // 看接口是String还是Bytes diff --git a/java/src/main/proto/general_model_service.proto b/java/src/main/proto/general_model_service.proto index 89ac489f8ae3b90b74c94a3f9f3c82711086cd64..aa06d388a468d71e968aa53b19f25c55f8c42ee1 100644 --- a/java/src/main/proto/general_model_service.proto +++ b/java/src/main/proto/general_model_service.proto @@ -12,41 +12,96 @@ // See the License for the specific language governing permissions and // limitations under the License. -syntax = "proto2"; +syntax = "proto3"; package baidu.paddle_serving.predictor.general_model; option java_multiple_files = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { - rpc inference(Request) returns (Response) {} - rpc debug(Request) returns (Response) {} + rpc inference(Request) returns (Response); + rpc debug(Request) returns (Response); }; diff --git a/python/examples/ocr/README.md b/python/examples/ocr/README.md old mode 100644 new mode 100755 index 630f01d999943b9948e153430b30d80fbabd0549..95cc210a7e68d5582e68460f2eec89419bf7fd7c --- a/python/examples/ocr/README.md +++ b/python/examples/ocr/README.md @@ -119,7 +119,7 @@ The pre-processing and post-processing is in the C + + server part, the image's so the value of parameter `feed_var` which is in the file `ocr_det_client/serving_client_conf.prototxt` should be changed. -for this case, `feed_type` should be 3(which means the data type is string),`shape` should be 1. +for this case, `feed_type` should be 20(which means the data type is string),`shape` should be 1. By passing in multiple client folder paths, the client can be started for multi model prediction. ``` diff --git a/python/examples/ocr/README_CN.md b/python/examples/ocr/README_CN.md old mode 100644 new mode 100755 index 421a4b930507abd3d36ef6db737f85a060647ced..5c0734c94aa6d61e1fdb9e8f87d5ee187c805ff0 --- a/python/examples/ocr/README_CN.md +++ b/python/examples/ocr/README_CN.md @@ -118,7 +118,7 @@ python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --por 即`ocr_det_client/serving_client_conf.prototxt`中`feed_var`字段 -对于本示例而言,`feed_type`应修改为3(数据类型为string),`shape`为1. +对于本示例而言,`feed_type`应修改为20(数据类型为string),`shape`为1. 通过在客户端启动后加入多个client模型的client配置文件夹路径,启动client进行预测。 ``` diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py index c64254bf312e46d6159ba63ea159b01f5d0c3cbc..826a2edb5d2434b5937e7ba7e6bb92708b8225d4 100755 --- a/python/paddle_serving_client/client.py +++ b/python/paddle_serving_client/client.py @@ -31,15 +31,21 @@ sys.path.append( #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16 +#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8 +#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8 +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +float16_type = 5 +uint8_type = 7 +int8_type = 8 +bytes_type = 20 #int_type,float_type,string_type are the set of each subdivision classes. 
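#Note: float16/uint8/int8 deliberately join string_type below, because their
#raw bytes travel through the same string path (tensor_content) as bytes_type.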
int_type = set([int64_type, int32_type]) float_type = set([float32_type]) -string_type = set([bytes_type]) +string_type = set([bytes_type, float16_type, uint8_type, int8_type]) class _NOPProfiler(object): @@ -411,7 +417,10 @@ class Client(object): key)]) else: string_lod_slot_batch.append([]) - string_slot.append(feed_dict[key]) + if type(feed_dict[key]) is np.ndarray: + string_slot.append(feed_dict[key].tostring()) + else: + string_slot.append(feed_dict[key]) self.has_numpy_input = True self.profile_.record('py_prepro_1') @@ -492,6 +501,38 @@ class Client(object): tmp_lod = result_batch_handle.get_lod(mi, name) if np.size(tmp_lod) > 0: result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == uint8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.uint8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == int8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.int8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod multi_result_map.append(result_map) ret = None if len(model_engine_names) == 1: diff --git a/python/paddle_serving_client/httpclient.py b/python/paddle_serving_client/httpclient.py index 27ed269db0cccc1856e963a7b02c702d845c7ca6..bb056a99732aeb1fa855b6ce1e020ada82072ed0 100755 --- a/python/paddle_serving_client/httpclient.py +++ b/python/paddle_serving_client/httpclient.py @@ -32,13 +32,18 @@ from .proto import general_model_service_pb2_grpc #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +bytes_type = 20 # this is corresponding to the proto -proto_data_key_list = ["int64_data", "float_data", "int_data", "data"] +proto_data_key_list = { + 0: "int64_data", + 1: "float_data", + 2: "int_data", + 20: "data" +} def list_flatten(items, ignore_types=(str, bytes)):
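Taken together, the fetch path for the new fixed-width types is: the server packs raw bytes into tensor_content, and the client reinterprets them by elem_type (the Python client above does this with np.fromstring). Below is a self-contained sketch of that decoding step for a C++ caller; the function name and usage are hypothetical, since the patch's C++ API returns the raw string from get_string_by_name and leaves the cast to the caller:

```cpp
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>

// Reinterpret a fetched tensor_content buffer as a typed array: the C++
// analogue of the Python client's np.fromstring(tmp_str, dtype=np.int8).
template <typename T>
std::vector<T> DecodeTensorContent(const std::string& content) {
  if (content.size() % sizeof(T) != 0) {
    throw std::runtime_error("tensor_content size not a multiple of dtype");
  }
  std::vector<T> out(content.size() / sizeof(T));
  if (!content.empty()) {
    std::memcpy(out.data(), content.data(), content.size());
  }
  return out;
}

// Hypothetical usage against the PredictorRes API added in this patch:
//   std::string raw = res.get_string_by_name(/*model_idx=*/0, "score");
//   std::vector<int8_t> score = DecodeTensorContent<int8_t>(raw);
```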