From 8390238607fb0f6c75cae12be3b81e56d46eb067 Mon Sep 17 00:00:00 2001 From: ShiningZhang Date: Fri, 3 Sep 2021 16:28:30 +0800 Subject: [PATCH] python client support uint8&int8 --- .../proto/general_model_service.proto | 88 +++++++++++++++---- core/general-client/include/general_model.h | 23 +++++ core/general-client/src/client.cpp | 6 +- core/general-client/src/general_model.cpp | 46 +++++++++- .../src/pybind_general_model.cpp | 13 +++ .../proto/general_model_service.proto | 2 +- core/pdcodegen/src/pdcodegen.cpp | 9 -- python/paddle_serving_client/client.py | 46 +++++++++- python/paddle_serving_client/httpclient.py | 4 +- 9 files changed, 199 insertions(+), 38 deletions(-) diff --git a/core/configure/proto/general_model_service.proto b/core/configure/proto/general_model_service.proto index 89ac489f..c2deab2f 100644 --- a/core/configure/proto/general_model_service.proto +++ b/core/configure/proto/general_model_service.proto @@ -12,41 +12,97 @@ // See the License for the specific language governing permissions and // limitations under the License. -syntax = "proto2"; +syntax = "proto3"; package baidu.paddle_serving.predictor.general_model; option java_multiple_files = true; +option cc_generic_services = true; message Tensor { - repeated string data = 1; - repeated int32 int_data = 2; - repeated int64 int64_data = 3; - repeated float float_data = 4; - optional int32 elem_type = - 5; // 0 means int64, 1 means float32, 2 means int32, 3 means string - repeated int32 shape = 6; // shape should include batch - repeated int32 lod = 7; // only for fetch tensor currently - optional string name = 8; // get from the Model prototxt - optional string alias_name = 9; // get from the Model prototxt + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. + string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; }; message Request { repeated Tensor tensor = 1; repeated string fetch_var_names = 2; - optional bool profile_server = 3 [ default = false ]; - required uint64 log_id = 4 [ default = 0 ]; + bool profile_server = 3; + uint64 log_id = 4; }; message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; }; message ModelOutput { repeated Tensor tensor = 1; - optional string engine_name = 2; + string engine_name = 2; } service GeneralModelService { - rpc inference(Request) returns (Response) {} - rpc debug(Request) returns (Response) {} + rpc inference(Request) returns (Response); + rpc debug(Request) returns (Response); }; diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index 88ec7a59..4d16637a 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -51,6 +51,8 @@ class ModelRes { res._float_value_map.end()); _int32_value_map.insert(res._int32_value_map.begin(), res._int32_value_map.end()); + _string_value_map.insert(res._string_value_map.begin(), + res._string_value_map.end()); _shape_map.insert(res._shape_map.begin(), res._shape_map.end()); _lod_map.insert(res._lod_map.begin(), res._lod_map.end()); _tensor_alias_names.insert(_tensor_alias_names.end(), @@ -68,6 +70,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -96,6 +101,12 @@ class ModelRes { std::vector&& get_int32_by_name_with_rv(const std::string& name) { return std::move(_int32_value_map[name]); } + const std::string& get_string_by_name(const std::string& name) { + return _string_value_map[name]; + } + std::string&& get_string_by_name_with_rv(const std::string& name) { + return std::move(_string_value_map[name]); + } const std::vector& get_shape_by_name(const std::string& name) { return _shape_map[name]; } @@ -128,6 +139,9 @@ class ModelRes { _int32_value_map.insert( std::make_move_iterator(std::begin(res._int32_value_map)), std::make_move_iterator(std::end(res._int32_value_map))); + _string_value_map.insert( + std::make_move_iterator(std::begin(res._string_value_map)), + std::make_move_iterator(std::end(res._string_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -145,6 +159,7 @@ class ModelRes { std::map> _int64_value_map; std::map> _float_value_map; std::map> _int32_value_map; + std::map _string_value_map; std::map> _shape_map; std::map> _lod_map; std::vector _tensor_alias_names; @@ -184,6 +199,14 @@ class PredictorRes { const std::string& name) { return std::move(_models[model_idx].get_int32_by_name_with_rv(name)); } + const std::string& get_string_by_name(const int model_idx, + const std::string& name) { + return _models[model_idx].get_string_by_name(name); + } + std::string&& get_string_by_name_with_rv(const int model_idx, + const std::string& name) { + return std::move(_models[model_idx].get_string_by_name_with_rv(name)); + } const std::vector& get_shape_by_name(const int model_idx, const std::string& name) { return _models[model_idx].get_shape_by_name(name); diff --git a/core/general-client/src/client.cpp b/core/general-client/src/client.cpp index 9af56303..0b9f067f 100644 --- a/core/general-client/src/client.cpp +++ b/core/general-client/src/client.cpp @@ -23,8 +23,8 @@ using configure::GeneralModelConfig; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8 -// will support: INT8, FLOAT16 +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 enum ProtoDataType { P_INT64 = 0, P_FLOAT32, @@ -38,7 +38,7 @@ enum ProtoDataType { P_BOOL, P_COMPLEX64, P_COMPLEX128, - P_STRING, + P_STRING = 20, }; int ServingClient::init(const std::vector& client_conf, diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index 29636bc2..b293af75 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -25,8 +25,8 @@ using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; -// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8 -// will support: INT8, FLOAT16 +// paddle inference support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 enum ProtoDataType { P_INT64 = 0, P_FLOAT32, @@ -40,7 +40,7 @@ enum ProtoDataType { P_BOOL, P_COMPLEX64, P_COMPLEX128, - P_STRING, + P_STRING = 20, }; std::once_flag gflags_init_flag; namespace py = pybind11; @@ -278,6 +278,8 @@ int PredictorClient::numpy_predict( vec_idx++; } + // Add !P_STRING feed data of string_input to tensor_content + // UINT8 INT8 FLOAT16 vec_idx = 0; for (auto &name : string_feed_name) { int idx = _feed_name_to_idx[name]; @@ -285,6 +287,35 @@ int PredictorClient::numpy_predict( LOG(ERROR) << "idx > tensor_vec.size()"; return -1; } + if (_type[idx] == P_STRING) { + continue; + } + Tensor *tensor = tensor_vec[idx]; + + for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) { + tensor->add_shape(string_shape[vec_idx][j]); + } + for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) { + tensor->add_lod(string_lod_slot_batch[vec_idx][j]); + } + tensor->set_elem_type(_type[idx]); + tensor->set_name(_feed_name[idx]); + tensor->set_alias_name(name); + + tensor->set_tensor_content(string_feed[vec_idx]); + vec_idx++; + } + + vec_idx = 0; + for (auto &name : string_feed_name) { + int idx = _feed_name_to_idx[name]; + if (idx >= tensor_vec.size()) { + LOG(ERROR) << "idx > tensor_vec.size()"; + return -1; + } + if (_type[idx] != P_STRING) { + continue; + } Tensor *tensor = tensor_vec[idx]; for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) { @@ -382,6 +413,15 @@ int PredictorClient::numpy_predict( model._int32_value_map[name] = std::vector( output.tensor(idx).int_data().begin(), output.tensor(idx).int_data().begin() + size); + } else if (_fetch_name_to_type[name] == P_UINT8) { + VLOG(2) << "fetch var " << name << "type uint8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_INT8) { + VLOG(2) << "fetch var " << name << "type int8"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); + } else if (_fetch_name_to_type[name] == P_FP16) { + VLOG(2) << "fetch var " << name << "type float16"; + model._string_value_map[name] = output.tensor(idx).tensor_content(); } } predict_res_batch.add_model_res(std::move(model)); diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp index ad26bb7d..6a29d331 100644 --- a/core/general-client/src/pybind_general_model.cpp +++ b/core/general-client/src/pybind_general_model.cpp @@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) { }); return py::array(ptr->size(), ptr->data(), capsule); }) + .def("get_int32_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + std::vector *ptr = new std::vector( + std::move(self.get_int32_by_name_with_rv(model_idx, name))); + auto capsule = py::capsule(ptr, [](void *p) { + delete reinterpret_cast *>(p); + }); + return py::array(ptr->size(), ptr->data(), capsule); + }) + .def("get_string_by_name", + [](PredictorRes &self, int model_idx, std::string &name) { + return self.get_string_by_name_with_rv(model_idx, name); + }) .def("get_shape", [](PredictorRes &self, int model_idx, std::string &name) { std::vector *ptr = new std::vector( diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 6d337b91..4b628263 100755 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -62,7 +62,7 @@ message Tensor { // 9 => BOOL // 10 => COMPLEX64 // 11 => COMPLEX128 - // 12 => STRING + // 20 => STRING int32 elem_type = 10; // Shape of the tensor, including batch dimensions. diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp index a99828ee..b41ccc80 100644 --- a/core/pdcodegen/src/pdcodegen.cpp +++ b/core/pdcodegen/src/pdcodegen.cpp @@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator { const FieldDescriptor* fd = in_shared_fields[si]; std::string field_name = fd->name(); printer->Print("\n/////$field_name$\n", "field_name", field_name); - if (fd->is_optional()) { - printer->Print( - "if (req->has_$field_name$()) {\n", "field_name", field_name); - printer->Indent(); - } if (fd->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE || fd->is_repeated()) { @@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator { "field_name", field_name); } - if (fd->is_optional()) { - printer->Outdent(); - printer->Print("}\n"); - } } printer->Print( diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py index c64254bf..eada4b8d 100755 --- a/python/paddle_serving_client/client.py +++ b/python/paddle_serving_client/client.py @@ -31,15 +31,21 @@ sys.path.append( #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16 +#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8 +#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8 +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +float16_type = 5 +uint8_type = 7 +int8_type = 8 +bytes_type = 20 #int_type,float_type,string_type are the set of each subdivision classes. int_type = set([int64_type, int32_type]) float_type = set([float32_type]) -string_type = set([bytes_type]) +string_type = set([bytes_type, float16_type, uint8_type, int8_type]) class _NOPProfiler(object): @@ -411,7 +417,7 @@ class Client(object): key)]) else: string_lod_slot_batch.append([]) - string_slot.append(feed_dict[key]) + string_slot.append(feed_dict[key].tostring()) self.has_numpy_input = True self.profile_.record('py_prepro_1') @@ -492,6 +498,38 @@ class Client(object): tmp_lod = result_batch_handle.get_lod(mi, name) if np.size(tmp_lod) > 0: result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == uint8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.uint8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod + elif self.fetch_names_to_type_[name] == int8_type: + # result_map[name] will be py::array(numpy array) + tmp_str = result_batch_handle.get_string_by_name( + mi, name) + result_map[name] = np.fromstring(tmp_str, dtype = np.int8) + if result_map[name].size == 0: + raise ValueError( + "Failed to fetch, maybe the type of [{}]" + " is wrong, please check the model file".format( + name)) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + tmp_lod = result_batch_handle.get_lod(mi, name) + if np.size(tmp_lod) > 0: + result_map["{}.lod".format(name)] = tmp_lod multi_result_map.append(result_map) ret = None if len(model_engine_names) == 1: diff --git a/python/paddle_serving_client/httpclient.py b/python/paddle_serving_client/httpclient.py index 27ed269d..9506cac3 100755 --- a/python/paddle_serving_client/httpclient.py +++ b/python/paddle_serving_client/httpclient.py @@ -32,11 +32,11 @@ from .proto import general_model_service_pb2_grpc #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32 -#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto) +#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto) int64_type = 0 float32_type = 1 int32_type = 2 -bytes_type = 3 +bytes_type = 20 # this is corresponding to the proto proto_data_key_list = ["int64_data", "float_data", "int_data", "data"] -- GitLab