Commit 83902386 authored by ShiningZhang

python client support uint8 & int8

Parent ee5a9489
@@ -12,41 +12,97 @@
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";
syntax = "proto3";
package baidu.paddle_serving.predictor.general_model;
option java_multiple_files = true;
option cc_generic_services = true;
message Tensor {
repeated string data = 1;
repeated int32 int_data = 2;
repeated int64 int64_data = 3;
repeated float float_data = 4;
optional int32 elem_type =
5; // 0 means int64, 1 means float32, 2 means int32, 3 means string
repeated int32 shape = 6; // shape should include batch
repeated int32 lod = 7; // only for fetch tensor currently
optional string name = 8; // get from the Model prototxt
optional string alias_name = 9; // get from the Model prototxt
// VarType: INT64
repeated int64 int64_data = 1;
// VarType: FP32
repeated float float_data = 2;
// VarType: INT32
repeated int32 int_data = 3;
// VarType: FP64
repeated double float64_data = 4;
// VarType: UINT32
repeated uint32 uint32_data = 5;
// VarType: BOOL
repeated bool bool_data = 6;
// (Not supported) VarType: COMPLEX64; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated float complex64_data = 7;
// (Not supported) VarType: COMPLEX128; element 2x holds the real part and
// element 2x+1 the imaginary part
repeated double complex128_data = 8;
// VarType: STRING
repeated string data = 9;
// Element types:
// 0 => INT64
// 1 => FP32
// 2 => INT32
// 3 => FP64
// 4 => INT16
// 5 => FP16
// 6 => BF16
// 7 => UINT8
// 8 => INT8
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
repeated int32 shape = 11;
// Level of Details (LoD), which supports variable-length data; only for
// fetch tensors currently.
repeated int32 lod = 12;
// Corresponds to the variable 'name' in the model description prototxt.
string name = 13;
// Corresponds to the variable 'alias_name' in the model description prototxt.
string alias_name = 14;
// VarType: FP16, INT16, INT8, BF16, UINT8
bytes tensor_content = 15;
};
message Request {
repeated Tensor tensor = 1;
repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
bool profile_server = 3;
uint64 log_id = 4;
};
message Response {
repeated ModelOutput outputs = 1;
repeated int64 profile_time = 2;
// Error code
int32 err_no = 3;
// Error messages
string err_msg = 4;
};
message ModelOutput {
repeated Tensor tensor = 1;
optional string engine_name = 2;
string engine_name = 2;
}
service GeneralModelService {
rpc inference(Request) returns (Response) {}
rpc debug(Request) returns (Response) {}
rpc inference(Request) returns (Response);
rpc debug(Request) returns (Response);
};
@@ -51,6 +51,8 @@ class ModelRes {
res._float_value_map.end());
_int32_value_map.insert(res._int32_value_map.begin(),
res._int32_value_map.end());
_string_value_map.insert(res._string_value_map.begin(),
res._string_value_map.end());
_shape_map.insert(res._shape_map.begin(), res._shape_map.end());
_lod_map.insert(res._lod_map.begin(), res._lod_map.end());
_tensor_alias_names.insert(_tensor_alias_names.end(),
@@ -68,6 +70,9 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -96,6 +101,12 @@ class ModelRes {
std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
return std::move(_int32_value_map[name]);
}
const std::string& get_string_by_name(const std::string& name) {
return _string_value_map[name];
}
std::string&& get_string_by_name_with_rv(const std::string& name) {
return std::move(_string_value_map[name]);
}
const std::vector<int>& get_shape_by_name(const std::string& name) {
return _shape_map[name];
}
@@ -128,6 +139,9 @@ class ModelRes {
_int32_value_map.insert(
std::make_move_iterator(std::begin(res._int32_value_map)),
std::make_move_iterator(std::end(res._int32_value_map)));
_string_value_map.insert(
std::make_move_iterator(std::begin(res._string_value_map)),
std::make_move_iterator(std::end(res._string_value_map)));
_shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
std::make_move_iterator(std::end(res._shape_map)));
_lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -145,6 +159,7 @@ class ModelRes {
std::map<std::string, std::vector<int64_t>> _int64_value_map;
std::map<std::string, std::vector<float>> _float_value_map;
std::map<std::string, std::vector<int32_t>> _int32_value_map;
std::map<std::string, std::string> _string_value_map;
std::map<std::string, std::vector<int>> _shape_map;
std::map<std::string, std::vector<int>> _lod_map;
std::vector<std::string> _tensor_alias_names;
@@ -184,6 +199,14 @@ class PredictorRes {
const std::string& name) {
return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
}
const std::string& get_string_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_string_by_name(name);
}
std::string&& get_string_by_name_with_rv(const int model_idx,
const std::string& name) {
return std::move(_models[model_idx].get_string_by_name_with_rv(name));
}
const std::vector<int>& get_shape_by_name(const int model_idx,
const std::string& name) {
return _models[model_idx].get_shape_by_name(name);
......
@@ -23,8 +23,8 @@ using configure::GeneralModelConfig;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8
// will support: INT8, FLOAT16
// paddle inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
@@ -38,7 +38,7 @@ enum ProtoDataType {
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING,
P_STRING = 20,
};
int ServingClient::init(const std::vector<std::string>& client_conf,
......
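Pinning P_STRING to 20 keeps the ProtoDataType enum above aligned with the elem_type table in the proto, while leaving 12-19 free for numeric types that are not wired up yet. A reference-only Python mirror of the full numbering, assuming the values listed in the proto:

    from enum import IntEnum

    class ProtoDataType(IntEnum):
        """Mirror of the C++ enum; values 12-19 are deliberately unassigned."""
        INT64 = 0
        FP32 = 1
        INT32 = 2
        FP64 = 3
        INT16 = 4
        FP16 = 5
        BF16 = 6
        UINT8 = 7
        INT8 = 8
        BOOL = 9
        COMPLEX64 = 10
        COMPLEX128 = 11
        STRING = 20  # jumps to 20, matching elem_type in the proto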
@@ -25,8 +25,8 @@ using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8
// will support: INT8, FLOAT16
// paddle inference supports: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
enum ProtoDataType {
P_INT64 = 0,
P_FLOAT32,
@@ -40,7 +40,7 @@ enum ProtoDataType {
P_BOOL,
P_COMPLEX64,
P_COMPLEX128,
P_STRING,
P_STRING = 20,
};
std::once_flag gflags_init_flag;
namespace py = pybind11;
@@ -278,6 +278,8 @@ int PredictorClient::numpy_predict(
vec_idx++;
}
// Add feed data of non-P_STRING types (UINT8, INT8, FP16) from string_input
// into tensor_content
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
@@ -285,6 +287,35 @@
LOG(ERROR) << "idx >= tensor_vec.size()";
return -1;
}
if (_type[idx] == P_STRING) {
continue;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
tensor->add_shape(string_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
tensor->set_name(_feed_name[idx]);
tensor->set_alias_name(name);
tensor->set_tensor_content(string_feed[vec_idx]);
vec_idx++;
}
vec_idx = 0;
for (auto &name : string_feed_name) {
int idx = _feed_name_to_idx[name];
if (idx >= tensor_vec.size()) {
LOG(ERROR) << "idx >= tensor_vec.size()";
return -1;
}
if (_type[idx] != P_STRING) {
continue;
}
Tensor *tensor = tensor_vec[idx];
for (uint32_t j = 0; j < string_shape[vec_idx].size(); ++j) {
@@ -382,6 +413,15 @@ int PredictorClient::numpy_predict(
model._int32_value_map[name] = std::vector<int32_t>(
output.tensor(idx).int_data().begin(),
output.tensor(idx).int_data().begin() + size);
} else if (_fetch_name_to_type[name] == P_UINT8) {
VLOG(2) << "fetch var " << name << " type uint8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_INT8) {
VLOG(2) << "fetch var " << name << " type int8";
model._string_value_map[name] = output.tensor(idx).tensor_content();
} else if (_fetch_name_to_type[name] == P_FP16) {
VLOG(2) << "fetch var " << name << " type float16";
model._string_value_map[name] = output.tensor(idx).tensor_content();
}
}
predict_res_batch.add_model_res(std::move(model));
......
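The two feed loops above split the string-carried inputs: the first packs pre-serialized UINT8/INT8/FP16 bytes into tensor_content, the second handles true P_STRING data. A rough sketch of how the Python layer prepares those bytes before the call (names are placeholders; the client.py hunk below does this with tostring()):

    import numpy as np

    feed = {"image": np.zeros((1, 3, 224, 224), dtype=np.uint8)}  # placeholder

    string_feed, string_shape = [], []
    for name, arr in feed.items():
        string_feed.append(arr.tobytes())    # equivalent to the tostring() below
        string_shape.append(list(arr.shape))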
@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_int32_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int32_t> *ptr = new std::vector<int32_t>(
std::move(self.get_int32_by_name_with_rv(model_idx, name)));
auto capsule = py::capsule(ptr, [](void *p) {
delete reinterpret_cast<std::vector<int32_t> *>(p);
});
return py::array(ptr->size(), ptr->data(), capsule);
})
.def("get_string_by_name",
[](PredictorRes &self, int model_idx, std::string &name) {
return self.get_string_by_name_with_rv(model_idx, name);
})
.def("get_shape",
[](PredictorRes &self, int model_idx, std::string &name) {
std::vector<int> *ptr = new std::vector<int>(
......
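Unlike the capsule-backed getters above, which hand numpy a pointer to the C++ vector without copying, get_string_by_name moves the fetched bytes into a single Python object. A hedged sketch of consuming it, with a placeholder handle and names:

    import numpy as np

    # result_batch_handle: a PredictorRes produced by a prior predict call.
    raw = result_batch_handle.get_string_by_name(0, "score")  # placeholder args
    out = np.fromstring(raw, dtype=np.uint8)  # client.py below does the same
    out.shape = result_batch_handle.get_shape(0, "score")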
@@ -62,7 +62,7 @@ message Tensor {
// 9 => BOOL
// 10 => COMPLEX64
// 11 => COMPLEX128
// 12 => STRING
// 20 => STRING
int32 elem_type = 10;
// Shape of the tensor, including batch dimensions.
......
@@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator {
const FieldDescriptor* fd = in_shared_fields[si];
std::string field_name = fd->name();
printer->Print("\n/////$field_name$\n", "field_name", field_name);
if (fd->is_optional()) {
printer->Print(
"if (req->has_$field_name$()) {\n", "field_name", field_name);
printer->Indent();
}
if (fd->cpp_type() ==
google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
fd->is_repeated()) {
@@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator {
"field_name",
field_name);
}
if (fd->is_optional()) {
printer->Outdent();
printer->Print("}\n");
}
}
printer->Print(
......
@@ -31,15 +31,21 @@ sys.path.append(
#param 'type' (which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type' (which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type' (which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type' (which is in feed_var or fetch_var) = 3 means dataType is string (also called bytes in proto)
#param 'type' (which is in feed_var or fetch_var) = 5 means dataType is float16
#param 'type' (which is in feed_var or fetch_var) = 7 means dataType is uint8
#param 'type' (which is in feed_var or fetch_var) = 8 means dataType is int8
#param 'type' (which is in feed_var or fetch_var) = 20 means dataType is string (also called bytes in proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
float16_type = 5
uint8_type = 7
int8_type = 8
bytes_type = 20
#int_type, float_type, and string_type group the subdivided type codes.
int_type = set([int64_type, int32_type])
float_type = set([float32_type])
string_type = set([bytes_type])
string_type = set([bytes_type, float16_type, uint8_type, int8_type])
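float16, uint8, and int8 join string_type because all three travel as raw bytes in tensor_content rather than in a typed repeated field. A hypothetical helper (not part of this commit) that maps a numpy input to these codes:

    import numpy as np

    # Hypothetical mapping, for illustration only.
    _NP_DTYPE_TO_CODE = {
        np.dtype(np.int64): int64_type,
        np.dtype(np.float32): float32_type,
        np.dtype(np.int32): int32_type,
        np.dtype(np.float16): float16_type,
        np.dtype(np.uint8): uint8_type,
        np.dtype(np.int8): int8_type,
    }

    def wire_code(arr):
        """Return the feed_var/fetch_var type code for a numpy array."""
        return _NP_DTYPE_TO_CODE[arr.dtype]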
class _NOPProfiler(object):
@@ -411,7 +417,7 @@ class Client(object):
key)])
else:
string_lod_slot_batch.append([])
string_slot.append(feed_dict[key])
string_slot.append(feed_dict[key].tostring())
self.has_numpy_input = True
self.profile_.record('py_prepro_1')
@@ -492,6 +498,38 @@ class Client(object):
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == uint8_type:
# result_map[name] will be a numpy array decoded from tensor_content
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype=np.uint8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int8_type:
# result_map[name] will be a numpy array decoded from tensor_content
tmp_str = result_batch_handle.get_string_by_name(
mi, name)
result_map[name] = np.fromstring(tmp_str, dtype=np.int8)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
......
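Putting the pieces together, a hedged end-to-end sketch of the new uint8 fetch path; the config path, endpoint, and variable names are placeholders that must match the model's prototxt:

    import numpy as np
    from paddle_serving_client import Client

    client = Client()
    client.load_client_config("serving_client_conf.prototxt")  # placeholder path
    client.connect(["127.0.0.1:9393"])                         # placeholder endpoint

    img = np.random.randint(0, 256, size=(1, 3, 224, 224)).astype(np.uint8)
    fetch_map = client.predict(feed={"image": img}, fetch=["score"])
    print(fetch_map["score"].dtype)  # numpy.uint8 when the fetch_var type is 7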
@@ -32,11 +32,11 @@ from .proto import general_model_service_pb2_grpc
#param 'type' (which is in feed_var or fetch_var) = 0 means dataType is int64
#param 'type' (which is in feed_var or fetch_var) = 1 means dataType is float32
#param 'type' (which is in feed_var or fetch_var) = 2 means dataType is int32
#param 'type' (which is in feed_var or fetch_var) = 3 means dataType is string (also called bytes in proto)
#param 'type' (which is in feed_var or fetch_var) = 20 means dataType is string (also called bytes in proto)
int64_type = 0
float32_type = 1
int32_type = 2
bytes_type = 3
bytes_type = 20
# this is corresponding to the proto
proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
......