Unverified commit 9d18988d, authored by TeslaZhao, committed by GitHub

Merge branch 'develop' into develop

@@ -12,41 +12,97 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-syntax = "proto2";
+syntax = "proto3";
 package baidu.paddle_serving.predictor.general_model;
 option java_multiple_files = true;
+option cc_generic_services = true;
 
 message Tensor {
-  repeated string data = 1;
-  repeated int32 int_data = 2;
-  repeated int64 int64_data = 3;
-  repeated float float_data = 4;
-  optional int32 elem_type =
-      5;  // 0 means int64, 1 means float32, 2 means int32, 3 means string
-  repeated int32 shape = 6;  // shape should include batch
-  repeated int32 lod = 7;    // only for fetch tensor currently
-  optional string name = 8;        // get from the Model prototxt
-  optional string alias_name = 9;  // get from the Model prototxt
+  // VarType: INT64
+  repeated int64 int64_data = 1;
+
+  // VarType: FP32
+  repeated float float_data = 2;
+
+  // VarType: INT32
+  repeated int32 int_data = 3;
+
+  // VarType: FP64
+  repeated double float64_data = 4;
+
+  // VarType: UINT32
+  repeated uint32 uint32_data = 5;
+
+  // VarType: BOOL
+  repeated bool bool_data = 6;
+
+  // (No support) VarType: COMPLEX64; index 2x holds the real part,
+  // index 2x+1 the imaginary part
+  repeated float complex64_data = 7;
+
+  // (No support) VarType: COMPLEX128; index 2x holds the real part,
+  // index 2x+1 the imaginary part
+  repeated double complex128_data = 8;
+
+  // VarType: STRING
+  repeated string data = 9;
+
+  // Element types:
+  //   0 => INT64
+  //   1 => FP32
+  //   2 => INT32
+  //   3 => FP64
+  //   4 => INT16
+  //   5 => FP16
+  //   6 => BF16
+  //   7 => UINT8
+  //   8 => INT8
+  //   9 => BOOL
+  //  10 => COMPLEX64
+  //  11 => COMPLEX128
+  //  20 => STRING
+  int32 elem_type = 10;
+
+  // Shape of the tensor, including batch dimensions.
+  repeated int32 shape = 11;
+
+  // Level of data (LoD): supports variable-length data; only used for
+  // fetch tensors currently.
+  repeated int32 lod = 12;
+
+  // Corresponds to the variable 'name' in the model description prototxt.
+  string name = 13;
+
+  // Corresponds to the variable 'alias_name' in the model description prototxt.
+  string alias_name = 14;
+
+  // Raw bytes for VarType: FP16, INT16, INT8, BF16, UINT8
+  bytes tensor_content = 15;
 };
 
 message Request {
   repeated Tensor tensor = 1;
   repeated string fetch_var_names = 2;
-  optional bool profile_server = 3 [ default = false ];
-  required uint64 log_id = 4 [ default = 0 ];
+  bool profile_server = 3;
+  uint64 log_id = 4;
 };
 
 message Response {
   repeated ModelOutput outputs = 1;
   repeated int64 profile_time = 2;
+
+  // Error code
+  int32 err_no = 3;
+
+  // Error messages
+  string err_msg = 4;
 };
 
 message ModelOutput {
   repeated Tensor tensor = 1;
-  optional string engine_name = 2;
+  string engine_name = 2;
 }
 
 service GeneralModelService {
-  rpc inference(Request) returns (Response) {}
-  rpc debug(Request) returns (Response) {}
+  rpc inference(Request) returns (Response);
+  rpc debug(Request) returns (Response);
 };
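The reworked Tensor message keeps one typed repeated field per numeric VarType and adds `tensor_content` for types that travel as raw bytes (FP16, INT16, INT8, BF16, UINT8), with `elem_type` telling the receiver how to decode them. A minimal sketch of filling both kinds of feed from Python, assuming the proto above has been compiled with protoc and is importable as `general_model_service_pb2` (the module name and all field values below are illustrative, not part of this diff):

```python
# Sketch only: assumes the proto above was compiled and the generated
# module is importable as general_model_service_pb2 (hypothetical name).
import numpy as np
import general_model_service_pb2 as pb

req = pb.Request(fetch_var_names=["score"], log_id=1)

# Numeric feed: float32 values go into the typed field, elem_type = 1 (FP32).
t = req.tensor.add()
t.name = "x"
t.alias_name = "input"
t.shape.extend([1, 13])
t.elem_type = 1
t.float_data.extend([0.5] * 13)

# Binary feed: uint8 data travels as raw bytes in tensor_content, elem_type = 7.
img = np.zeros((1, 3, 224, 224), dtype=np.uint8)
t2 = req.tensor.add()
t2.name = "im"
t2.alias_name = "image"
t2.shape.extend(img.shape)
t2.elem_type = 7
t2.tensor_content = img.tobytes()
```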
@@ -88,7 +88,7 @@ class PredictorData {
                      const std::string& name,
                      const std::vector<int>& shape,
                      const std::vector<int>& lod,
-                     const int datatype = 3);
+                     const int datatype = 20);
 
   const std::map<std::string, std::vector<float>>& float_data_map() const {
     return _float_data_map;
@@ -140,6 +140,8 @@ class PredictorData {
   int get_datatype(std::string name) const;
 
+  void set_datatype(std::string name, int type);
+
   std::string print();
 
  private:
@@ -159,6 +161,7 @@ class PredictorData {
       oss << "{";
      oss << it->first << key_seg;
       const std::vector<T2>& v = it->second;
+      oss << v.size() << key_seg;
       for (size_t i = 0; i < v.size(); ++i) {
         if (i != v.size() - 1) {
           oss << v[i] << val_seg;
@@ -184,7 +187,9 @@ class PredictorData {
     typename std::map<T1, T2>::const_iterator itEnd = map.end();
     for (; it != itEnd; it++) {
       oss << "{";
-      oss << it->first << key_seg << it->second;
+      oss << it->first << key_seg
+          << "size=" << it->second.size() << key_seg
+          << "type=" << this->get_datatype(it->first);
       oss << "}";
     }
     return oss.str();
...
@@ -51,6 +51,8 @@ class ModelRes {
                             res._float_value_map.end());
     _int32_value_map.insert(res._int32_value_map.begin(),
                             res._int32_value_map.end());
+    _string_value_map.insert(res._string_value_map.begin(),
+                             res._string_value_map.end());
     _shape_map.insert(res._shape_map.begin(), res._shape_map.end());
     _lod_map.insert(res._lod_map.begin(), res._lod_map.end());
     _tensor_alias_names.insert(_tensor_alias_names.end(),
@@ -68,6 +70,9 @@ class ModelRes {
     _int32_value_map.insert(
         std::make_move_iterator(std::begin(res._int32_value_map)),
         std::make_move_iterator(std::end(res._int32_value_map)));
+    _string_value_map.insert(
+        std::make_move_iterator(std::begin(res._string_value_map)),
+        std::make_move_iterator(std::end(res._string_value_map)));
     _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                       std::make_move_iterator(std::end(res._shape_map)));
     _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -96,6 +101,12 @@ class ModelRes {
   std::vector<int32_t>&& get_int32_by_name_with_rv(const std::string& name) {
     return std::move(_int32_value_map[name]);
   }
+  const std::string& get_string_by_name(const std::string& name) {
+    return _string_value_map[name];
+  }
+  std::string&& get_string_by_name_with_rv(const std::string& name) {
+    return std::move(_string_value_map[name]);
+  }
   const std::vector<int>& get_shape_by_name(const std::string& name) {
     return _shape_map[name];
   }
@@ -128,6 +139,9 @@ class ModelRes {
     _int32_value_map.insert(
         std::make_move_iterator(std::begin(res._int32_value_map)),
         std::make_move_iterator(std::end(res._int32_value_map)));
+    _string_value_map.insert(
+        std::make_move_iterator(std::begin(res._string_value_map)),
+        std::make_move_iterator(std::end(res._string_value_map)));
     _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)),
                       std::make_move_iterator(std::end(res._shape_map)));
     _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)),
@@ -145,6 +159,7 @@ class ModelRes {
   std::map<std::string, std::vector<int64_t>> _int64_value_map;
   std::map<std::string, std::vector<float>> _float_value_map;
   std::map<std::string, std::vector<int32_t>> _int32_value_map;
+  std::map<std::string, std::string> _string_value_map;
   std::map<std::string, std::vector<int>> _shape_map;
   std::map<std::string, std::vector<int>> _lod_map;
   std::vector<std::string> _tensor_alias_names;
@@ -184,6 +199,14 @@ class PredictorRes {
                                               const std::string& name) {
     return std::move(_models[model_idx].get_int32_by_name_with_rv(name));
   }
+  const std::string& get_string_by_name(const int model_idx,
+                                        const std::string& name) {
+    return _models[model_idx].get_string_by_name(name);
+  }
+  std::string&& get_string_by_name_with_rv(const int model_idx,
+                                           const std::string& name) {
+    return std::move(_models[model_idx].get_string_by_name_with_rv(name));
+  }
   const std::vector<int>& get_shape_by_name(const int model_idx,
                                             const std::string& name) {
     return _models[model_idx].get_shape_by_name(name);
...
@@ -23,7 +23,23 @@ using configure::GeneralModelConfig;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
+// paddle inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8;
+// FLOAT16 will be supported later
+enum ProtoDataType {
+  P_INT64 = 0,
+  P_FLOAT32,
+  P_INT32,
+  P_FP64,
+  P_INT16,
+  P_FP16,
+  P_BF16,
+  P_UINT8,
+  P_INT8,
+  P_BOOL,
+  P_COMPLEX64,
+  P_COMPLEX128,
+  P_STRING = 20,
+};
 
 int ServingClient::init(const std::vector<std::string>& client_conf,
                         const std::string server_port) {
@@ -156,6 +172,10 @@ int PredictorData::get_datatype(std::string name) const {
   return 0;
 }
 
+void PredictorData::set_datatype(std::string name, int type) {
+  _datatype_map[name] = type;
+}
+
 std::string PredictorData::print() {
   std::string res;
   res.append(map2string<std::string, float>(_float_data_map));
@@ -309,20 +329,25 @@ int PredictorInputs::GenProto(const PredictorInputs& inputs,
     tensor->set_name(feed_name[idx]);
     tensor->set_alias_name(name);
 
-    const int string_shape_size = string_shape.size();
-    // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
-    // we pass string via vector<vector<string> >.
-    if (string_shape_size != 1) {
-      LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
-                 << string_shape_size;
-      return -1;
-    }
-    switch (string_shape_size) {
-      case 1: {
-        tensor->add_data(string_data);
-        break;
+    if (datatype == P_STRING) {
+      const int string_shape_size = string_shape.size();
+      // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
+      // we pass string via vector<vector<string> >.
+      if (string_shape_size != 1) {
+        LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
+                   << string_shape_size;
+        return -1;
      }
+      switch (string_shape_size) {
+        case 1: {
+          tensor->add_data(string_data);
+          break;
+        }
+      }
+    } else {
+      tensor->set_tensor_content(string_data);
    }
   }
   return 0;
 }
@@ -355,6 +380,8 @@ int PredictorOutputs::ParseProto(const Response& res,
     std::shared_ptr<PredictorOutputs::PredictorOutput> predictor_output =
         std::make_shared<PredictorOutputs::PredictorOutput>();
     predictor_output->engine_name = output.engine_name();
+    PredictorData& predictor_data = predictor_output->data;
     std::map<std::string, std::vector<float>>& float_data_map = *predictor_output->data.mutable_float_data_map();
     std::map<std::string, std::vector<int64_t>>& int64_data_map = *predictor_output->data.mutable_int64_data_map();
     std::map<std::string, std::vector<int32_t>>& int32_data_map = *predictor_output->data.mutable_int_data_map();
@@ -403,7 +430,13 @@ int PredictorOutputs::ParseProto(const Response& res,
         int32_data_map[name] = std::vector<int32_t>(
             output.tensor(idx).int_data().begin(),
             output.tensor(idx).int_data().begin() + size);
+      } else if (fetch_name_to_type[name] == P_UINT8 ||
+                 fetch_name_to_type[name] == P_INT8) {
+        VLOG(2) << "fetch var [" << name << "] type="
+                << fetch_name_to_type[name];
+        string_data_map[name] = output.tensor(idx).tensor_content();
       }
+      predictor_data.set_datatype(name, output.tensor(idx).elem_type());
       idx += 1;
     }
     outputs.add_data(predictor_output);
...
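The widened enum reserves the numeric codes documented on the proto's `elem_type` field; `P_STRING` is pinned at 20 rather than following the sequence, apparently to leave room for further numeric types without renumbering strings. For reference, the same mapping as a Python dict (illustrative only, not part of the patch):

```python
# elem_type codes used on the wire, mirroring the C++ ProtoDataType enum;
# only a subset (INT64, FP32, INT32, UINT8, INT8, STRING) is accepted by
# the server in this change.
PROTO_DATA_TYPE = {
    "INT64": 0, "FP32": 1, "INT32": 2, "FP64": 3, "INT16": 4,
    "FP16": 5, "BF16": 6, "UINT8": 7, "INT8": 8, "BOOL": 9,
    "COMPLEX64": 10, "COMPLEX128": 11,
    "STRING": 20,  # deliberately pinned at 20
}
```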
@@ -25,7 +25,23 @@ using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
-enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
+// paddle inference supports: FLOAT32, INT64, INT32, UINT8, INT8;
+// FLOAT16 will be supported later
+enum ProtoDataType {
+  P_INT64 = 0,
+  P_FLOAT32,
+  P_INT32,
+  P_FP64,
+  P_INT16,
+  P_FP16,
+  P_BF16,
+  P_UINT8,
+  P_INT8,
+  P_BOOL,
+  P_COMPLEX64,
+  P_COMPLEX128,
+  P_STRING = 20,
+};
 std::once_flag gflags_init_flag;
 namespace py = pybind11;
@@ -262,6 +278,8 @@ int PredictorClient::numpy_predict(
       vec_idx++;
     }
 
+    // Feed non-P_STRING string_input data (UINT8, INT8, FLOAT16) through
+    // tensor_content.
     vec_idx = 0;
     for (auto &name : string_feed_name) {
       int idx = _feed_name_to_idx[name];
@@ -277,22 +295,27 @@ int PredictorClient::numpy_predict(
       for (uint32_t j = 0; j < string_lod_slot_batch[vec_idx].size(); ++j) {
         tensor->add_lod(string_lod_slot_batch[vec_idx][j]);
       }
-      tensor->set_elem_type(P_STRING);
       tensor->set_name(_feed_name[idx]);
       tensor->set_alias_name(name);
 
-      const int string_shape_size = string_shape[vec_idx].size();
-      // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
-      // we pass string via vector<vector<string> >.
-      if (string_shape_size != 1) {
-        LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
-                   << string_shape_size;
-        return -1;
-      }
-      switch (string_shape_size) {
-        case 1: {
-          tensor->add_data(string_feed[vec_idx]);
-          break;
+      if (_type[idx] != P_STRING) {
+        tensor->set_elem_type(_type[idx]);
+        tensor->set_tensor_content(string_feed[vec_idx]);
+      } else {
+        tensor->set_elem_type(P_STRING);
+        const int string_shape_size = string_shape[vec_idx].size();
+        // string_shape[vec_idx] = [1];cause numpy has no datatype of string.
+        // we pass string via vector<vector<string> >.
+        if (string_shape_size != 1) {
+          LOG(ERROR) << "string_shape_size should be 1-D, but received is : "
+                     << string_shape_size;
+          return -1;
+        }
+        switch (string_shape_size) {
+          case 1: {
+            tensor->add_data(string_feed[vec_idx]);
+            break;
+          }
         }
       }
       vec_idx++;
@@ -366,6 +389,15 @@ int PredictorClient::numpy_predict(
         model._int32_value_map[name] = std::vector<int32_t>(
             output.tensor(idx).int_data().begin(),
             output.tensor(idx).int_data().begin() + size);
+      } else if (_fetch_name_to_type[name] == P_UINT8) {
+        VLOG(2) << "fetch var " << name << " type uint8";
+        model._string_value_map[name] = output.tensor(idx).tensor_content();
+      } else if (_fetch_name_to_type[name] == P_INT8) {
+        VLOG(2) << "fetch var " << name << " type int8";
+        model._string_value_map[name] = output.tensor(idx).tensor_content();
+      } else if (_fetch_name_to_type[name] == P_FP16) {
+        VLOG(2) << "fetch var " << name << " type float16";
+        model._string_value_map[name] = output.tensor(idx).tensor_content();
       }
     }
     predict_res_batch.add_model_res(std::move(model));
...
@@ -49,6 +49,19 @@ PYBIND11_MODULE(serving_client, m) {
             });
             return py::array(ptr->size(), ptr->data(), capsule);
           })
+      .def("get_int32_by_name",
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             std::vector<int32_t> *ptr = new std::vector<int32_t>(
+                 std::move(self.get_int32_by_name_with_rv(model_idx, name)));
+             auto capsule = py::capsule(ptr, [](void *p) {
+               delete reinterpret_cast<std::vector<int32_t> *>(p);
+             });
+             return py::array(ptr->size(), ptr->data(), capsule);
+           })
+      .def("get_string_by_name",
+           [](PredictorRes &self, int model_idx, std::string &name) {
+             return self.get_string_by_name_with_rv(model_idx, name);
+           })
       .def("get_shape",
           [](PredictorRes &self, int model_idx, std::string &name) {
             std::vector<int> *ptr = new std::vector<int>(
...
@@ -31,7 +31,23 @@ using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
-enum ProtoDataType { P_INT64, P_FLOAT32, P_INT32, P_STRING };
+// paddle inference 2.1 supports: FLOAT32, INT64, INT32, UINT8, INT8;
+// FLOAT16 will be supported later
+enum ProtoDataType {
+  P_INT64 = 0,
+  P_FLOAT32,
+  P_INT32,
+  P_FP64,
+  P_INT16,
+  P_FP16,
+  P_BF16,
+  P_UINT8,
+  P_INT8,
+  P_BOOL,
+  P_COMPLEX64,
+  P_COMPLEX128,
+  P_STRING = 20,
+};
 
 int GeneralReaderOp::inference() {
   // read request from client
@@ -78,6 +94,7 @@ int GeneralReaderOp::inference() {
   int64_t elem_type = 0;
   int64_t elem_size = 0;
   int64_t databuf_size = 0;
+  const void* src_ptr = nullptr;
   for (int i = 0; i < var_num; ++i) {
     paddle::PaddleTensor paddleTensor;
     const Tensor &tensor = req->tensor(i);
@@ -86,19 +103,38 @@ int GeneralReaderOp::inference() {
     elem_size = 0;
     databuf_size = 0;
     elem_type = tensor.elem_type();
-    VLOG(2) << "var[" << i << "] has elem type: " << elem_type;
+    src_ptr = nullptr;
     if (elem_type == P_INT64) {  // int64
       elem_size = sizeof(int64_t);
       paddleTensor.dtype = paddle::PaddleDType::INT64;
       data_len = tensor.int64_data_size();
+      src_ptr = tensor.int64_data().data();
     } else if (elem_type == P_FLOAT32) {
       elem_size = sizeof(float);
       paddleTensor.dtype = paddle::PaddleDType::FLOAT32;
       data_len = tensor.float_data_size();
+      src_ptr = tensor.float_data().data();
     } else if (elem_type == P_INT32) {
       elem_size = sizeof(int32_t);
       paddleTensor.dtype = paddle::PaddleDType::INT32;
       data_len = tensor.int_data_size();
+      src_ptr = tensor.int_data().data();
+    } else if (elem_type == P_UINT8) {
+      elem_size = sizeof(uint8_t);
+      paddleTensor.dtype = paddle::PaddleDType::UINT8;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
+    } else if (elem_type == P_INT8) {
+      elem_size = sizeof(int8_t);
+      paddleTensor.dtype = paddle::PaddleDType::INT8;
+      data_len = tensor.tensor_content().size();
+      src_ptr = tensor.tensor_content().data();
+    } else if (elem_type == P_FP16) {
+      // paddle inference will support FLOAT16
+      // elem_size = 1;
+      // paddleTensor.dtype = paddle::PaddleDType::FLOAT16;
+      // data_len = tensor.tensor_content().size();
+      // src_ptr = tensor.tensor_content().data();
     } else if (elem_type == P_STRING) {
       // use paddle::PaddleDType::UINT8 as for String.
       elem_size = sizeof(char);
@@ -109,8 +145,18 @@ int GeneralReaderOp::inference() {
       // now only support single string
       for (int idx = 0; idx < tensor.data_size(); idx++) {
         data_len += tensor.data()[idx].length() + 1;
+        src_ptr = tensor.data()[idx].data();
       }
     }
+    VLOG(2) << "var[" << i << "] has elem type: " << elem_type << ";"
+            << "elem_size=" << elem_size << ";"
+            << "dtype=" << paddleTensor.dtype << ";"
+            << "data_len=" << data_len;
+    if (src_ptr == nullptr) {
+      LOG(ERROR) << "Not support var[" << i << "] with elem_type["
+                 << elem_type << "]";
+      continue;
+    }
     // implement lod tensor here
     // only support 1-D lod
     // TODO(HexToString): support 2-D lod
@@ -141,44 +187,17 @@ int GeneralReaderOp::inference() {
       VLOG(2) << "(logid=" << log_id << ") var[" << i
               << "] has lod_tensor and len=" << out->at(i).lod[0].back();
     }
-    if (elem_type == P_INT64) {
-      int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
-      VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
-              << "] is " << tensor.int64_data(0);
-      if (!dst_ptr) {
-        LOG(ERROR) << "dst_ptr is nullptr";
-        return -1;
-      }
-      memcpy(dst_ptr, tensor.int64_data().data(), databuf_size);
-      /*
-      int elem_num = tensor.int64_data_size();
-      for (int k = 0; k < elem_num; ++k) {
-        dst_ptr[k] = tensor.int64_data(k);
-      }
-      */
-    } else if (elem_type == P_FLOAT32) {
-      float *dst_ptr = static_cast<float *>(out->at(i).data.data());
-      VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
-              << "] is " << tensor.float_data(0);
-      if (!dst_ptr) {
-        LOG(ERROR) << "dst_ptr is nullptr";
-        return -1;
-      }
-      memcpy(dst_ptr, tensor.float_data().data(), databuf_size);
-      /*int elem_num = tensor.float_data_size();
-      for (int k = 0; k < elem_num; ++k) {
-        dst_ptr[k] = tensor.float_data(k);
-      }*/
-    } else if (elem_type == P_INT32) {
-      int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
-      VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
-              << "] is " << tensor.int_data(0);
-      if (!dst_ptr) {
-        LOG(ERROR) << "dst_ptr is nullptr";
-        return -1;
-      }
-      memcpy(dst_ptr, tensor.int_data().data(), databuf_size);
-    } else if (elem_type == P_STRING) {
+    void* dst_ptr = out->at(i).data.data();
+    if (!dst_ptr) {
+      LOG(ERROR) << "dst_ptr is nullptr";
+      return -1;
+    }
+
+    // For common data, we just copy from src to dst.
+    // For string data, we need to iterate through all strings.
+    if (elem_type != P_STRING) {
+      memcpy(dst_ptr, src_ptr, databuf_size);
+    } else {
       char *dst_ptr = static_cast<char *>(out->at(i).data.data());
       VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
               << "] is " << tensor.data(0);
...
@@ -168,7 +168,24 @@ int GeneralResponseOp::inference() {
           google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
                                                             data_ptr + cap);
           output->mutable_tensor(var_idx)->mutable_int_data()->Swap(&tmp_data);
-        }
+        } else if (dtype == paddle::PaddleDType::UINT8) {
+          tensor->set_elem_type(7);
+          VLOG(2) << "(logid=" << log_id << ") Prepare uint8 var ["
+                  << model_config->_fetch_name[idx] << "].";
+          tensor->set_tensor_content(in->at(idx).data.data(),
+                                     in->at(idx).data.length());
+        } else if (dtype == paddle::PaddleDType::INT8) {
+          tensor->set_elem_type(8);
+          VLOG(2) << "(logid=" << log_id << ") Prepare int8 var ["
+                  << model_config->_fetch_name[idx] << "].";
+          tensor->set_tensor_content(in->at(idx).data.data(),
+                                     in->at(idx).data.length());
+        }
+        // paddle inference will support fp16:
+        // else if (dtype == paddle::PaddleDType::FLOAT16) {
+        //   tensor->set_elem_type(5);
+        //   VLOG(2) << "(logid=" << log_id << ") Prepare float16 var ["
+        //           << model_config->_fetch_name[idx] << "].";
+        //   tensor->set_tensor_content(in->at(idx).data.data(),
+        //                              in->at(idx).data.length());
+        // }
 
         VLOG(2) << "(logid=" << log_id << ") fetch var ["
                 << model_config->_fetch_name[idx] << "] ready";
...
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-syntax = "proto2";
+syntax = "proto3";
 import "pds_option.proto";
 import "builtin_format.proto";
 package baidu.paddle_serving.predictor.general_model;
@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
 option cc_generic_services = true;
 
 (Identical Tensor, Request, Response, and ModelOutput changes as in the
 first general_model_service.proto diff above.)
 
 service GeneralModelService {
...
@@ -1492,11 +1492,6 @@ class PdsCodeGenerator : public CodeGenerator {
         const FieldDescriptor* fd = in_shared_fields[si];
         std::string field_name = fd->name();
         printer->Print("\n/////$field_name$\n", "field_name", field_name);
-        if (fd->is_optional()) {
-          printer->Print(
-              "if (req->has_$field_name$()) {\n", "field_name", field_name);
-          printer->Indent();
-        }
         if (fd->cpp_type() ==
                 google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE ||
             fd->is_repeated()) {
@@ -1509,10 +1504,6 @@ class PdsCodeGenerator : public CodeGenerator {
               "field_name",
               field_name);
         }
-        if (fd->is_optional()) {
-          printer->Outdent();
-          printer->Print("}\n");
-        }
       }
       printer->Print(
...
@@ -533,7 +533,30 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
                        paddle::PaddleDType::INT32) {
           int32_t* data = static_cast<int32_t*>(origin_data);
           lod_tensor_in->CopyFromCpu(data);
+        } else if ((*tensorVector_in_pointer)[i].dtype ==
+                   paddle::PaddleDType::UINT8) {
+          uint8_t* data = static_cast<uint8_t*>(origin_data);
+          lod_tensor_in->CopyFromCpu(data);
+        } else if ((*tensorVector_in_pointer)[i].dtype ==
+                   paddle::PaddleDType::INT8) {
+          int8_t* data = static_cast<int8_t*>(origin_data);
+          lod_tensor_in->CopyFromCpu(data);
+        } else {
+          LOG(ERROR) << "Inference not support type["
+                     << (*tensorVector_in_pointer)[i].dtype << "], name["
+                     << (*tensorVector_in_pointer)[i].name << "]"
+                     << " copy into core failed!";
         }
+        // Paddle inference will support FP16 in the next version:
+        // else if ((*tensorVector_in_pointer)[i].dtype ==
+        //            paddle::PaddleDType::FLOAT16) {
+        //   paddle::platform::float16* data =
+        //       static_cast<paddle::platform::float16*>(origin_data);
+        //   lod_tensor_in->CopyFromCpu(data);
+        // }
+        VLOG(2) << "Tensor:name=" << (*tensorVector_in_pointer)[i].name
+                << ";in_dtype=" << (*tensorVector_in_pointer)[i].dtype
+                << ";tensor_dtype=" << lod_tensor_in->type();
       }
       // After the input data is passed in,
       // call 'core->Run()' perform the prediction process.
@@ -598,7 +621,41 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
           int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
           lod_tensor_out->CopyToCpu(data_out);
           databuf_char = reinterpret_cast<char*>(data_out);
+        } else if (dataType == paddle::PaddleDType::UINT8) {
+          databuf_size = out_num * sizeof(uint8_t);
+          databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+          if (!databuf_data) {
+            LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+            return -1;
+          }
+          uint8_t* data_out = reinterpret_cast<uint8_t*>(databuf_data);
+          lod_tensor_out->CopyToCpu(data_out);
+          databuf_char = reinterpret_cast<char*>(data_out);
+        } else if (dataType == paddle::PaddleDType::INT8) {
+          databuf_size = out_num * sizeof(int8_t);
+          databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+          if (!databuf_data) {
+            LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+            return -1;
+          }
+          int8_t* data_out = reinterpret_cast<int8_t*>(databuf_data);
+          lod_tensor_out->CopyToCpu(data_out);
+          databuf_char = reinterpret_cast<char*>(data_out);
         }
+        // Inference will support FP16 in the next version:
+        // else if (dataType == paddle::PaddleDType::FLOAT16) {
+        //   using float16 = paddle::platform::float16;
+        //   databuf_size = out_num * sizeof(float16);
+        //   databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+        //   if (!databuf_data) {
+        //     LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+        //     return -1;
+        //   }
+        //   float16* data_out = reinterpret_cast<float16*>(databuf_data);
+        //   lod_tensor_out->CopyToCpu(data_out);
+        //   databuf_char = reinterpret_cast<char*>(data_out);
+        // }
         // Because task scheduling requires OPs to use 'Channel'
         // (which is a data structure) to transfer data between OPs.
         // We need to copy the processed data to the 'Channel' for the next OP.
...
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-syntax = "proto2";
+syntax = "proto3";
 import "pds_option.proto";
 import "builtin_format.proto";
 package baidu.paddle_serving.predictor.general_model;
@@ -20,33 +20,88 @@ package baidu.paddle_serving.predictor.general_model;
 option cc_generic_services = true;
 
 (Identical Tensor, Request, Response, and ModelOutput changes as in the
 first general_model_service.proto diff above.)
 
 service GeneralModelService {
...
@@ -42,7 +42,7 @@ python3.6 -m paddle_serving_server.serve --model uci_housing_model --thread 10 -
 
 To make it easy to request the server's prediction service over HTTP, the common request-body packing, compression, and request-encryption logic has been wrapped into an HttpClient class for users.
 
-Using HttpClient takes as few as three steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the Predict function to request the prediction service over HTTP.
+Using HttpClient takes as few as four steps: 1. create an HttpClient object; 2. load the client-side prototxt configuration file (in this example, uci_housing_client/serving_client_conf.prototxt under python/examples/fit_a_line/); 3. call the connect function; 4. call the Predict function to request the prediction service over HTTP.
 
 In addition, you can configure the server IP, port, and service name (the service name must match the Service and rpc method names in [`core/general-server/proto/general_model_service.proto`](../core/general-server/proto/general_model_service.proto), i.e. the `GeneralModelService` and `inference` fields), enable Request body compression, enable compressed Response transfer, use encrypted-model prediction (requires the server to be configured with model encryption), and set the response timeout, among other options.
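A minimal Python sketch of the four steps described above (the module path, method names, and port are assumptions based on this doc, not verified against a specific release):

```python
# Hypothetical four-step sketch; module path and method signatures are
# assumed from the description above and may differ by version.
from paddle_serving_client.httpclient import HttpClient

client = HttpClient()                                   # 1. create the client
client.load_client_config(
    "uci_housing_client/serving_client_conf.prototxt")  # 2. load client config
client.connect(["127.0.0.1:9393"])                      # 3. connect to the server
fetch_map = client.predict(
    feed={"x": [0.0137, -0.1136, 0.2553, -0.0692,
                0.0582, -0.0727, -0.1583, -0.0584,
                0.6283, 0.4919, 0.1856, 0.0795, -0.0332]},
    fetch=["price"])                                    # 4. predict over HTTP
print(fetch_map)
```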
@@ -103,7 +103,7 @@ repeated int32 numbers = 1;
 ```
 
 #### elem_type
 
-Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 3 means bytes (string).
+Indicates the data type: 0 means int64, 1 means float32, 2 means int32, 20 means bytes (string).
 
 #### fetch_var_names
...
@@ -59,9 +59,20 @@ import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
 
-enum ElementType
-{
-    Int64_type, Float32_type, Int32_type, Bytes_type;
+class ElementType {
+    public static final int Int64_type = 0;
+    public static final int Float32_type = 1;
+    public static final int Int32_type = 2;
+    public static final int String_type = 20;
+    public static final Map<Integer, String> feedTypeToDataKey_;
+    static
+    {
+        feedTypeToDataKey_ = new HashMap<Integer, String>();
+        feedTypeToDataKey_.put(ElementType.Int64_type, "int64_data");
+        feedTypeToDataKey_.put(ElementType.Float32_type, "float_data");
+        feedTypeToDataKey_.put(ElementType.Int32_type, "int_data");
+        feedTypeToDataKey_.put(ElementType.String_type, "data");
+    }
 }
 
 class Profiler {
@@ -104,7 +115,6 @@ public class Client {
     private Map<String, Integer> feedTypes_;
     private Map<String, List<Integer>> feedShapes_;
     private Map<String, Integer> feedNameToIndex_;
-    private Map<Integer, String> feedTypeToDataKey_;
     private List<String> fetchNames_;
     private Map<String, Integer> fetchTypes_;
     private Set<String> lodTensorSet_;
@@ -147,12 +157,6 @@ public class Client {
         channel_ = null;
         blockingStub_ = null;
 
-        feedTypeToDataKey_ = new HashMap<Integer, String>();
-        feedTypeToDataKey_.put(0, "int64_data");
-        feedTypeToDataKey_.put(1, "float_data");
-        feedTypeToDataKey_.put(2, "int_data");
-        feedTypeToDataKey_.put(3, "data");
-
         profiler_ = new Profiler();
         boolean is_profile = false;
         String FLAGS_profile_client = System.getenv("FLAGS_profile_client");
@@ -525,7 +529,7 @@ public class Client {
             jsonTensor.put("elem_type", element_type);
             // Handle the data and shape
-            String protoDataKey = feedTypeToDataKey_.get(element_type);
+            String protoDataKey = ElementType.feedTypeToDataKey_.get(element_type);
             // If the value is an INDArray, flatten it to 1-D first;
             // shape is then the INDArray's shape.
             if(objectValue instanceof INDArray){
@@ -535,11 +539,11 @@ public class Client {
                 for(long dim:indarrayShape){
                     shape.add((int)dim);
                 }
-                if(element_type == ElementType.Int64_type.ordinal()){
+                if(element_type == ElementType.Int64_type){
                     objectValue = tempIndArray.data().asLong();
-                }else if(element_type == ElementType.Int32_type.ordinal()){
+                }else if(element_type == ElementType.Int32_type){
                     objectValue = tempIndArray.data().asInt();
-                }else if(element_type == ElementType.Float32_type.ordinal()){
+                }else if(element_type == ElementType.Float32_type){
                     objectValue = tempIndArray.data().asFloat();
                 }else{
                     throw new Exception("INDArray type not supported");
@@ -564,11 +568,11 @@ public class Client {
                 // Batch information is unavailable here, so shape is left untouched.
                 // The proto field is repeated, so the data must be wrapped into a list.
                 if(objectValue instanceof String){
-                    if(feedTypes_.get(protoDataKey)!= ElementType.Bytes_type.ordinal()){
+                    if(feedTypes_.get(protoDataKey)!= ElementType.String_type){
                         throw new Exception("feedvar is not string-type,feed can`t be a single string.");
                     }
                 }else{
-                    if(feedTypes_.get(protoDataKey)== ElementType.Bytes_type.ordinal()){
+                    if(feedTypes_.get(protoDataKey)== ElementType.String_type){
                         throw new Exception("feedvar is string-type,feed, feed can`t be a single int or others.");
                     }
                 }
@@ -662,17 +666,17 @@ public class Client {
                 for(long dim:indarrayShape){
                     shape.add((int)dim);
                 }
-                if(element_type == ElementType.Int64_type.ordinal()){
+                if(element_type == ElementType.Int64_type){
                     List<Long> iter = Arrays.stream(tempIndArray.data().asLong()).boxed().collect(Collectors.toList());
                     tensor_builder.addAllInt64Data(iter);
-                }else if(element_type == ElementType.Int32_type.ordinal()){
+                }else if(element_type == ElementType.Int32_type){
                     List<Integer> iter = Arrays.stream(tempIndArray.data().asInt()).boxed().collect(Collectors.toList());
                     tensor_builder.addAllIntData(iter);
-                }else if(element_type == ElementType.Float32_type.ordinal()){
+                }else if(element_type == ElementType.Float32_type){
                     List<Float> iter = Arrays.asList(ArrayUtils.toObject(tempIndArray.data().asFloat()));
                     tensor_builder.addAllFloatData(iter);
@@ -684,13 +688,13 @@ public class Client {
                 // Plain arrays can be used directly, no conversion needed.
                 // Arrays cannot be nested, so batch cannot be derived from the data;
                 // assume a batch dimension of 1, or that the feedVar shape already includes batch.
-                if(element_type == ElementType.Int64_type.ordinal()){
+                if(element_type == ElementType.Int64_type){
                     List<Long> iter = Arrays.stream((long[])objectValue).boxed().collect(Collectors.toList());
                     tensor_builder.addAllInt64Data(iter);
-                }else if(element_type == ElementType.Int32_type.ordinal()){
+                }else if(element_type == ElementType.Int32_type){
                     List<Integer> iter = Arrays.stream((int[])objectValue).boxed().collect(Collectors.toList());
                     tensor_builder.addAllIntData(iter);
-                }else if(element_type == ElementType.Float32_type.ordinal()){
+                }else if(element_type == ElementType.Float32_type){
                     List<Float> iter = Arrays.asList(ArrayUtils.toObject((float[])objectValue));
                     tensor_builder.addAllFloatData(iter);
                 }else{
@@ -707,11 +711,11 @@ public class Client {
                     // Insert the batch dimension at index 0.
                     shape.add(0, list.size());
                 }
-                if(element_type == ElementType.Int64_type.ordinal()){
+                if(element_type == ElementType.Int64_type){
                     tensor_builder.addAllInt64Data((List<Long>)(List)recursiveExtract(objectValue));
-                }else if(element_type == ElementType.Int32_type.ordinal()){
+                }else if(element_type == ElementType.Int32_type){
                     tensor_builder.addAllIntData((List<Integer>)(List)recursiveExtract(objectValue));
-                }else if(element_type == ElementType.Float32_type.ordinal()){
+                }else if(element_type == ElementType.Float32_type){
                     tensor_builder.addAllFloatData((List<Float>)(List)recursiveExtract(objectValue));
                 }else{
                     // Depends on whether the interface takes String or Bytes.
@@ -723,11 +727,11 @@ public class Client {
                     // The proto field is repeated, so wrap the data into a list.
                     List<Object> tempList = new ArrayList<>();
                     tempList.add(objectValue);
-                    if(element_type == ElementType.Int64_type.ordinal()){
+                    if(element_type == ElementType.Int64_type){
                         tensor_builder.addAllInt64Data((List<Long>)(List)tempList);
-                    }else if(element_type == ElementType.Int32_type.ordinal()){
+                    }else if(element_type == ElementType.Int32_type){
                         tensor_builder.addAllIntData((List<Integer>)(List)tempList);
-                    }else if(element_type == ElementType.Float32_type.ordinal()){
+                    }else if(element_type == ElementType.Float32_type){
                         tensor_builder.addAllFloatData((List<Float>)(List)tempList);
                     }else{
                         // Depends on whether the interface takes String or Bytes.
...
@@ -12,41 +12,96 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-syntax = "proto2";
+syntax = "proto3";
 package baidu.paddle_serving.predictor.general_model;
 
 option java_multiple_files = true;
 
 (Identical Tensor, Request, Response, and ModelOutput changes as in the
 first general_model_service.proto diff above.)
 
 service GeneralModelService {
-  rpc inference(Request) returns (Response) {}
-  rpc debug(Request) returns (Response) {}
+  rpc inference(Request) returns (Response);
+  rpc debug(Request) returns (Response);
 };
@@ -119,7 +119,7 @@ The pre-processing and post-processing is in the C++ server part, the image's
 so the value of parameter `feed_var` which is in the file `ocr_det_client/serving_client_conf.prototxt` should be changed.
 
-for this case, `feed_type` should be 3 (which means the data type is string), `shape` should be 1.
+for this case, `feed_type` should be 20 (which means the data type is string), `shape` should be 1.
 
 By passing in multiple client folder paths, the client can be started for multi model prediction.
 ```
...
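A hedged sketch of feeding a raw image as a string-type input with the Python client; the config path comes from the example above, while the port and the fetch variable name are hypothetical placeholders:

```python
# Hypothetical sketch of a string-type (feed_type=20) feed; the fetch
# variable name below is illustrative, not taken from a real model file.
from paddle_serving_client import Client

client = Client()
client.load_client_config("ocr_det_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9293"])

with open("test_img.jpg", "rb") as f:
    image_bytes = f.read()  # raw bytes travel as a single string element

fetch_map = client.predict(
    feed={"image": image_bytes},   # feed_var with feed_type 20, shape [1]
    fetch=["detection_boxes"],     # hypothetical fetch name
    batch=False)
```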
@@ -118,7 +118,7 @@ python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --por
 the `feed_var` field in `ocr_det_client/serving_client_conf.prototxt`
 
-For this example, `feed_type` should be changed to 3 (the data type is string) and `shape` to 1.
+For this example, `feed_type` should be changed to 20 (the data type is string) and `shape` to 1.
 
 By appending the client config folder paths of multiple client models after the client is started, the client can run multi-model prediction.
 ```
...
@@ -31,15 +31,21 @@ sys.path.append(
 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
-#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto)
+#param 'type'(which is in feed_var or fetch_var) = 5 means dataType is float16
+#param 'type'(which is in feed_var or fetch_var) = 7 means dataType is uint8
+#param 'type'(which is in feed_var or fetch_var) = 8 means dataType is int8
+#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
 int64_type = 0
 float32_type = 1
 int32_type = 2
-bytes_type = 3
+float16_type = 5
+uint8_type = 7
+int8_type = 8
+bytes_type = 20
 #int_type,float_type,string_type are the set of each subdivision classes.
 int_type = set([int64_type, int32_type])
 float_type = set([float32_type])
-string_type = set([bytes_type])
+string_type = set([bytes_type, float16_type, uint8_type, int8_type])
 
 
 class _NOPProfiler(object):
@@ -411,7 +417,10 @@ class Client(object):
                             key)])
                 else:
                     string_lod_slot_batch.append([])
-                string_slot.append(feed_dict[key])
+                if type(feed_dict[key]) is np.ndarray:
+                    string_slot.append(feed_dict[key].tostring())
+                else:
+                    string_slot.append(feed_dict[key])
                 self.has_numpy_input = True
 
         self.profile_.record('py_prepro_1')
@@ -492,6 +501,38 @@ class Client(object):
                         tmp_lod = result_batch_handle.get_lod(mi, name)
                         if np.size(tmp_lod) > 0:
                             result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == uint8_type:
+                    # result_map[name] will be py::array (numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype=np.uint8)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
+                elif self.fetch_names_to_type_[name] == int8_type:
+                    # result_map[name] will be py::array (numpy array)
+                    tmp_str = result_batch_handle.get_string_by_name(
+                        mi, name)
+                    result_map[name] = np.fromstring(tmp_str, dtype=np.int8)
+                    if result_map[name].size == 0:
+                        raise ValueError(
+                            "Failed to fetch, maybe the type of [{}]"
+                            " is wrong, please check the model file".format(
+                                name))
+                    shape = result_batch_handle.get_shape(mi, name)
+                    result_map[name].shape = shape
+                    if name in self.lod_tensor_set:
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
             multi_result_map.append(result_map)
         ret = None
         if len(model_engine_names) == 1:
...
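The round trip for these numeric-as-bytes types is symmetric: on the feed side a numpy array is serialized with `tostring()` into `tensor_content`, and on the fetch side the raw bytes are rebuilt with `np.fromstring` plus the shape reported by the server. A small standalone sketch of that round trip (pure numpy, no serving calls; note that `tostring`/`fromstring` mirror the patch, while newer numpy prefers `tobytes`/`frombuffer`):

```python
import numpy as np

# Feed side: a uint8 array is flattened to raw bytes, which is what
# ends up in the Tensor.tensor_content field.
original = np.arange(12, dtype=np.uint8).reshape(3, 4)
wire_bytes = original.tostring()  # equivalent to original.tobytes()

# Fetch side: rebuild the array from the bytes plus the shape returned
# alongside them (get_shape in the client code above).
restored = np.fromstring(wire_bytes, dtype=np.uint8)
restored.shape = (3, 4)

assert (restored == original).all()
```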
@@ -32,13 +32,18 @@ from .proto import general_model_service_pb2_grpc
 #param 'type'(which is in feed_var or fetch_var) = 0 means dataType is int64
 #param 'type'(which is in feed_var or fetch_var) = 1 means dataType is float32
 #param 'type'(which is in feed_var or fetch_var) = 2 means dataType is int32
-#param 'type'(which is in feed_var or fetch_var) = 3 means dataType is string(also called bytes in proto)
+#param 'type'(which is in feed_var or fetch_var) = 20 means dataType is string(also called bytes in proto)
 int64_type = 0
 float32_type = 1
 int32_type = 2
-bytes_type = 3
+bytes_type = 20
 # this is corresponding to the proto
-proto_data_key_list = ["int64_data", "float_data", "int_data", "data"]
+proto_data_key_list = {
+    0: "int64_data",
+    1: "float_data",
+    2: "int_data",
+    20: "data"
+}
 
 
 def list_flatten(items, ignore_types=(str, bytes)):
...