diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index 21d3108a6e5ee8f20de477c7afa42eede536bfab..2ab3d1758e3bd8fa0ce54a9404630753a446a3d1 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -56,13 +56,17 @@ class ModelRes { std::make_move_iterator(std::end(res._float_map))); } ~ModelRes() {} - const std::vector>& get_int64_by_name( - const std::string& name) { - return _int64_map[name]; + const std::vector& get_int64_by_name(const std::string& name) { + return _int64_value_map[name]; } - const std::vector>& get_float_by_name( - const std::string& name) { - return _float_map[name]; + const std::vector& get_float_by_name(const std::string& name) { + return _float_value_map[name]; + } + const std::vector& get_shape(const std::string& name) { + return _shape_map[name]; + } + const std::vector& get_lod(const std::string& name) { + return _lod_map[name]; } void set_engine_name(const std::string& engine_name) { _engine_name = engine_name; @@ -81,8 +85,10 @@ class ModelRes { public: std::string _engine_name; - std::map>> _int64_map; - std::map>> _float_map; + std::map> _int64_value_map; + std::map> _float_value_map; + std::map> _shape_map; + std::map> _lod_map; }; class PredictorRes { @@ -95,14 +101,22 @@ class PredictorRes { _models.clear(); _engine_names.clear(); } - const std::vector>& get_int64_by_name( - const int model_idx, const std::string& name) { + const std::vector& get_int64_by_name(const int model_idx, + const std::string& name) { return _models[model_idx].get_int64_by_name(name); } - const std::vector>& get_float_by_name( - const int model_idx, const std::string& name) { + const std::vector& get_float_by_name(const int model_idx, + const std::string& name) { return _models[model_idx].get_float_by_name(name); } + const std::vector& get_shape(const int model_idx, + const std::string& name) { + return _models[model_idx].get_shape(name); + } + const 
std::vector& get_lod(const int model_idx, + const std::string& name) { + return _models[model_idx].get_lod(name); + } void add_model_res(ModelRes&& res) { _engine_names.push_back(res.engine_name()); _models.emplace_back(std::move(res)); @@ -134,21 +148,16 @@ class PredictorClient { int create_predictor_by_desc(const std::string& sdk_desc); int create_predictor(); - int destroy_predictor(); - int predict(const std::vector>& float_feed, - const std::vector& float_feed_name, - const std::vector>& int_feed, - const std::vector& int_feed_name, - const std::vector& fetch_name, - PredictorRes& predict_res, // NOLINT - const int& pid); + int destroy_predictor(); int batch_predict( const std::vector>>& float_feed_batch, const std::vector& float_feed_name, + const std::vector>& float_shape, const std::vector>>& int_feed_batch, const std::vector& int_feed_name, + const std::vector>& int_shape, const std::vector& fetch_name, PredictorRes& predict_res_batch, // NOLINT const int& pid); diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index 8f5c5a1c22c6a13958e5273a2133f267bfa3073f..e78cdf722cf72c3fc4f97c48a5cb02f8ad823671 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -135,154 +135,13 @@ int PredictorClient::create_predictor() { return 0; } -int PredictorClient::predict(const std::vector> &float_feed, - const std::vector &float_feed_name, - const std::vector> &int_feed, - const std::vector &int_feed_name, - const std::vector &fetch_name, - PredictorRes &predict_res, - const int &pid) { // NOLINT - predict_res.clear(); - Timer timeline; - int64_t preprocess_start = timeline.TimeStampUS(); - _api.thrd_clear(); - std::string variant_tag; - _predictor = _api.fetch_predictor("general_model", &variant_tag); - predict_res.set_variant_tag(variant_tag); - - Request req; - for (auto &name : fetch_name) { - req.add_fetch_var_names(name); - } - - std::vector tensor_vec; - FeedInst 
*inst = req.add_insts(); - for (auto &name : float_feed_name) { - tensor_vec.push_back(inst->add_tensor_array()); - } - - for (auto &name : int_feed_name) { - tensor_vec.push_back(inst->add_tensor_array()); - } - - int vec_idx = 0; - for (auto &name : float_feed_name) { - int idx = _feed_name_to_idx[name]; - Tensor *tensor = tensor_vec[idx]; - for (uint32_t j = 0; j < _shape[idx].size(); ++j) { - tensor->add_shape(_shape[idx][j]); - } - tensor->set_elem_type(1); - for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) { - tensor->add_float_data(float_feed[vec_idx][j]); - } - vec_idx++; - } - - VLOG(2) << "feed float feed var done."; - vec_idx = 0; - - for (auto &name : int_feed_name) { - int idx = _feed_name_to_idx[name]; - Tensor *tensor = tensor_vec[idx]; - for (uint32_t j = 0; j < _shape[idx].size(); ++j) { - tensor->add_shape(_shape[idx][j]); - } - tensor->set_elem_type(0); - for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) { - tensor->add_int64_data(int_feed[vec_idx][j]); - } - vec_idx++; - } - - int64_t preprocess_end = timeline.TimeStampUS(); - int64_t client_infer_start = timeline.TimeStampUS(); - Response res; - - int64_t client_infer_end = 0; - int64_t postprocess_start = 0; - int64_t postprocess_end = 0; - - if (FLAGS_profile_client) { - if (FLAGS_profile_server) { - req.set_profile_server(true); - } - } - - res.Clear(); - if (_predictor->inference(&req, &res) != 0) { - LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString(); - return -1; - } else { - VLOG(2) << "predict done."; - client_infer_end = timeline.TimeStampUS(); - postprocess_start = client_infer_end; - // multi-model output - uint32_t model_num = res.outputs_size(); - // predict_res._models.resize(model_num); - for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { - VLOG(2) << "process model output index: " << m_idx; - auto output = res.outputs(m_idx); - ModelRes model; - model.set_engine_name(output.engine_name()); - for (auto &name : fetch_name) { - int idx = 
_fetch_name_to_idx[name]; - VLOG(2) << "fetch name: " << name; - if (_fetch_name_to_type[name] == 0) { - int len = output.insts(0).tensor_array(idx).int64_data_size(); - VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len; - model._int64_map[name].resize(1); - model._int64_map[name][0].resize(len); - for (int i = 0; i < len; ++i) { - model._int64_map[name][0][i] = - output.insts(0).tensor_array(idx).int64_data(i); - } - } else if (_fetch_name_to_type[name] == 1) { - int len = output.insts(0).tensor_array(idx).float_data_size(); - VLOG(2) << "fetch tensor : " << name - << " type: float32 len : " << len; - model._float_map[name].resize(1); - model._float_map[name][0].resize(len); - for (int i = 0; i < len; ++i) { - model._float_map[name][0][i] = - output.insts(0).tensor_array(idx).float_data(i); - } - } - } - predict_res.add_model_res(std::move(model)); - } - postprocess_end = timeline.TimeStampUS(); - } - - if (FLAGS_profile_client) { - std::ostringstream oss; - oss << "PROFILE\t" - << "pid:" << pid << "\t" - << "prepro_0:" << preprocess_start << " " - << "prepro_1:" << preprocess_end << " " - << "client_infer_0:" << client_infer_start << " " - << "client_infer_1:" << client_infer_end << " "; - if (FLAGS_profile_server) { - int op_num = res.profile_time_size() / 2; - for (int i = 0; i < op_num; ++i) { - oss << "op" << i << "_0:" << res.profile_time(i * 2) << " "; - oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " "; - } - } - - oss << "postpro_0:" << postprocess_start << " "; - oss << "postpro_1:" << postprocess_end; - - fprintf(stderr, "%s\n", oss.str().c_str()); - } - return 0; -} - int PredictorClient::batch_predict( const std::vector>> &float_feed_batch, const std::vector &float_feed_name, + const std::vector> &float_shape, const std::vector>> &int_feed_batch, const std::vector &int_feed_name, + const std::vector> &int_shape, const std::vector &fetch_name, PredictorRes &predict_res_batch, const int &pid) { @@ -320,14 +179,14 @@ int 
PredictorClient::batch_predict( tensor_vec.push_back(inst->add_tensor_array()); } - VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name " + VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name" << "prepared"; int vec_idx = 0; for (auto &name : float_feed_name) { int idx = _feed_name_to_idx[name]; Tensor *tensor = tensor_vec[idx]; - for (uint32_t j = 0; j < _shape[idx].size(); ++j) { - tensor->add_shape(_shape[idx][j]); + for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) { + tensor->add_shape(float_shape[vec_idx][j]); } tensor->set_elem_type(1); for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) { @@ -343,8 +202,8 @@ int PredictorClient::batch_predict( for (auto &name : int_feed_name) { int idx = _feed_name_to_idx[name]; Tensor *tensor = tensor_vec[idx]; - for (uint32_t j = 0; j < _shape[idx].size(); ++j) { - tensor->add_shape(_shape[idx][j]); + for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { + tensor->add_shape(int_shape[vec_idx][j]); } tensor->set_elem_type(0); VLOG(3) << "feed var name " << name << " index " << vec_idx @@ -384,48 +243,47 @@ int PredictorClient::batch_predict( postprocess_start = client_infer_end; uint32_t model_num = res.outputs_size(); - // predict_res_batch._models.resize(model_num); for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { VLOG(2) << "process model output index: " << m_idx; auto output = res.outputs(m_idx); ModelRes model; model.set_engine_name(output.engine_name()); + for (auto &name : fetch_name) { - model._int64_map[name].resize(batch_size); - model._float_map[name].resize(batch_size); + int idx = _fetch_name_to_idx[name]; + int shape_size = output.insts(0).tensor_array(idx).shape_size(); + model._shape_map[name].resize(shape_size); + for (int i = 0; i < shape_size; ++i) { + model._shape_map[name][i] = + output.insts(0).tensor_array(idx).shape(i); + } + int lod_size = output.insts(0).tensor_array(idx).lod_size(); + if (lod_size > 0) { + 
model._lod_map[name].resize(lod_size); + for (int i = 0; i < lod_size; ++i) { + model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i); + } + } } - VLOG(2) << "response batch size " << output.insts_size(); - VLOG(2) << "response var nmae " << output.insts(0).tensor_array_size(); - for (int bi = 0; bi < batch_size; bi++) { - int idx = 0; - for (auto &name : fetch_name) { - int len = output.insts(bi).tensor_array(idx).data_size(); - if (_fetch_name_to_type[name] == 0) { - int len = output.insts(bi).tensor_array(idx).int64_data_size(); - VLOG(2) << "fetch tensor : " << name - << " type: int64 len : " << len; - model._int64_map[name][bi].resize(len); - VLOG(2) << "fetch name " << name << " index " << idx - << " first data " - << output.insts(bi).tensor_array(idx).int64_data(0); - for (int i = 0; i < len; ++i) { - model._int64_map[name][bi][i] = - output.insts(bi).tensor_array(idx).int64_data(i); - } - } else if (_fetch_name_to_type[name] == 1) { - int len = output.insts(bi).tensor_array(idx).float_data_size(); - VLOG(2) << "fetch tensor : " << name - << " type: float32 len : " << len; - model._float_map[name][bi].resize(len); - VLOG(2) << "fetch name " << name << " index " << idx - << " first data " - << output.insts(bi).tensor_array(idx).float_data(0); - for (int i = 0; i < len; ++i) { - model._float_map[name][bi][i] = - output.insts(bi).tensor_array(idx).float_data(i); - } + + for (auto &name : fetch_name) { + int idx = _fetch_name_to_idx[name]; + if (_fetch_name_to_type[name] == 0) { + model._int64_value_map[name].resize( + output.insts(0).tensor_array(idx).int64_data_size()); + int size = output.insts(0).tensor_array(idx).int64_data_size(); + for (int i = 0; i < size; ++i) { + model._int64_value_map[name][i] = + output.insts(0).tensor_array(idx).int64_data(i); + } + } else { + model._float_value_map[name].resize( + output.insts(0).tensor_array(idx).float_data_size()); + int size = output.insts(0).tensor_array(idx).float_data_size(); + for (int i = 0; i 
< size; ++i) { + model._float_value_map[name][i] = + output.insts(0).tensor_array(idx).float_data(i); } - idx += 1; } } predict_res_batch.add_model_res(std::move(model)); diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp index abb43dad5e9136906923950d56554f7471ed99e8..066a2cfbe7af64807d4be1982a8822f93a6c32ec 100644 --- a/core/general-client/src/pybind_general_model.cpp +++ b/core/general-client/src/pybind_general_model.cpp @@ -40,6 +40,16 @@ PYBIND11_MODULE(serving_client, m) { return self.get_float_by_name(model_idx, name); }, py::return_value_policy::reference) + .def("get_shape", + [](PredictorRes &self, int model_idx, std::string &name) { + return self.get_shape(model_idx, name); + }, + py::return_value_policy::reference) + .def("get_lod", + [](PredictorRes &self, int model_idx, std::string &name) { + return self.get_lod(model_idx, name); + }, + py::return_value_policy::reference) .def("variant_tag", [](PredictorRes &self) { return self.variant_tag(); }) .def("get_engine_names", [](PredictorRes &self) { return self.get_engine_names(); }); @@ -68,42 +78,31 @@ PYBIND11_MODULE(serving_client, m) { [](PredictorClient &self) { self.create_predictor(); }) .def("destroy_predictor", [](PredictorClient &self) { self.destroy_predictor(); }) - .def("predict", - [](PredictorClient &self, - const std::vector> &float_feed, - const std::vector &float_feed_name, - const std::vector> &int_feed, - const std::vector &int_feed_name, - const std::vector &fetch_name, - PredictorRes &predict_res, - const int &pid) { - return self.predict(float_feed, - float_feed_name, - int_feed, - int_feed_name, - fetch_name, - predict_res, - pid); - }) + .def("batch_predict", [](PredictorClient &self, const std::vector>> &float_feed_batch, const std::vector &float_feed_name, + const std::vector> &float_shape, const std::vector>> &int_feed_batch, const std::vector &int_feed_name, + const std::vector> &int_shape, const std::vector 
&fetch_name, PredictorRes &predict_res_batch, const int &pid) { return self.batch_predict(float_feed_batch, float_feed_name, + float_shape, int_feed_batch, int_feed_name, + int_shape, fetch_name, predict_res_batch, pid); - }); + }, + py::call_guard()); } } // namespace general_model diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 6992227cd51128d53253c97aee784af31a593dfc..5622970cc44e852864215c5bba14464362d99312 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -79,27 +79,25 @@ int GeneralResponseOp::inference() { } const TensorVector *in = &input_blob->tensor_vector; - int batch_size = input_blob->GetBatchSize(); - VLOG(2) << "input batch size: " << batch_size; ModelOutput *output = res->add_outputs(); - output->set_engine_name( - pre_name); // To get the order of model return values - for (int i = 0; i < batch_size; ++i) { - FetchInst *fetch_inst = output->add_insts(); - for (auto &idx : fetch_index) { - Tensor *tensor = fetch_inst->add_tensor_array(); - // currently only response float tensor or lod_tensor - tensor->set_elem_type(1); - if (model_config->_is_lod_fetch[idx]) { - VLOG(2) << "out[" << idx << " is lod_tensor"; - tensor->add_shape(-1); - } else { - VLOG(2) << "out[" << idx << "] is tensor"; - for (int k = 1; k < in->at(idx).shape.size(); ++k) { - VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k]; - tensor->add_shape(in->at(idx).shape[k]); - } + // To get the order of model return values + output->set_engine_name(pre_name); + FetchInst *fetch_inst = output->add_insts(); + for (auto &idx : fetch_index) { + Tensor *tensor = fetch_inst->add_tensor_array(); + tensor->set_elem_type(1); + if (model_config->_is_lod_fetch[idx]) { + VLOG(2) << "out[" << idx << "] is lod_tensor"; + for (int k = 0; k < in->at(idx).shape.size(); ++k) { + VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k]; + 
tensor->add_shape(in->at(idx).shape[k]); + } + } else { + VLOG(2) << "out[" << idx << "] is tensor"; + for (int k = 0; k < in->at(idx).shape.size(); ++k) { + VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k]; + tensor->add_shape(in->at(idx).shape[k]); } } } @@ -107,66 +105,42 @@ int GeneralResponseOp::inference() { int var_idx = 0; for (auto &idx : fetch_index) { int cap = 1; - for (int j = 1; j < in->at(idx).shape.size(); ++j) { + for (int j = 0; j < in->at(idx).shape.size(); ++j) { cap *= in->at(idx).shape[j]; } if (in->at(idx).dtype == paddle::PaddleDType::INT64) { int64_t *data_ptr = static_cast(in->at(idx).data.data()); if (model_config->_is_lod_fetch[idx]) { - for (int j = 0; j < batch_size; ++j) { - for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1]; - k++) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_int64_data( - data_ptr[k]); - } + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_lod( + in->at(idx).lod[0][j]); + } + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]); } } else { - int var_size = in->at(idx).shape[0]; - if (var_size == batch_size) { - for (int j = 0; j < batch_size; ++j) { - for (int k = j * cap; k < (j + 1) * cap; ++k) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_int64_data( - data_ptr[k]); - } - } - } else { - for (int j = 0; j < batch_size; ++j) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_int64_data( - data_ptr[0]); - } + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]); } } var_idx++; } else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) { float *data_ptr = static_cast(in->at(idx).data.data()); if 
(model_config->_is_lod_fetch[idx]) { - for (int j = 0; j < batch_size; ++j) { - for (int k = in->at(idx).lod[0][j]; k < in->at(idx).lod[0][j + 1]; - k++) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_float_data( - data_ptr[k]); - } + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_lod( + in->at(idx).lod[0][j]); + } + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]); } } else { - int var_size = in->at(idx).shape[0]; - if (var_size == batch_size) { - for (int j = 0; j < batch_size; ++j) { - for (int k = j * cap; k < (j + 1) * cap; ++k) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_float_data( - data_ptr[k]); - } - } - } else { - for (int j = 0; j < batch_size; ++j) { - FetchInst *fetch_p = output->mutable_insts(j); - fetch_p->mutable_tensor_array(var_idx)->add_float_data( - data_ptr[0]); - } + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]); } } var_idx++; diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 9ddd029982c3b2116a1e782a87939f56da60aa1b..8581ecb2a2e10deced910a20ce26c2beaca956fa 100644 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -26,6 +26,7 @@ message Tensor { repeated float float_data = 4; optional int32 elem_type = 5; repeated int32 shape = 6; + repeated int32 lod = 7; // only for fetch tensor currently }; message FeedInst { repeated Tensor tensor_array = 1; }; diff --git a/core/predictor/Dockerfile b/core/predictor/Dockerfile deleted file mode 100644 index 71b824c158f5231e41cfe885516c902fedfc521d..0000000000000000000000000000000000000000 --- 
a/core/predictor/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM registry.baidu.com/public/centos6u3-online:gcc482 -MAINTAINER predictor@baidu.com -LABEL Description="paddle serving docker image" -USER root -RUN echo "Enjoy your paddle serving journey!" -ADD conf /home/work/paddle-serving/conf -ADD data /home/work/paddle-serving/data -ADD bin /home/work/paddle-serving/bin -RUN wget ftp://st01-rdqa-dev055-wanlijin01.epc.baidu.com/home/users/wanlijin01/workspace/baidu/paddle-serving/predictor/data.tar.gz -O /tmp/data.tar.gz \ - && tar -C /home/work/paddle-serving -xvzf /tmp/data.tar.gz \ - && rm /tmp/data.tar.gz \ - && cd /home/work/paddle-serving/ \ - && chmod a+x bin/pdserving \ - && chmod a+x bin/start.sh \ - && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/workflow.conf \ - && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/resource.conf \ - && sed -i 's/\.\/log/\/home\/work\/paddle-serving\/log/g' conf/log.conf \ - && sed -i 's/\.\/data/\/home\/work\/paddle-serving\/data/g' conf/model_toolkit.conf \ - && mkdir -p /home/work/paddle-serving/log -CMD sh /home/work/paddle-serving/bin/start.sh -c "trap : TERM INT; sleep infinity & wait" diff --git a/core/predictor/Dockerfile.gpu b/core/predictor/Dockerfile.gpu deleted file mode 100644 index f0922dc83d65b6139730ad818c9fc781d40df994..0000000000000000000000000000000000000000 --- a/core/predictor/Dockerfile.gpu +++ /dev/null @@ -1,20 +0,0 @@ -FROM registry.baidu.com/paddlecloud/paddlecloud-runenv-centos6u3-bce:paddlecloud-fluid-gcc482-cuda8.0_cudnn5_bce -MAINTAINER predictor@baidu.com -LABEL Description="paddle serving docker image" -USER root -RUN echo "Enjoy your paddle serving journey!" 
-ADD conf /home/work/paddle-serving/conf -ADD data /home/work/paddle-serving/data -ADD bin /home/work/paddle-serving/bin -RUN wget ftp://st01-rdqa-dev055-wanlijin01.epc.baidu.com/home/users/wanlijin01/workspace/baidu/paddle-serving/predictor/data.tar.gz -O /tmp/data.tar.gz \ - && tar -C /home/work/paddle-serving -xvzf /tmp/data.tar.gz \ - && rm /tmp/data.tar.gz \ - && cd /home/work/paddle-serving/ \ - && chmod a+x bin/pdserving \ - && chmod a+x bin/start.sh \ - && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/workflow.conf \ - && sed -i 's/\.\/conf/\/home\/work\/paddle-serving\/conf/g' conf/resource.conf \ - && sed -i 's/\.\/log/\/home\/work\/paddle-serving\/log/g' conf/log.conf \ - && sed -i 's/\.\/data/\/home\/work\/paddle-serving\/data/g' conf/model_toolkit.conf \ - && mkdir -p /home/work/paddle-serving/log -CMD sh /home/work/paddle-serving/bin/start.sh -c "trap : TERM INT; sleep infinity & wait" diff --git a/core/predictor/build.sh b/core/predictor/build.sh deleted file mode 100755 index 781af834c233b879f96463d924c8facd185422f3..0000000000000000000000000000000000000000 --- a/core/predictor/build.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/bash -function install_pdserving_lib(){ - ret=1 - local pdserving_lib_mode=$1 - case $pdserving_lib_mode in - local) - local pdserving_local_path=$2 - if [ ! 
-d $pdserving_local_path ]; then - echo "[WARN failed to find local path]" - return ret - fi - lib_name=`basename $pdserving_local_path` - if [ -d ${CITOOLS}/$lib_name ]; then - rm -rf ${CITOOLS}/$lib_name - fi - cp -rf $pdserving_local_path ${CITOOLS}/ - source ${CITOOLS}/$lib_name/predictor_build_lib.sh - ;; - ftp) - local wgetOptions="--tries=3 --retry-connrefused -r -l0 -nv --limit-rate=50m -nH --cut-dirs=5" - pdserving_lib_ftp_path="ftp://tc-orp-app2.tc.baidu.com:/home/heqing/scmbak/common_lib/pdserving_cts/pdserving_lib" - lib_name=`basename $pdserving_lib_ftp_path` - if [ -d ${CITOOLS}/$lib_name ]; then - rm -rf ${CITOOLS}/$lib_name - fi - echo "wget cmd is :$wgetOptions $pdserving_lib_ftp_path" - echo "lib_name is :${lib_name}" - wget $wgetOptions$cur_dirs $pdserving_lib_ftp_path - mv ${lib_name} ${CITOOLS}/ - source ${CITOOLS}/${lib_name}/predictor_build_lib.sh - ;; - *) - ret=0 - echo "todo" - ;; - esac - return $ret -} - -CUR_PATH=$(pwd) -WORK_PATH=$(pwd) -WORK_ROOT=${WORK_PATH%%/baidu/*} -#co citools -CITOOLS="${WORK_ROOT}/baidu/fengchao-qa/citools" -if [ -d ${CITOOLS} ];then - rm -rf ${CITOOLS} -fi -git clone --depth 1 ssh://git@icode.baidu.com:8235/baidu/fengchao-qa/citools $CITOOLS >/dev/null -[[ $? 
!= 0 ]] && exit 1 -source $CITOOLS/lib/localbuild_lib.sh -#source过后路径可能改变,需要重新赋值 -CITOOLS="${WORK_ROOT}/baidu/fengchao-qa/citools" - -#install_pdserving_lib -pdserving_lib_mode="ftp" -install_pdserving_lib ${pdserving_lib_mode} #两种模式:如果是local,需要指定本机上pdserving_lib的路径 -#source ${CITOOLS}/pdserving_lib/predictor_build_lib.sh - -COVMODULEID=8652 -TYPE=framework -#执行本模块构建初始化 -predictor_build_init -WORKROOT=$WORK_ROOT -#执行构建命令 -predictor_build_do $@ - -exit 0 diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index cd8f59f64de9add013fd3bb6e45321fff250d4e7..51c0335a9db896e1260e83915de81e51451a904b 100644 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -26,6 +26,7 @@ message Tensor { repeated float float_data = 4; optional int32 elem_type = 5; repeated int32 shape = 6; + repeated int32 lod = 7; // only for fetch tensor currently }; message FeedInst { repeated Tensor tensor_array = 1; }; diff --git a/doc/DESIGN.md b/doc/DESIGN.md index 5d00d02171dccf07bfdafb9cdd85222a92c20113..4d4055a7936fb1791ebe15a4c41c10a00a78c1f8 100644 --- a/doc/DESIGN.md +++ b/doc/DESIGN.md @@ -260,6 +260,7 @@ class Op { ``` + ### 5.4 Interfaces related to framework Service diff --git a/doc/SAVE_CN.md b/doc/SAVE_CN.md index 0e2ecd5b71b860e887027564940e9e64522e097f..43b62c2ac623b386505356194ac136ea305fe683 100644 --- a/doc/SAVE_CN.md +++ b/doc/SAVE_CN.md @@ -2,7 +2,7 @@ (简体中文|[English](./SAVE.md)) -- 目前,Paddle服务提供了一个save_model接口供用户访问,该接口与Paddle的`save_inference_model`类似。 +- 目前,Paddle Serving提供了一个save_model接口供用户访问,该接口与Paddle的`save_inference_model`类似。 ``` python import paddle_serving_client.io as serving_io diff --git a/doc/DOCKER.md b/doc/deprecated/DOCKER.md similarity index 100% rename from doc/DOCKER.md rename to doc/deprecated/DOCKER.md diff --git a/doc/DOCKER_CN.md b/doc/deprecated/DOCKER_CN.md similarity index 100% rename from doc/DOCKER_CN.md rename to 
doc/deprecated/DOCKER_CN.md diff --git a/doc/serving_logo.png b/doc/serving_logo.png deleted file mode 100644 index 2510fd62ecc2bf2954a4a2ff7491f565f1528ebf..0000000000000000000000000000000000000000 Binary files a/doc/serving_logo.png and /dev/null differ diff --git a/python/examples/criteo_ctr/test_client.py b/python/examples/criteo_ctr/test_client.py index d53c5541c36f4eb52618e3498eda571dd2bcab53..2beac850228291c49d56c1180365fdd8e627ffc0 100644 --- a/python/examples/criteo_ctr/test_client.py +++ b/python/examples/criteo_ctr/test_client.py @@ -51,6 +51,5 @@ for ei in range(1000): for i in range(1, 27): feed_dict["sparse_{}".format(i - 1)] = data[0][i] fetch_map = client.predict(feed=feed_dict, fetch=["prob"]) - #print(fetch_map) end = time.time() print(end - start) diff --git a/python/examples/criteo_ctr_with_cube/test_client.py b/python/examples/criteo_ctr_with_cube/test_client.py index de205ebc68af02e8dd978da51a4c43bef0cec0d4..ca752b763e067b6a73e28c1d2ab9f58b9b98ba5d 100755 --- a/python/examples/criteo_ctr_with_cube/test_client.py +++ b/python/examples/criteo_ctr_with_cube/test_client.py @@ -40,7 +40,7 @@ for ei in range(10000): for i in range(1, 27): feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i] fetch_map = client.predict(feed=feed_dict, fetch=["prob"]) - prob_list.append(fetch_map['prob'][1]) + prob_list.append(fetch_map['prob'][0][1]) label_list.append(data[0][-1][0]) print(auc(label_list, prob_list)) diff --git a/python/examples/fit_a_line/test_numpy_input_client.py b/python/examples/fit_a_line/test_numpy_input_client.py new file mode 100644 index 0000000000000000000000000000000000000000..8557ed09fe0118d35e1cb169cec4e93442cf927a --- /dev/null +++ b/python/examples/fit_a_line/test_numpy_input_client.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import Client +import numpy as np +import sys + +client = Client() +client.load_client_config(sys.argv[1]) +client.connect(["127.0.0.1:9393"]) + +import paddle +test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), + batch_size=1) + +for data in test_reader(): + fetch_map = client.predict( + feed={"x": np.array(data[0][0])}, fetch=["price"]) + print("{} {}".format(fetch_map["price"][0][0], data[0][1][0])) diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py index 548a40e4931e7f0a2ea4a4e9d3c05f40e7d34426..9de7a45b0646167c43ea7d3b98b0f3782112f6f0 100644 --- a/python/examples/imdb/test_client.py +++ b/python/examples/imdb/test_client.py @@ -31,4 +31,4 @@ for line in sys.stdin: feed = {"words": word_ids} fetch = ["acc", "cost", "prediction"] fetch_map = client.predict(feed=feed, fetch=fetch) - print("{} {}".format(fetch_map["prediction"][1], label[0])) + print("{} {}".format(fetch_map["prediction"][0][1], label[0])) diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 03542de7481c24b0b21b72635e541b77f60d6d16..52e0467c7115258fd188ea2fc8c6036a6d903499 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -18,6 +18,8 @@ import os from .proto import sdk_configure_pb2 as sdk from .proto import general_model_config_pb2 as m_config import google.protobuf.text_format +import numpy as np +import time import sys int_type = 
0 @@ -119,6 +121,7 @@ class Client(object): self.fetch_names_to_idx_ = {} self.lod_tensor_set = set() self.feed_tensor_len = {} + for i, var in enumerate(model_conf.feed_var): self.feed_names_to_idx_[var.alias_name] = i self.feed_types_[var.alias_name] = var.feed_type @@ -131,11 +134,11 @@ class Client(object): for dim in self.feed_shapes_[var.alias_name]: counter *= dim self.feed_tensor_len[var.alias_name] = counter - for i, var in enumerate(model_conf.fetch_var): self.fetch_names_to_idx_[var.alias_name] = i self.fetch_names_to_type_[var.alias_name] = var.fetch_type - + if var.is_lod_tensor: + self.lod_tensor_set.add(var.alias_name) return def add_variant(self, tag, cluster, variant_weight): @@ -162,7 +165,6 @@ class Client(object): "parameter endpoints({}) will not take effect, because you use the add_variant function.". format(endpoints)) sdk_desc = self.predictor_sdk_.gen_desc() - print(sdk_desc) self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString( )) @@ -203,6 +205,8 @@ class Client(object): float_slot_batch = [] int_feed_names = [] float_feed_names = [] + int_shape = [] + float_shape = [] fetch_names = [] counter = 0 batch_size = len(feed_batch) @@ -219,64 +223,85 @@ class Client(object): for i, feed_i in enumerate(feed_batch): int_slot = [] float_slot = [] + int_shape = [] + float_shape = [] for key in feed_i: if key not in self.feed_names_: raise ValueError("Wrong feed name: {}.".format(key)) - self.shape_check(feed_i, key) + if not isinstance(feed_i[key], np.ndarray): + self.shape_check(feed_i, key) if self.feed_types_[key] == int_type: if i == 0: int_feed_names.append(key) - int_slot.append(feed_i[key]) + if isinstance(feed_i[key], np.ndarray): + int_shape.append(list(feed_i[key].shape)) + else: + int_shape.append(self.feed_shapes_[key]) + if isinstance(feed_i[key], np.ndarray): + int_slot.append(np.reshape(feed_i[key], (-1)).tolist()) + else: + int_slot.append(feed_i[key]) elif self.feed_types_[key] == float_type: if i == 0: 
float_feed_names.append(key) - float_slot.append(feed_i[key]) - if len(int_slot) + len(float_slot) == 0: - raise ValueError("No feed data for predict.") + if isinstance(feed_i[key], np.ndarray): + float_shape.append(list(feed_i[key].shape)) + else: + float_shape.append(self.feed_shapes_[key]) + if isinstance(feed_i[key], np.ndarray): + float_slot.append( + np.reshape(feed_i[key], (-1)).tolist()) + else: + float_slot.append(feed_i[key]) int_slot_batch.append(int_slot) float_slot_batch.append(float_slot) result_batch = self.result_handle_ res = self.client_handle_.batch_predict( - float_slot_batch, float_feed_names, int_slot_batch, int_feed_names, - fetch_names, result_batch, self.pid) + float_slot_batch, float_feed_names, float_shape, int_slot_batch, + int_feed_names, int_shape, fetch_names, result_batch, self.pid) if res == -1: return None - multi_result_map_batch = [] + multi_result_map = [] model_engine_names = result_batch.get_engine_names() for mi, engine_name in enumerate(model_engine_names): - result_map_batch = [] result_map = {} + # result map needs to be a numpy array for i, name in enumerate(fetch_names): if self.fetch_names_to_type_[name] == int_type: result_map[name] = result_batch.get_int64_by_name(mi, name) + shape = result_batch.get_shape(mi, name) + result_map[name] = np.array(result_map[name]) + result_map[name].shape = shape + if name in self.lod_tensor_set: + result_map["{}.lod".format( + name)] = result_batch.get_lod(mi, name) elif self.fetch_names_to_type_[name] == float_type: result_map[name] = result_batch.get_float_by_name(mi, name) - for i in range(batch_size): - single_result = {} - for key in result_map: - single_result[key] = result_map[key][i] - result_map_batch.append(single_result) - multi_result_map_batch.append(result_map_batch) + shape = result_batch.get_shape(mi, name) + result_map[name] = np.array(result_map[name]) + result_map[name].shape = shape + if name in self.lod_tensor_set: + result_map["{}.lod".format( + name)] = 
result_batch.get_lod(mi, name) + multi_result_map.append(result_map) ret = None if len(model_engine_names) == 1: - if batch_size == 1: - ret = multi_result_map_batch[0][0] - else: - ret = multi_result_map_batch[0] + # If only one model result is returned, the format of ret is result_map + ret = multi_result_map[0] else: - ret = {} - if batch_size == 1: - for mi, result_map_batch in enumerate(multi_result_map_batch): - ret[model_engine_names[mi]] = result_map_batch[0] - else: - for mi, result_map_batch in enumerate(multi_result_map_batch): - ret[model_engine_names[mi]] = result_map_batch - return [ret, - self.result_handle_.variant_tag()] if need_variant_tag else ret + # If multiple model results are returned, the format of ret is {name: result_map} + ret = { + engine_name: multi_result_map[mi] + for mi, engine_name in enumerate(model_engine_names) + } + + return ret if not need_variant_tag else [ + ret, self.result_handle_.variant_tag() + ] def release(self): self.client_handle_.destroy_predictor() diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index c1a86eaecc899c987bd346f8a747fb486d4789ee..ca43426c2a82a0c8be296c8410361acbf498fc5c 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -67,11 +67,15 @@ class WebService(object): feed_batch=feed, fetch=fetch) fetch_map_batch = self.postprocess( feed=request.json, fetch=fetch, fetch_map=fetch_map_batch) + for key in fetch_map_batch: + fetch_map_batch[key] = fetch_map_batch[key].tolist() result = {"result": fetch_map_batch} elif isinstance(feed, dict): if "fetch" in feed: del feed["fetch"] fetch_map = self.client_service.predict(feed=feed, fetch=fetch) + for key in fetch_map: + fetch_map[key] = fetch_map[key][0].tolist() result = self.postprocess( feed=request.json, fetch=fetch, fetch_map=fetch_map) except ValueError: diff --git a/python/paddle_serving_server_gpu/web_service.py 
b/python/paddle_serving_server_gpu/web_service.py old mode 100755 new mode 100644 index 25e3a315dd5b848c77b9533a974d7707e5b67991..37425acd6b7209fbdcf38a52c5a78e0c15b4cf61 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -104,22 +104,13 @@ class WebService(object): abort(400) if "fetch" not in request.json: abort(400) - try: - feed, fetch = self.preprocess(request.json, request.json["fetch"]) - if isinstance(feed, list): - fetch_map_batch = self.client.predict( - feed_batch=feed, fetch=fetch) - fetch_map_batch = self.postprocess( - feed=request.json, fetch=fetch, fetch_map=fetch_map_batch) - result = {"result": fetch_map_batch} - elif isinstance(feed, dict): - if "fetch" in feed: - del feed["fetch"] - fetch_map = self.client.predict(feed=feed, fetch=fetch) - result = self.postprocess( - feed=request.json, fetch=fetch, fetch_map=fetch_map) - except ValueError: - result = {"result": "Request Value Error"} + feed, fetch = self.preprocess(request.json, request.json["fetch"]) + fetch_map_batch = self.client.predict(feed=feed, fetch=fetch) + fetch_map_batch = self.postprocess( + feed=request.json, fetch=fetch, fetch_map=fetch_map_batch) + for key in fetch_map_batch: + fetch_map_batch[key] = fetch_map_batch[key].tolist() + result = {"result": fetch_map_batch} return result def run_server(self): diff --git a/python/requirements.txt b/python/requirements.txt index 5359d565e8f612822e1a0c61ee27018daa4b0e1b..d445216b3112ea3d5791045b43a6a3147865522f 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,3 +1 @@ -protobuf>=3.1.0 -six -paddlepaddle-gpu +numpy>=1.12, <=1.16.4 ; python_version<"3.5" diff --git a/python/setup.py.client.in b/python/setup.py.client.in index 381fb2a8853cc4d5494e3eac520ab183db6eab09..58061f7c887be23223f554d383c98bd75fb4828b 100644 --- a/python/setup.py.client.in +++ b/python/setup.py.client.in @@ -53,7 +53,7 @@ if '${PACK}' == 'ON': REQUIRED_PACKAGES = [ - 'six >= 1.10.0', 
'protobuf >= 3.1.0' + 'six >= 1.10.0', 'protobuf >= 3.1.0', 'numpy >= 1.12' ] if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"): diff --git a/tools/serving_build.sh b/tools/serving_build.sh index b613701110ee7f2ddb123bf611e2174b18d69346..ccd4c2f608e12a01c2a711ff503f99bc754bae2e 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -18,6 +18,7 @@ function init() { export PYTHONROOT=/usr cd Serving export SERVING_WORKDIR=$PWD + $PYTHONROOT/bin/python -m pip install -r python/requirements.txt } function check_cmd() {