Commit 91ec1d08 authored by MRXLT, committed by GitHub

Merge branch 'develop' into 0.3.2-qa

@@ -172,6 +172,11 @@ Here, `client.predict` function has two arguments. `feed` is a `python dict` wit
 - [An End-to-end tutorial from training to inference service deployment](doc/TRAIN_TO_SERVICE.md)
 - [Write Bert-as-Service in 10 minutes](doc/BERT_10_MINS.md)
+### Tutorial at AIStudio
+- [Introduction to PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/605819)
+- [Image Segmentation on Paddle Serving](https://aistudio.baidu.com/aistudio/projectdetail/457715)
+- [Sentiment Analysis](https://aistudio.baidu.com/aistudio/projectdetail/509014)
 ### Developers
 - [How to config Serving native operators on server side?](doc/SERVER_DAG.md)
 - [How to develop a new Serving operator?](doc/NEW_OPERATOR.md)
......
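For reference, the `client.predict` call documented in the README hunk above typically looks like the following minimal sketch, based on the quick-start example; the config path, server address, and the `x`/`price` variable names are illustrative assumptions rather than part of this change:

```python
from paddle_serving_client import Client

client = Client()
# Client-side config generated when the serving model was saved (assumed path).
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

# `feed` is a python dict mapping feed variable names to input data;
# `fetch` lists the output variable names to retrieve from the server.
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
        -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = client.predict(feed={"x": data}, fetch=["price"])
print(fetch_map)
```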
@@ -169,6 +169,11 @@ print(fetch_map)
 - [An end-to-end tutorial from training to inference service deployment](doc/TRAIN_TO_SERVICE_CN.md)
 - [Build Bert-As-Service in 10 minutes](doc/BERT_10_MINS_CN.md)
+### AIStudio Tutorials
+- [PaddleServing exercise](https://aistudio.baidu.com/aistudio/projectdetail/605819)
+- [Image segmentation with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/457715)
+- [Sentiment analysis with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/509014)
 ### Developer Tutorials
 - [How to configure the computation graph (DAG) on the Server side?](doc/SERVER_DAG_CN.md)
 - [How to develop a new General Op?](doc/NEW_OPERATOR_CN.md)
......
@@ -37,6 +37,7 @@ message InferenceRequest {
 repeated string feed_var_names = 2;
 repeated string fetch_var_names = 3;
 required bool is_python = 4 [ default = false ];
+required uint64 log_id = 5 [ default = 0 ];
 };
 message InferenceResponse {
......
@@ -227,7 +227,8 @@ class PredictorClient {
 const std::vector<std::vector<int>>& int_shape,
 const std::vector<std::string>& fetch_name,
 PredictorRes& predict_res_batch, // NOLINT
-const int& pid);
+const int& pid,
+const uint64_t log_id);
 int numpy_predict(
 const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
@@ -238,7 +239,8 @@ class PredictorClient {
 const std::vector<std::vector<int>>& int_shape,
 const std::vector<std::string>& fetch_name,
 PredictorRes& predict_res_batch, // NOLINT
-const int& pid);
+const int& pid,
+const uint64_t log_id);
 private:
 PredictorApi _api;
......
@@ -144,7 +144,8 @@ int PredictorClient::batch_predict(
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
 predict_res_batch.clear();
@@ -162,6 +163,7 @@ int PredictorClient::batch_predict(
 VLOG(2) << "int feed name size: " << int_feed_name.size();
 VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
 Request req;
+req.set_log_id(log_id);
 for (auto &name : fetch_name) {
 req.add_fetch_var_names(name);
 }
@@ -356,7 +358,8 @@ int PredictorClient::numpy_predict(
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
 VLOG(2) << "batch size: " << batch_size;
 predict_res_batch.clear();
@@ -374,6 +377,7 @@ int PredictorClient::numpy_predict(
 VLOG(2) << "int feed name size: " << int_feed_name.size();
 VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
 Request req;
+req.set_log_id(log_id);
 for (auto &name : fetch_name) {
 req.add_fetch_var_names(name);
 }
......
@@ -107,7 +107,8 @@ PYBIND11_MODULE(serving_client, m) {
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 return self.batch_predict(float_feed_batch,
 float_feed_name,
 float_shape,
@@ -116,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
 int_shape,
 fetch_name,
 predict_res_batch,
-pid);
+pid,
+log_id);
 },
 py::call_guard<py::gil_scoped_release>())
 .def("numpy_predict",
@@ -131,7 +133,8 @@ PYBIND11_MODULE(serving_client, m) {
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 return self.numpy_predict(float_feed_batch,
 float_feed_name,
 float_shape,
@@ -140,7 +143,8 @@ PYBIND11_MODULE(serving_client, m) {
 int_shape,
 fetch_name,
 predict_res_batch,
-pid);
+pid,
+log_id);
 },
 py::call_guard<py::gil_scoped_release>());
 }
......
@@ -45,36 +45,41 @@ int GeneralCopyOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "precedent name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
 const TensorVector *in = &input_blob->tensor_vector;
-VLOG(2) << "input size: " << in->size();
+VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
 int batch_size = input_blob->GetBatchSize();
 int input_var_num = 0;
 GeneralBlob *res = mutable_data<GeneralBlob>();
+res->SetLogId(log_id);
 TensorVector *out = &res->tensor_vector;
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 res->SetBatchSize(batch_size);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed get op tls reader object output";
 }
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "Going to init lod tensor";
+VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
 for (int i = 0; i < in->size(); ++i) {
 paddle::PaddleTensor lod_tensor;
 CopyLod(&in->at(i), &lod_tensor);
 lod_tensor.dtype = in->at(i).dtype;
 lod_tensor.name = in->at(i).name;
-VLOG(2) << "lod tensor [" << i << "].name = " << lod_tensor.name;
+VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
+<< "].name = " << lod_tensor.name;
 out->push_back(lod_tensor);
 }
-VLOG(2) << "pack done.";
+VLOG(2) << "(logid=" << log_id << ") pack done.";
 for (int i = 0; i < out->size(); ++i) {
 int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
@@ -86,7 +91,7 @@ int GeneralCopyOp::inference() {
 }
 }
-VLOG(2) << "output done.";
+VLOG(2) << "(logid=" << log_id << ") output done.";
 timeline.Pause();
 int64_t end = timeline.TimeStampUS();
@@ -94,7 +99,7 @@ int GeneralCopyOp::inference() {
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
......
@@ -50,18 +50,20 @@ int GeneralDistKVInferOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "Get precedent op name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
 GeneralBlob *output_blob = mutable_data<GeneralBlob>();
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed mutable depended argument, op:" << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 std::vector<uint64_t> keys;
 std::vector<rec::mcube::CubeValue> values;
 int sparse_count = 0;
@@ -96,13 +98,14 @@ int GeneralDistKVInferOp::inference() {
 rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
 std::vector<std::string> table_names = cube->get_table_names();
 if (table_names.size() == 0) {
-LOG(ERROR) << "cube init error or cube config not given.";
+LOG(ERROR) << "(logid=" << log_id
+<< ") cube init error or cube config not given.";
 return -1;
 }
 int ret = cube->seek(table_names[0], keys, &values);
 int64_t cube_end = timeline.TimeStampUS();
 if (values.size() != keys.size() || values[0].buff.size() == 0) {
-LOG(ERROR) << "cube value return null";
+LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
 }
 size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
 TensorVector sparse_out;
@@ -153,14 +156,16 @@ int GeneralDistKVInferOp::inference() {
 infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
 output_blob->SetBatchSize(batch_size);
+output_blob->SetLogId(log_id);
-VLOG(2) << "infer batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 int64_t start = timeline.TimeStampUS();
 if (InferManager::instance().infer(
 engine_name().c_str(), &infer_in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed do infer in fluid model: " << engine_name();
 return -1;
 }
......
@@ -59,10 +59,13 @@ int GeneralDistKVQuantInferOp::inference() {
 return -1;
 }
+uint64_t log_id = input_blob->GetLogId();
+output_blob->SetLogId(log_id);
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 std::vector<uint64_t> keys;
 std::vector<rec::mcube::CubeValue> values;
 int sparse_count = 0;
@@ -94,13 +97,14 @@ int GeneralDistKVQuantInferOp::inference() {
 rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
 std::vector<std::string> table_names = cube->get_table_names();
 if (table_names.size() == 0) {
-LOG(ERROR) << "cube init error or cube config not given.";
+LOG(ERROR) << "(logid=" << log_id
+<< ") cube init error or cube config not given.";
 return -1;
 }
 int ret = cube->seek(table_names[0], keys, &values);
 if (values.size() != keys.size() || values[0].buff.size() == 0) {
-LOG(ERROR) << "cube value return null";
+LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
 }
 TensorVector sparse_out;
@@ -182,7 +186,7 @@ int GeneralDistKVQuantInferOp::inference() {
 output_blob->SetBatchSize(batch_size);
-VLOG(2) << "infer batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
@@ -190,7 +194,8 @@ int GeneralDistKVQuantInferOp::inference() {
 if (InferManager::instance().infer(
 engine_name().c_str(), &infer_in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed do infer in fluid model: " << engine_name();
 return -1;
 }
......
@@ -35,6 +35,7 @@ struct GeneralBlob {
 std::vector<paddle::PaddleTensor> tensor_vector;
 int64_t time_stamp[20];
 int p_size = 0;
+uint64_t _log_id = -1; // for logging
 int _batch_size;
@@ -46,9 +47,11 @@ struct GeneralBlob {
 tensor_vector.clear();
 }
-int SetBatchSize(int batch_size) { _batch_size = batch_size; }
+void SetBatchSize(int batch_size) { _batch_size = batch_size; }
+void SetLogId(uint64_t log_id) { _log_id = log_id; }
 int GetBatchSize() const { return _batch_size; }
+uint64_t GetLogId() const { return _log_id; }
 std::string ShortDebugString() const { return "Not implemented!"; }
 };
......
@@ -47,22 +47,25 @@ int GeneralInferOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "Get precedent op name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
 GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+output_blob->SetLogId(log_id);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed mutable depended argument, op:" << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 output_blob->SetBatchSize(batch_size);
-VLOG(2) << "infer batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
@@ -70,7 +73,8 @@ int GeneralInferOp::inference() {
 if (InferManager::instance().infer(
 engine_name().c_str(), in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name().c_str();
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed do infer in fluid model: " << engine_name().c_str();
 return -1;
 }
......
@@ -37,9 +37,9 @@ int conf_check(const Request *req,
 const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
 int var_num = req->insts(0).tensor_array_size();
 if (var_num != model_config->_feed_type.size()) {
-VLOG(2) << "var num: " << var_num;
-VLOG(2) << "model config var num: " << model_config->_feed_type.size();
-LOG(ERROR) << "feed var number not match.";
+LOG(ERROR) << "feed var number not match: model config["
+<< model_config->_feed_type.size() << "] vs. actual[" << var_num
+<< "]";
 return -1;
 }
@@ -72,6 +72,7 @@ int conf_check(const Request *req,
 int GeneralReaderOp::inference() {
 // read request from client
 const Request *req = dynamic_cast<const Request *>(get_request_message());
+uint64_t log_id = req->log_id();
 int batch_size = req->insts_size();
 int input_var_num = 0;
@@ -83,25 +84,28 @@ int GeneralReaderOp::inference() {
 TensorVector *out = &res->tensor_vector;
 res->SetBatchSize(batch_size);
+res->SetLogId(log_id);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed get op tls reader object output";
 }
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
 int var_num = req->insts(0).tensor_array_size();
-VLOG(2) << "var num: " << var_num;
+VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+<< ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "print general model config done.";
+VLOG(2) << "(logid=" << log_id << ") print general model config done.";
 // TODO(guru4elephant): how to do conditional check?
 /*
@@ -122,7 +126,8 @@ int GeneralReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 paddle::PaddleTensor lod_tensor;
 elem_type[i] = req->insts(0).tensor_array(i).elem_type();
-VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+<< "] has elem type: " << elem_type[i];
 if (elem_type[i] == 0) { // int64
 elem_size[i] = sizeof(int64_t);
 lod_tensor.dtype = paddle::PaddleDType::INT64;
@@ -137,17 +142,19 @@ int GeneralReaderOp::inference() {
 if (model_config->_is_lod_feed[i]) {
 lod_tensor.lod.resize(1);
 lod_tensor.lod[0].push_back(0);
-VLOG(2) << "var[" << i << "] is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
 } else {
 lod_tensor.shape.push_back(batch_size);
 capacity[i] = 1;
 for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
 int dim = req->insts(0).tensor_array(i).shape(k);
-VLOG(2) << "shape for var[" << i << "]: " << dim;
+VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+<< "]: " << dim;
 capacity[i] *= dim;
 lod_tensor.shape.push_back(dim);
 }
-VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+<< "] is tensor, capacity: " << capacity[i];
 }
 lod_tensor.name = model_config->_feed_name[i];
 out->push_back(lod_tensor);
@@ -167,11 +174,12 @@ int GeneralReaderOp::inference() {
 } else if (tensor.int_data_size() > 0) {
 data_len = tensor.int_data_size();
 }
-VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
+VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
+<< "]: " << data_len;
 tensor_size += data_len;
 int cur_len = out->at(i).lod[0].back();
-VLOG(2) << "current len: " << cur_len;
+VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
 int sample_len = 0;
 if (tensor.shape_size() == 1) {
@@ -180,7 +188,7 @@ int GeneralReaderOp::inference() {
 sample_len = tensor.shape(0);
 }
 out->at(i).lod[0].push_back(cur_len + sample_len);
-VLOG(2) << "new len: " << cur_len + sample_len;
+VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
 }
 out->at(i).data.Resize(tensor_size * elem_size[i]);
 out->at(i).shape = {out->at(i).lod[0].back()};
@@ -190,11 +198,11 @@ int GeneralReaderOp::inference() {
 if (out->at(i).shape.size() == 1) {
 out->at(i).shape.push_back(1);
 }
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
 << "] is lod_tensor and len=" << out->at(i).lod[0].back();
 } else {
 out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
 << "] is tensor and capacity=" << batch_size * capacity[i];
 }
 }
@@ -203,8 +211,8 @@ int GeneralReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 if (elem_type[i] == 0) {
 int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-<< req->insts(0).tensor_array(i).int64_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+<< "] is " << req->insts(0).tensor_array(i).int64_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).int64_data_size();
@@ -219,8 +227,8 @@ int GeneralReaderOp::inference() {
 }
 } else if (elem_type[i] == 1) {
 float *dst_ptr = static_cast<float *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-<< req->insts(0).tensor_array(i).float_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+<< "] is " << req->insts(0).tensor_array(i).float_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).float_data_size();
@@ -235,8 +243,8 @@ int GeneralReaderOp::inference() {
 }
 } else if (elem_type[i] == 2) {
 int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-<< req->insts(0).tensor_array(i).int_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+<< "] is " << req->insts(0).tensor_array(i).int_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).int_data_size();
@@ -252,7 +260,7 @@ int GeneralReaderOp::inference() {
 }
 }
-VLOG(2) << "output size: " << out->size();
+VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
 timeline.Pause();
 int64_t end = timeline.TimeStampUS();
@@ -260,7 +268,7 @@ int GeneralReaderOp::inference() {
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
 DEFINE_OP(GeneralReaderOp);
......
@@ -42,6 +42,9 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralResponseOp::inference() {
 const std::vector<std::string> pre_node_names = pre_names();
 VLOG(2) << "pre node names size: " << pre_node_names.size();
+const GeneralBlob *input_blob;
+uint64_t log_id =
+get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
 const Request *req = dynamic_cast<const Request *>(get_request_message());
 // response inst with only fetch_var_names
@@ -52,15 +55,17 @@ int GeneralResponseOp::inference() {
 // timeline.Start();
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+<< ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+VLOG(2) << "(logid=" << log_id
+<< ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
 std::vector<int> fetch_index;
 fetch_index.resize(req->fetch_var_names_size());
@@ -69,16 +74,16 @@ int GeneralResponseOp::inference() {
 model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
 }
-const GeneralBlob *input_blob;
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 const std::string &pre_name = pre_node_names[pi];
-VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
-<< pre_node_names.size() << ")";
+VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+<< " (" << pre_node_names.size() << ")";
 input_blob = get_depend_argument<GeneralBlob>(pre_name);
 // fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
 // input_blob);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed mutable depended argument, op: " << pre_name;
 return -1;
 }
@@ -92,17 +97,19 @@ int GeneralResponseOp::inference() {
 for (auto &idx : fetch_index) {
 Tensor *tensor = fetch_inst->add_tensor_array();
 if (model_config->_is_lod_fetch[idx]) {
-VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
-<< " is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+<< model_config->_fetch_name[idx] << " is lod_tensor";
 for (int k = 0; k < in->at(idx).shape.size(); ++k) {
-VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+VLOG(2) << "(logid=" << log_id << ") shape[" << k
+<< "]: " << in->at(idx).shape[k];
 tensor->add_shape(in->at(idx).shape[k]);
 }
 } else {
-VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
-<< " is tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+<< model_config->_fetch_name[idx] << " is tensor";
 for (int k = 0; k < in->at(idx).shape.size(); ++k) {
-VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+VLOG(2) << "(logid=" << log_id << ") shape[" << k
+<< "]: " << in->at(idx).shape[k];
 tensor->add_shape(in->at(idx).shape[k]);
 }
 }
@@ -119,8 +126,8 @@ int GeneralResponseOp::inference() {
 auto dtype = in->at(idx).dtype;
 if (dtype == paddle::PaddleDType::INT64) {
-VLOG(2) << "Prepare int64 var [" << model_config->_fetch_name[idx]
-<< "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
+<< model_config->_fetch_name[idx] << "].";
 int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
 // from
 // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
@@ -130,16 +137,16 @@ int GeneralResponseOp::inference() {
 fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
 &tmp_data);
 } else if (dtype == paddle::PaddleDType::FLOAT32) {
-VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
-<< "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
+<< model_config->_fetch_name[idx] << "].";
 float *data_ptr = static_cast<float *>(in->at(idx).data.data());
 google::protobuf::RepeatedField<float> tmp_data(data_ptr,
 data_ptr + cap);
 fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
 &tmp_data);
 } else if (dtype == paddle::PaddleDType::INT32) {
-VLOG(2) << "Prepare int32 var [" << model_config->_fetch_name[idx]
-<< "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare int32 var ["
+<< model_config->_fetch_name[idx] << "].";
 int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
 google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
 data_ptr + cap);
@@ -154,7 +161,8 @@ int GeneralResponseOp::inference() {
 }
 }
-VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
+VLOG(2) << "(logid=" << log_id << ") fetch var ["
+<< model_config->_fetch_name[idx] << "] ready";
 var_idx++;
 }
 }
@@ -167,7 +175,8 @@ int GeneralResponseOp::inference() {
 // a more elegant way.
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
-VLOG(2) << "p size for input blob: " << input_blob->p_size;
+VLOG(2) << "(logid=" << log_id
+<< ") p size for input blob: " << input_blob->p_size;
 int profile_time_idx = -1;
 if (pi == 0) {
 profile_time_idx = 0;
......
@@ -35,6 +35,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralTextReaderOp::inference() {
 // read request from client
 const Request *req = dynamic_cast<const Request *>(get_request_message());
+uint64_t log_id = req->log_id();
 int batch_size = req->insts_size();
 int input_var_num = 0;
@@ -44,16 +45,18 @@ int GeneralTextReaderOp::inference() {
 std::vector<int64_t> capacity;
 GeneralBlob *res = mutable_data<GeneralBlob>();
-TensorVector *out = &res->tensor_vector;
-res->SetBatchSize(batch_size);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed get op tls reader object output";
 }
+TensorVector *out = &res->tensor_vector;
+res->SetBatchSize(batch_size);
+res->SetLogId(log_id);
 if (batch_size <= 0) {
-LOG(ERROR) << "Batch size < 0";
+LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
 return -1;
 }
@@ -61,17 +64,18 @@ int GeneralTextReaderOp::inference() {
 int64_t start = timeline.TimeStampUS();
 int var_num = req->insts(0).tensor_array_size();
-VLOG(2) << "var num: " << var_num;
+VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+<< ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "print general model config done.";
+VLOG(2) << "(logid=" << log_id << ") print general model config done.";
 elem_type.resize(var_num);
 elem_size.resize(var_num);
@@ -79,7 +83,8 @@ int GeneralTextReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 paddle::PaddleTensor lod_tensor;
 elem_type[i] = req->insts(0).tensor_array(i).elem_type();
-VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+<< "] has elem type: " << elem_type[i];
 if (elem_type[i] == 0) { // int64
 elem_size[i] = sizeof(int64_t);
 lod_tensor.dtype = paddle::PaddleDType::INT64;
@@ -91,17 +96,19 @@ int GeneralTextReaderOp::inference() {
 if (req->insts(0).tensor_array(i).shape(0) == -1) {
 lod_tensor.lod.resize(1);
 lod_tensor.lod[0].push_back(0);
-VLOG(2) << "var[" << i << "] is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
 } else {
 lod_tensor.shape.push_back(batch_size);
 capacity[i] = 1;
 for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
 int dim = req->insts(0).tensor_array(i).shape(k);
-VLOG(2) << "shape for var[" << i << "]: " << dim;
+VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+<< "]: " << dim;
 capacity[i] *= dim;
 lod_tensor.shape.push_back(dim);
 }
-VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+<< "] is tensor, capacity: " << capacity[i];
 }
 lod_tensor.name = model_config->_feed_name[i];
 out->push_back(lod_tensor);
@@ -117,11 +124,11 @@ int GeneralTextReaderOp::inference() {
 }
 out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
 out->at(i).shape = {out->at(i).lod[0].back(), 1};
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
 << "] is lod_tensor and len=" << out->at(i).lod[0].back();
 } else {
 out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
 << "] is tensor and capacity=" << batch_size * capacity[i];
 }
 }
@@ -163,7 +170,7 @@ int GeneralTextReaderOp::inference() {
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
 DEFINE_OP(GeneralTextReaderOp);
......
@@ -40,6 +40,9 @@ int GeneralTextResponseOp::inference() {
 VLOG(2) << "Going to run inference";
 const std::vector<std::string> pre_node_names = pre_names();
 VLOG(2) << "pre node names size: " << pre_node_names.size();
+const GeneralBlob *input_blob;
+uint64_t log_id =
+get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
 const Request *req = dynamic_cast<const Request *>(get_request_message());
 // response inst with only fetch_var_names
@@ -48,11 +51,12 @@ int GeneralTextResponseOp::inference() {
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+<< ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
@@ -63,20 +67,20 @@ int GeneralTextResponseOp::inference() {
 model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
 }
-const GeneralBlob *input_blob;
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 const std::string &pre_name = pre_node_names[pi];
-VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
-<< pre_node_names.size() << ")";
+VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+<< " (" << pre_node_names.size() << ")";
 input_blob = get_depend_argument<GeneralBlob>(pre_name);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+<< ") Failed mutable depended argument, op: " << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 ModelOutput *output = res->add_outputs();
 output->set_engine_name(
@@ -88,12 +92,13 @@ int GeneralTextResponseOp::inference() {
 // currently only response float tensor or lod_tensor
 tensor->set_elem_type(1);
 if (model_config->_is_lod_fetch[idx]) {
-VLOG(2) << "out[" << idx << " is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
 tensor->add_shape(-1);
 } else {
-VLOG(2) << "out[" << idx << "] is tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
 for (int k = 1; k < in->at(idx).shape.size(); ++k) {
-VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
+VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
+<< "]: " << in->at(idx).shape[k];
 tensor->add_shape(in->at(idx).shape[k]);
 }
 }
@@ -137,7 +142,8 @@ int GeneralTextResponseOp::inference() {
 // a more elegant way.
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
-VLOG(2) << "p size for input blob: " << input_blob->p_size;
+VLOG(2) << "(logid=" << log_id
+<< ") p size for input blob: " << input_blob->p_size;
 int profile_time_idx = -1;
 if (pi == 0) {
 profile_time_idx = 0;
......
@@ -37,6 +37,7 @@ message Request {
 repeated FeedInst insts = 1;
 repeated string fetch_var_names = 2;
 optional bool profile_server = 3 [ default = false ];
+required uint64 log_id = 4 [ default = 0 ];
 };
 message Response {
......
@@ -21,6 +21,7 @@ option cc_generic_services = true;
 message RequestAndResponse {
 required int32 a = 1;
 required float b = 2;
+required uint64 log_id = 3 [ default = 0 ];
 };
 service LoadGeneralModelService {
......
@@ -280,25 +280,29 @@ class PdsCodeGenerator : public CodeGenerator {
 " baidu::rpc::ClosureGuard done_guard(done);\n"
 " baidu::rpc::Controller* cntl = \n"
 " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
+" uint64_t log_id = request->log_id();\n"
+" cntl->set_log_id(log_id);\n"
 " ::baidu::paddle_serving::predictor::InferService* svr = \n"
 " "
 "::baidu::paddle_serving::predictor::InferServiceManager::instance("
 ").item(\"$service$\");\n"
 " if (svr == NULL) {\n"
-" LOG(ERROR) << \"Not found service: $service$\";\n"
+" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+"$service$\";\n"
 " cntl->SetFailed(404, \"Not found service: $service$\");\n"
 " return ;\n"
 " }\n"
-" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
-"\"\]\";\n"
+" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
+"<< cntl->remote_side() << \"\]\";\n"
-" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
-"\"\]\";\n"
+" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
+"<< cntl->local_side() << \"\]\";\n"
-" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
-" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
+"<< \"$name$\" << \"\]\";\n"
-" int err_code = svr->inference(request, response);\n"
+" int err_code = svr->inference(request, response, log_id);\n"
 " if (err_code != 0) {\n"
 " LOG(WARNING)\n"
-" << \"Failed call inferservice[$name$], name[$service$]\"\n"
+" << \"(logid=\" << log_id << \") Failed call "
+"inferservice[$name$], name[$service$]\"\n"
 " << \", error_code: \" << err_code;\n"
 " cntl->SetFailed(err_code, \"InferService inference "
 "failed!\");\n"
@@ -306,7 +310,8 @@ class PdsCodeGenerator : public CodeGenerator {
 " gettimeofday(&tv, NULL);\n"
 " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
 " // flush notice log\n"
-" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+"start) << \"\]\";\n", // NOLINT
 "name",
 class_name,
 "service",
@@ -317,26 +322,31 @@ class PdsCodeGenerator : public CodeGenerator {
 " baidu::rpc::ClosureGuard done_guard(done);\n"
 " baidu::rpc::Controller* cntl = \n"
 " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
+" uint64_t log_id = request->log_id();\n"
+" cntl->set_log_id(log_id);\n"
 " ::baidu::paddle_serving::predictor::InferService* svr = \n"
 " "
 "::baidu::paddle_serving::predictor::InferServiceManager::instance("
 ").item(\"$service$\");\n"
 " if (svr == NULL) {\n"
-" LOG(ERROR) << \"Not found service: $service$\";\n"
+" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+"$service$\";\n"
 " cntl->SetFailed(404, \"Not found service: $service$\");\n"
 " return ;\n"
 " }\n"
-" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
-"\"\]\";\n"
+" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
+"<< cntl->remote_side() << \"\]\";\n"
-" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
-"\"\]\";\n"
+" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
+"<< cntl->local_side() << \"\]\";\n"
-" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
-" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
+"<< \"$name$\" << \"\]\";\n"
 " butil::IOBufBuilder debug_os;\n"
-" int err_code = svr->inference(request, response, &debug_os);\n"
+" int err_code = svr->inference(request, response, log_id, "
+"&debug_os);\n"
 " if (err_code != 0) {\n"
 " LOG(WARNING)\n"
-" << \"Failed call inferservice[$name$], name[$service$]\"\n"
+" << \"(logid=\" << log_id << \") Failed call "
+"inferservice[$name$], name[$service$]\"\n"
 " << \", error_code: \" << err_code;\n"
 " cntl->SetFailed(err_code, \"InferService inference "
 "failed!\");\n"
@@ -345,9 +355,11 @@ class PdsCodeGenerator : public CodeGenerator {
 " gettimeofday(&tv, NULL);\n"
 " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
 " // flush notice log\n"
-" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+"start) << \"\]\";\n"
 " LOG(INFO)\n"
-" << \"TC=[\" << (end - start) << \"] Received debug "
+" << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
+"\"] Received debug "
 "request[log_id=\" << cntl->log_id()\n"
 " << \"] from \" << cntl->remote_side()\n"
 " << \" to \" << cntl->local_side();\n",
@@ -1011,25 +1023,31 @@ class PdsCodeGenerator : public CodeGenerator {
 " brpc::ClosureGuard done_guard(done);\n"
 " brpc::Controller* cntl = \n"
 " static_cast<brpc::Controller*>(cntl_base);\n"
+" uint64_t log_id = request->log_id();\n"
+" cntl->set_log_id(log_id);\n"
 " ::baidu::paddle_serving::predictor::InferService* svr = \n"
 " "
 "::baidu::paddle_serving::predictor::InferServiceManager::instance("
 ").item(\"$service$\");\n"
 " if (svr == NULL) {\n"
-" LOG(ERROR) << \"Not found service: $service$\";\n"
+" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
+"$service$\";\n"
 " cntl->SetFailed(404, \"Not found service: $service$\");\n"
 " return ;\n"
 " }\n"
-" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") "
+"remote_side=\[\" << cntl->remote_side() << " // NOLINT
 "\"\]\";\n"
-" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") "
+"local_side=\[\" << cntl->local_side() << " // NOLINT
 "\"\]\";\n"
-" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
-" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") "
+"service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
-" int err_code = svr->inference(request, response);\n"
+" int err_code = svr->inference(request, response, log_id);\n"
 " if (err_code != 0) {\n"
 " LOG(WARNING)\n"
-" << \"Failed call inferservice[$name$], name[$service$]\"\n"
+" << \"(logid=\" << log_id << \") Failed call "
+"inferservice[$name$], name[$service$]\"\n"
 " << \", error_code: \" << err_code;\n"
 " cntl->SetFailed(err_code, \"InferService inference "
 "failed!\");\n"
@@ -1037,7 +1055,8 @@ class PdsCodeGenerator : public CodeGenerator {
 " gettimeofday(&tv, NULL);\n"
 " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
 " // flush notice log\n"
-" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
+" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
+"start) << \"\]\";\n", // NOLINT
 "name",
 class_name,
 "service",
@@ -1048,26 +1067,31 @@ class PdsCodeGenerator : public CodeGenerator {
 " brpc::ClosureGuard done_guard(done);\n"
 " brpc::Controller* cntl = \n"
 " static_cast<brpc::Controller*>(cntl_base);\n"
+" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
" " " "
"::baidu::paddle_serving::predictor::InferServiceManager::instance(" "::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n" ").item(\"$service$\");\n"
" if (svr == NULL) {\n" " if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n" " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n" " cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n" " return ;\n"
" }\n" " }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"\"\]\";\n" " << cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT "<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n" " butil::IOBufBuilder debug_os;\n"
" int err_code = svr->inference(request, response, &debug_os);\n" " int err_code = svr->inference(request, response, log_id, "
"&debug_os);\n"
" if (err_code != 0) {\n" " if (err_code != 0) {\n"
" LOG(WARNING)\n" " LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n" " << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n" " << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference " " cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n" "failed!\");\n"
...@@ -1076,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1076,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n" " gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n" " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n" " // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n" // NOLINT
" LOG(INFO)\n" " LOG(INFO)\n"
" << \"TC=[\" << (end - start) << \"] Received debug " " << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
"\"] Received debug "
"request[log_id=\" << cntl->log_id()\n" "request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n" " << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n", " << \" to \" << cntl->local_side();\n",
......
...@@ -72,9 +72,10 @@ class Channel { ...@@ -72,9 +72,10 @@ class Channel {
const std::string& op() { return _op; } const std::string& op() { return _op; }
int share_to_bus(Bus* bus) { int share_to_bus(Bus* bus, const uint64_t log_id) {
if (bus->regist(_op, this) != 0) { if (bus->regist(_op, this) != 0) {
LOG(ERROR) << "Failed regist channel[" << _op << "] to bus!"; LOG(ERROR) << "(logid=" << log_id << ") Failed regist channel[" << _op
<< "] to bus!";
return -1; return -1;
} }
......
...@@ -155,13 +155,11 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) { ...@@ -155,13 +155,11 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) {
} }
if (FLAGS_el_log_level == 16) { if (FLAGS_el_log_level == 16) {
LOG(INFO) << "DAG: " << _dag_name; LOG(INFO) << "DAG: " << _dag_name << ", Op Num: " << _index_nodes.size();
LOG(INFO) << ", Op Num: " << _index_nodes.size();
for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) { for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
DagNode* node = _index_nodes[nid]; DagNode* node = _index_nodes[nid];
LOG(INFO) << ", OP-" << node->id << "-" << node->name << "-" LOG(INFO) << "OP-" << node->id << "-" << node->name << "-" << node->type
<< node->type; << " depends: " << node->depends.size();
LOG(INFO) << " depends: " << node->depends.size();
boost::unordered_map<std::string, EdgeMode>::iterator it; boost::unordered_map<std::string, EdgeMode>::iterator it;
for (it = node->depends.begin(); it != node->depends.end(); it++) { for (it = node->depends.begin(); it != node->depends.end(); it++) {
...@@ -214,8 +212,8 @@ int Dag::topo_sort() { ...@@ -214,8 +212,8 @@ int Dag::topo_sort() {
} }
} }
for (int i = 0; i < in_degree.size(); ++i) { for (int i = 0; i < in_degree.size(); ++i) {
LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i VLOG(2) << "(" << _index_nodes[i]->name << ") in_degree[" << i
<< "]: " << in_degree[i]; << "]: " << in_degree[i];
} }
int sorted_num = 0; int sorted_num = 0;
DagStage* stage = new (std::nothrow) DagStage(); DagStage* stage = new (std::nothrow) DagStage();
......
...@@ -26,7 +26,9 @@ namespace baidu { ...@@ -26,7 +26,9 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
int DagView::init(Dag* dag, const std::string& service_name) { int DagView::init(Dag* dag,
const std::string& service_name,
const uint64_t log_id) {
_name = dag->name(); _name = dag->name();
_full_name = service_name + NAME_DELIMITER + dag->name(); _full_name = service_name + NAME_DELIMITER + dag->name();
_bus = butil::get_object<Bus>(); _bus = butil::get_object<Bus>();
...@@ -36,17 +38,20 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -36,17 +38,20 @@ int DagView::init(Dag* dag, const std::string& service_name) {
for (uint32_t si = 0; si < stage_size; si++) { for (uint32_t si = 0; si < stage_size; si++) {
const DagStage* stage = dag->stage_by_index(si); const DagStage* stage = dag->stage_by_index(si);
if (stage == NULL) { if (stage == NULL) {
LOG(ERROR) << "Failed get stage by index:" << si; LOG(ERROR) << "(logid=" << log_id << ") Failed get stage by index:" << si;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
ViewStage* vstage = butil::get_object<ViewStage>(); ViewStage* vstage = butil::get_object<ViewStage>();
if (vstage == NULL) { if (vstage == NULL) {
LOG(ERROR) << "Failed get vstage from object pool" LOG(ERROR) << "(logid=" << log_id
<< ") Failed get vstage from object pool"
<< "at:" << si; << "at:" << si;
return ERR_MEM_ALLOC_FAILURE; return ERR_MEM_ALLOC_FAILURE;
} }
VLOG(2) << "stage[" << si << "] name: " << stage->full_name; VLOG(2) << "(logid=" << log_id << ") stage[" << si
VLOG(2) << "stage[" << si << "] node size: " << stage->nodes.size(); << "] name: " << stage->full_name;
VLOG(2) << "(logid=" << log_id << ") stage[" << si
<< "] node size: " << stage->nodes.size();
vstage->full_name = service_name + NAME_DELIMITER + stage->full_name; vstage->full_name = service_name + NAME_DELIMITER + stage->full_name;
uint32_t node_size = stage->nodes.size(); uint32_t node_size = stage->nodes.size();
// create tls view node // create tls view node
...@@ -54,31 +59,39 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -54,31 +59,39 @@ int DagView::init(Dag* dag, const std::string& service_name) {
DagNode* node = stage->nodes[ni]; DagNode* node = stage->nodes[ni];
ViewNode* vnode = butil::get_object<ViewNode>(); ViewNode* vnode = butil::get_object<ViewNode>();
if (vnode == NULL) { if (vnode == NULL) {
LOG(ERROR) << "Failed get vnode at:" << ni; LOG(ERROR) << "(logid=" << log_id << ") Failed get vnode at:" << ni;
return ERR_MEM_ALLOC_FAILURE; return ERR_MEM_ALLOC_FAILURE;
} }
// factory type // factory type
Op* op = OpRepository::instance().get_op(node->type); Op* op = OpRepository::instance().get_op(node->type);
if (op == NULL) { if (op == NULL) {
LOG(ERROR) << "Failed get op with type:" << node->type; LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op with type:" << node->type;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
// initialize a TLS op object // initialize a TLS op object
VLOG(2) << "dag view initialized: \n" VLOG(2) << "(logid=" << log_id << ") dag view initialized: \n"
<< "node id: " << node->id << "\n" << "node id: " << node->id << "\n"
<< "node name: " << node->name << "\n" << "node name: " << node->name << "\n"
<< "node type: " << node->type; << "node type: " << node->type;
if (op->init(_bus, dag, node->id, node->name, node->type, node->conf) != if (op->init(_bus,
0) { dag,
LOG(WARNING) << "Failed init op, type:" << node->type; node->id,
node->name,
node->type,
node->conf,
log_id) != 0) {
LOG(WARNING) << "(logid=" << log_id
<< ") Failed init op, type:" << node->type;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
op->set_full_name(service_name + NAME_DELIMITER + node->full_name); op->set_full_name(service_name + NAME_DELIMITER + node->full_name);
// Set the name of the Op as the key of the matching engine. // Set the name of the Op as the key of the matching engine.
VLOG(2) << "op->set_engine_name(" << node->name.c_str() << ")"; VLOG(2) << "(logid=" << log_id << ") op->set_engine_name("
<< node->name.c_str() << ")";
op->set_engine_name(node->name); op->set_engine_name(node->name);
vnode->conf = node; vnode->conf = node;
...@@ -88,7 +101,7 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -88,7 +101,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
it != vnode->conf->depends.end(); it != vnode->conf->depends.end();
++it) { ++it) {
std::string pre_node_name = it->first; std::string pre_node_name = it->first;
VLOG(2) << "add op pre name: \n" VLOG(2) << "(logid=" << log_id << ") add op pre name: \n"
<< "current op name: " << vnode->op->op_name() << "current op name: " << vnode->op->op_name()
<< ", previous op name: " << pre_node_name; << ", previous op name: " << pre_node_name;
vnode->op->add_pre_node_name(pre_node_name); vnode->op->add_pre_node_name(pre_node_name);
...@@ -102,7 +115,7 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -102,7 +115,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
//<< " previous op name: " //<< " previous op name: "
//<< _view[si - 1]->nodes.back()->op->op_name(); //<< _view[si - 1]->nodes.back()->op->op_name();
// vstage->nodes.back()->op->set_pre_node_name( // vstage->nodes.back()->op->set_pre_node_name(
//_view[si - 1]->nodes.back()->op->op_name()); // _view[si - 1]->nodes.back()->op->op_name());
/*}*/ /*}*/
_view.push_back(vstage); _view.push_back(vstage);
} }
...@@ -133,14 +146,15 @@ int DagView::deinit() { ...@@ -133,14 +146,15 @@ int DagView::deinit() {
return ERR_OK; return ERR_OK;
} }
int DagView::execute(butil::IOBufBuilder* debug_os) { int DagView::execute(const uint64_t log_id, butil::IOBufBuilder* debug_os) {
uint32_t stage_size = _view.size(); uint32_t stage_size = _view.size();
for (uint32_t si = 0; si < stage_size; si++) { for (uint32_t si = 0; si < stage_size; si++) {
TRACEPRINTF("start to execute stage[%u]", si); TRACEPRINTF("(logid=%" PRIu64 ") start to execute stage[%u]", log_id, si);
int errcode = execute_one_stage(_view[si], debug_os); int errcode = execute_one_stage(_view[si], log_id, debug_os);
TRACEPRINTF("finish to execute stage[%u]", si); TRACEPRINTF("(logid=%" PRIu64 ") finish to execute stage[%u]", log_id, si);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "failed execute stage[" << _view[si]->debug(); LOG(ERROR) << "(logid=" << log_id << ") Failed execute stage["
<< _view[si]->debug();
return errcode; return errcode;
} }
} }
...@@ -151,29 +165,34 @@ int DagView::execute(butil::IOBufBuilder* debug_os) { ...@@ -151,29 +165,34 @@ int DagView::execute(butil::IOBufBuilder* debug_os) {
// You can derive a subclass to implement this func. // You can derive a subclass to implement this func.
// ParallelDagView maybe the one you want. // ParallelDagView maybe the one you want.
int DagView::execute_one_stage(ViewStage* vstage, int DagView::execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED); butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size(); uint32_t node_size = vstage->nodes.size();
VLOG(2) << "vstage->nodes.size(): " << node_size; VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) { for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni]; ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf; DagNode* conf = vnode->conf;
Op* op = vnode->op; Op* op = vnode->op;
TRACEPRINTF("start to execute op[%s]", op->name()); TRACEPRINTF(
int errcode = op->process(debug_os != NULL); "(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
TRACEPRINTF("finish to execute op[%s]", op->name()); int errcode = op->process(log_id, debug_os != NULL);
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Execute failed, Op:" << op->debug_string(); LOG(ERROR) << "(logid=" << log_id
<< ") Execute failed, Op:" << op->debug_string();
return errcode; return errcode;
} }
if (errcode > 0) { if (errcode > 0) {
LOG(INFO) << "Execute ignore, Op:" << op->debug_string(); LOG(INFO) << "(logid=" << log_id
<< ") Execute ignore, Op:" << op->debug_string();
continue; continue;
} }
if (debug_os) { if (debug_os) {
(*debug_os) << "{\"op_name\": \"" << op->name() (*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string() << "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}"; << "\", \"time_info\": \"" << op->time_info() << "\"}";
} }
...@@ -186,34 +205,34 @@ int DagView::execute_one_stage(ViewStage* vstage, ...@@ -186,34 +205,34 @@ int DagView::execute_one_stage(ViewStage* vstage,
return ERR_OK; return ERR_OK;
} }
int DagView::set_request_channel(Channel& request) { int DagView::set_request_channel(Channel& request, const uint64_t log_id) {
// Each workflow should get the very beginning // Each workflow should get the very beginning
// request (channel), and commit it to bus, for // request (channel), and commit it to bus, for
// the first stage ops consuming. // the first stage ops consuming.
request.share_to_bus(_bus); request.share_to_bus(_bus, log_id);
return ERR_OK; return ERR_OK;
} }
const Channel* DagView::get_response_channel() const { const Channel* DagView::get_response_channel(const uint64_t log_id) const {
// Caller obtains response channel from bus, and // Caller obtains response channel from bus, and
// writes it to rpc response(protbuf/json) // writes it to rpc response(protbuf/json)
if (_view.size() < 1) { if (_view.size() < 1) {
LOG(ERROR) << "invalid empty view stage!"; LOG(ERROR) << "(logid=" << log_id << ") invalid empty view stage!";
return NULL; return NULL;
} }
ViewStage* last_stage = _view[_view.size() - 1]; ViewStage* last_stage = _view[_view.size() - 1];
if (last_stage->nodes.size() != 1 || last_stage->nodes[0] == NULL) { if (last_stage->nodes.size() != 1 || last_stage->nodes[0] == NULL) {
LOG(ERROR) << "Invalid last stage, size[" << last_stage->nodes.size() LOG(ERROR) << "(logid=" << log_id << ") Invalid last stage, size["
<< "] != 1"; << last_stage->nodes.size() << "] != 1";
return NULL; return NULL;
} }
Op* last_op = last_stage->nodes[0]->op; Op* last_op = last_stage->nodes[0]->op;
if (last_op == NULL) { if (last_op == NULL) {
LOG(ERROR) << "Last op is NULL"; LOG(ERROR) << "(logid=" << log_id << ") Last op is NULL";
return NULL; return NULL;
} }
return last_op->mutable_channel(); return last_op->mutable_channel();
......
...@@ -47,21 +47,22 @@ class DagView { ...@@ -47,21 +47,22 @@ class DagView {
~DagView() {} ~DagView() {}
int init(Dag* dag, const std::string& service_name); int init(Dag* dag, const std::string& service_name, const uint64_t log_id);
int deinit(); int deinit();
int execute(butil::IOBufBuilder* debug_os); int execute(const uint64_t log_id, butil::IOBufBuilder* debug_os);
// The default execution strategy is in sequencing // The default execution strategy is in sequencing
// You can derive a subclass to implement this func. // You can derive a subclass to implement this func.
// ParallelDagView maybe the one you want. // ParallelDagView maybe the one you want.
virtual int execute_one_stage(ViewStage* vstage, virtual int execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
int set_request_channel(Channel& request); // NOLINT int set_request_channel(Channel& request, const uint64_t log_id); // NOLINT
const Channel* get_response_channel() const; const Channel* get_response_channel(const uint64_t log_id) const;
const std::string& name() const { return _name; } const std::string& name() const { return _name; }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <butil/time.h> // butil::Timer #include <butil/time.h> // butil::Timer
#endif #endif
#include <inttypes.h>
#include <list> #include <list>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -135,50 +136,63 @@ const std::string& InferService::name() const { return _infer_service_format; } ...@@ -135,50 +136,63 @@ const std::string& InferService::name() const { return _infer_service_format; }
// Execute each workflow serially // Execute each workflow serially
int InferService::inference(const google::protobuf::Message* request, int InferService::inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
TRACEPRINTF("start to inference"); TRACEPRINTF("(logid=%" PRIu64 ") start to inference", log_id);
// when function call begins, framework will reset // when function call begins, framework will reset
// thread local variables&resources automatically. // thread local variables&resources automatically.
if (Resource::instance().thread_clear() != 0) { if (Resource::instance().thread_clear() != 0) {
LOG(ERROR) << "Failed thread clear whole resource"; LOG(ERROR) << "(logid=" << log_id << ") Failed thread clear whole resource";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("finish to thread clear"); TRACEPRINTF("(logid=%" PRIu64 ") finish to thread clear", log_id);
if (_enable_map_request_to_workflow) { if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True"; VLOG(2) << "(logid=" << log_id << ") enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request); std::vector<Workflow*>* workflows =
_map_request_to_workflow(request, log_id);
if (!workflows || workflows->size() == 0) { if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow"; LOG(ERROR) << "(logid=" << log_id
<< ") Failed to map request to workflow";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
size_t fsize = workflows->size(); size_t fsize = workflows->size();
for (size_t fi = 0; fi < fsize; ++fi) { for (size_t fi = 0; fi < fsize; ++fi) {
Workflow* workflow = (*workflows)[fi]; Workflow* workflow = (*workflows)[fi];
if (workflow == NULL) { if (workflow == NULL) {
LOG(ERROR) << "Failed to get valid workflow at: " << fi; LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get valid workflow at: " << fi;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("start to execute workflow[%s]", workflow->name().c_str()); TRACEPRINTF("(logid=%" PRIu64 ") start to execute workflow[%s]",
int errcode = _execute_workflow(workflow, request, response, debug_os); log_id,
TRACEPRINTF("finish to execute workflow[%s]", workflow->name().c_str()); workflow->name().c_str());
int errcode =
_execute_workflow(workflow, request, response, log_id, debug_os);
TRACEPRINTF("(logid=%" PRIu64 ") finish to execute workflow[%s]",
log_id,
workflow->name().c_str());
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute workflow[" << workflow->name() LOG(ERROR) << "(logid=" << log_id << ") Failed execute workflow["
<< "] in:" << name(); << workflow->name() << "] in:" << name();
return errcode; return errcode;
} }
} }
} else { } else {
LOG(INFO) << "enable map request == False"; VLOG(2) << "(logid=" << log_id << ") enable map request == False";
TRACEPRINTF("start to execute one workflow"); TRACEPRINTF("(logid=%" PRIu64 ") start to execute one workflow", log_id);
size_t fsize = _flows.size(); size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) { for (size_t fi = 0; fi < fsize; ++fi) {
TRACEPRINTF("start to execute one workflow-%lu", fi); TRACEPRINTF(
int errcode = execute_one_workflow(fi, request, response, debug_os); "(logid=%" PRIu64 ") start to execute one workflow-%lu", log_id, fi);
TRACEPRINTF("finish to execute one workflow-%lu", fi); int errcode =
execute_one_workflow(fi, request, response, log_id, debug_os);
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute one workflow-%lu", log_id, fi);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute 0-th workflow in:" << name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed execute 0-th workflow in:" << name();
return errcode; return errcode;
} }
} }
...@@ -188,26 +202,30 @@ int InferService::inference(const google::protobuf::Message* request, ...@@ -188,26 +202,30 @@ int InferService::inference(const google::protobuf::Message* request,
int InferService::debug(const google::protobuf::Message* request, int InferService::debug(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
return inference(request, response, debug_os); return inference(request, response, log_id, debug_os);
} }
int InferService::execute_one_workflow(uint32_t index, int InferService::execute_one_workflow(uint32_t index,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
if (index >= _flows.size()) { if (index >= _flows.size()) {
LOG(ERROR) << "Faield execute workflow, index: " << index LOG(ERROR) << "(logid=" << log_id
<< ") Faield execute workflow, index: " << index
<< " >= max:" << _flows.size(); << " >= max:" << _flows.size();
return ERR_OVERFLOW_FAILURE; return ERR_OVERFLOW_FAILURE;
} }
Workflow* workflow = _flows[index]; Workflow* workflow = _flows[index];
return _execute_workflow(workflow, request, response, debug_os); return _execute_workflow(workflow, request, response, log_id, debug_os);
} }
int InferService::_execute_workflow(Workflow* workflow, int InferService::_execute_workflow(Workflow* workflow,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
butil::Timer workflow_time(butil::Timer::STARTED); butil::Timer workflow_time(butil::Timer::STARTED);
// create and submit beginer channel // create and submit beginer channel
...@@ -215,54 +233,62 @@ int InferService::_execute_workflow(Workflow* workflow, ...@@ -215,54 +233,62 @@ int InferService::_execute_workflow(Workflow* workflow,
req_channel.init(0, START_OP_NAME); req_channel.init(0, START_OP_NAME);
req_channel = request; req_channel = request;
DagView* dv = workflow->fetch_dag_view(full_name()); DagView* dv = workflow->fetch_dag_view(full_name(), log_id);
dv->set_request_channel(req_channel); dv->set_request_channel(req_channel, log_id);
// call actual inference interface // call actual inference interface
int errcode = dv->execute(debug_os); int errcode = dv->execute(log_id, debug_os);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute dag for workflow:" << workflow->name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed execute dag for workflow:" << workflow->name();
return errcode; return errcode;
} }
TRACEPRINTF("finish to dv execute"); TRACEPRINTF("(logid=%" PRIu64 ") finish to dv execute", log_id);
// create ender channel and copy // create ender channel and copy
const Channel* res_channel = dv->get_response_channel(); const Channel* res_channel = dv->get_response_channel(log_id);
if (res_channel == NULL) {
LOG(ERROR) << "(logid=" << log_id << ") Failed get response channel";
return ERR_INTERNAL_FAILURE;
}
if (!_merger || !_merger->merge(res_channel->message(), response)) { if (!_merger || !_merger->merge(res_channel->message(), response)) {
LOG(ERROR) << "Failed merge channel res to response"; LOG(ERROR) << "(logid=" << log_id
<< ") Failed merge channel res to response";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("finish to copy from"); TRACEPRINTF("(logid=%" PRIu64 ") finish to copy from", log_id);
workflow_time.stop(); workflow_time.stop();
LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed(); LOG(INFO) << "(logid=" << log_id
<< ") workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric( PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed()); WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
// return tls data to object pool // return tls data to object pool
workflow->return_dag_view(dv); workflow->return_dag_view(dv);
TRACEPRINTF("finish to return dag view"); TRACEPRINTF("(logid=%" PRIu64 ") finish to return dag view", log_id);
return ERR_OK; return ERR_OK;
} }
std::vector<Workflow*>* InferService::_map_request_to_workflow( std::vector<Workflow*>* InferService::_map_request_to_workflow(
const google::protobuf::Message* request) { const google::protobuf::Message* request, const uint64_t log_id) {
const google::protobuf::Descriptor* desc = request->GetDescriptor(); const google::protobuf::Descriptor* desc = request->GetDescriptor();
const google::protobuf::FieldDescriptor* field = const google::protobuf::FieldDescriptor* field =
desc->FindFieldByName(_request_field_key); desc->FindFieldByName(_request_field_key);
if (field == NULL) { if (field == NULL) {
LOG(ERROR) << "No field[" << _request_field_key << "] in [" LOG(ERROR) << "(logid=" << log_id << ") No field[" << _request_field_key
<< desc->full_name() << "]."; << "] in [" << desc->full_name() << "].";
return NULL; return NULL;
} }
if (field->is_repeated()) { if (field->is_repeated()) {
LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
<< "] is repeated."; << _request_field_key << "] is repeated.";
return NULL; return NULL;
} }
if (field->cpp_type() != google::protobuf::FieldDescriptor::CPPTYPE_STRING) { if (field->cpp_type() != google::protobuf::FieldDescriptor::CPPTYPE_STRING) {
LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
<< "] should be string"; << _request_field_key << "] should be string";
return NULL; return NULL;
} }
const std::string& field_value = const std::string& field_value =
...@@ -270,7 +296,7 @@ std::vector<Workflow*>* InferService::_map_request_to_workflow( ...@@ -270,7 +296,7 @@ std::vector<Workflow*>* InferService::_map_request_to_workflow(
std::vector<Workflow*>* p_workflow = std::vector<Workflow*>* p_workflow =
_request_to_workflow_map.seek(field_value); _request_to_workflow_map.seek(field_value);
if (p_workflow == NULL) { if (p_workflow == NULL) {
LOG(ERROR) << "cannot find key[" << field_value LOG(ERROR) << "(logid=" << log_id << ") cannot find key[" << field_value
<< "] in _request_to_workflow_map"; << "] in _request_to_workflow_map";
return NULL; return NULL;
} }
......
...@@ -52,25 +52,29 @@ class InferService { ...@@ -52,25 +52,29 @@ class InferService {
// Execute each workflow serially // Execute each workflow serially
virtual int inference(const google::protobuf::Message* request, virtual int inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os = NULL); butil::IOBufBuilder* debug_os = NULL);
int debug(const google::protobuf::Message* request, int debug(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
int execute_one_workflow(uint32_t index, int execute_one_workflow(uint32_t index,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
private: private:
int _execute_workflow(Workflow* workflow, int _execute_workflow(Workflow* workflow,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
std::vector<Workflow*>* _map_request_to_workflow( std::vector<Workflow*>* _map_request_to_workflow(
const google::protobuf::Message* request); const google::protobuf::Message* request, const uint64_t log_id);
private: private:
std::vector<Workflow*> _flows; std::vector<Workflow*> _flows;
...@@ -88,6 +92,7 @@ class ParallelInferService : public InferService { ...@@ -88,6 +92,7 @@ class ParallelInferService : public InferService {
// Execute workflows in parallel // Execute workflows in parallel
int inference(const google::protobuf::Message* request, int inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
return 0; return 0;
} }
......
...@@ -32,21 +32,22 @@ int Workflow::init(const configure::Workflow& conf) { ...@@ -32,21 +32,22 @@ int Workflow::init(const configure::Workflow& conf) {
return 0; return 0;
} }
DagView* Workflow::fetch_dag_view(const std::string& service_name) { DagView* Workflow::fetch_dag_view(const std::string& service_name,
const uint64_t log_id) {
DagView* view = NULL; DagView* view = NULL;
if (_type == "Sequence") { if (_type == "Sequence") {
view = butil::get_object<DagView>(); view = butil::get_object<DagView>();
} else if (_type == "Parallel") { } else if (_type == "Parallel") {
view = butil::get_object<ParallelDagView>(); view = butil::get_object<ParallelDagView>();
} else { } else {
LOG(ERROR) << "Unknown dag type:" << _type << "!"; LOG(ERROR) << "(logid=" << log_id << ") Unknown dag type:" << _type << "!";
return NULL; return NULL;
} }
if (view == NULL) { if (view == NULL) {
LOG(ERROR) << "create dag view from pool failed!"; LOG(ERROR) << "(logid=" << log_id << ") create dag view from pool failed!";
return NULL; return NULL;
} }
view->init(&_dag, service_name); view->init(&_dag, service_name, log_id);
return view; return view;
} }
......
...@@ -36,7 +36,8 @@ class Workflow { ...@@ -36,7 +36,8 @@ class Workflow {
// different apps. // different apps.
int init(const configure::Workflow& conf); int init(const configure::Workflow& conf);
DagView* fetch_dag_view(const std::string& service_name); DagView* fetch_dag_view(const std::string& service_name,
const uint64_t log_id);
int deinit() { return 0; } int deinit() { return 0; }
......
...@@ -35,7 +35,8 @@ int Op::init(Bus* bus, ...@@ -35,7 +35,8 @@ int Op::init(Bus* bus,
uint32_t id, uint32_t id,
const std::string& name, const std::string& name,
const std::string& type, const std::string& type,
void* conf) { void* conf,
const uint64_t log_id) {
_bus = bus; _bus = bus;
_dag = dag; _dag = dag;
_id = id; _id = id;
...@@ -45,7 +46,8 @@ int Op::init(Bus* bus, ...@@ -45,7 +46,8 @@ int Op::init(Bus* bus,
_timer = butil::get_object<TimerFlow>(); _timer = butil::get_object<TimerFlow>();
if (!_timer) { if (!_timer) {
LOG(ERROR) << "Invalid timerflow in op:" << this->name(); LOG(ERROR) << "(logid=" << log_id
<< ") Invalid timerflow in op:" << this->name();
return -1; return -1;
} }
...@@ -55,7 +57,8 @@ int Op::init(Bus* bus, ...@@ -55,7 +57,8 @@ int Op::init(Bus* bus,
Channel* channel = mutable_channel(); Channel* channel = mutable_channel();
if (channel == NULL) { if (channel == NULL) {
LOG(ERROR) << "Failed mutable channel in op: " << this->id() << ", " LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable channel in op: " << this->id() << ", "
<< this->name() << "!"; << this->name() << "!";
return -1; return -1;
} }
...@@ -96,18 +99,20 @@ int Op::check_time(const char* tag) { ...@@ -96,18 +99,20 @@ int Op::check_time(const char* tag) {
return 0; return 0;
} }
int Op::process(bool debug) { int Op::process(const uint64_t log_id, bool debug) {
butil::Timer op_time(butil::Timer::STARTED); butil::Timer op_time(butil::Timer::STARTED);
if (debug && _timer) { if (debug && _timer) {
_timer->start(); _timer->start();
} }
if (!_has_init) { if (!_has_init) {
LOG(ERROR) << "Make sure op has been init before inference"; LOG(ERROR) << "(logid=" << log_id
<< ") Make sure op has been init before inference";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
if (_has_calc) { if (_has_calc) {
LOG(INFO) << "Op: " << _name << " already processed before"; LOG(INFO) << "(logid=" << log_id << ") Op: " << _name
<< " already processed before";
return ERR_OK; return ERR_OK;
} }
...@@ -143,7 +148,7 @@ int Op::process(bool debug) { ...@@ -143,7 +148,7 @@ int Op::process(bool debug) {
// 3. share output to bus // 3. share output to bus
Channel* channel = mutable_channel(); Channel* channel = mutable_channel();
channel->share_to_bus(_bus); channel->share_to_bus(_bus, log_id);
// 4. mark has calculated // 4. mark has calculated
_has_calc = true; _has_calc = true;
...@@ -156,7 +161,8 @@ int Op::process(bool debug) { ...@@ -156,7 +161,8 @@ int Op::process(bool debug) {
op_time.stop(); op_time.stop();
PredictorMetric::GetInstance()->update_latency_metric( PredictorMetric::GetInstance()->update_latency_metric(
OP_METRIC_PREFIX + full_name(), op_time.u_elapsed()); OP_METRIC_PREFIX + full_name(), op_time.u_elapsed());
LOG(INFO) << " " << name() << "_time=[" << op_time.u_elapsed() << "]"; LOG(INFO) << "(logid=" << log_id << ") " << name() << "_time=["
<< op_time.u_elapsed() << "]";
return ERR_OK; return ERR_OK;
} }
......
...@@ -113,13 +113,14 @@ class Op { ...@@ -113,13 +113,14 @@ class Op {
uint32_t id, uint32_t id,
const std::string& name, const std::string& name,
const std::string& type, const std::string& type,
void* conf); void* conf,
const uint64_t log_id);
int deinit(); int deinit();
int check_time(const char* tag); int check_time(const char* tag);
int process(bool debug); int process(const uint64_t log_id, bool debug);
std::string time_info(); std::string time_info();
......
...@@ -37,6 +37,7 @@ message Request { ...@@ -37,6 +37,7 @@ message Request {
repeated FeedInst insts = 1; repeated FeedInst insts = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
}; };
message Response { message Response {
......
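The `log_id` field added to `Request` above is what the `(logid=...)` prefixes threaded through the server logs in this change are keyed on. A minimal client-side sketch is shown below; the import path follows the standard Paddle Serving Python client, while the `log_id` keyword argument and the feed/fetch names are assumptions for illustration, not something this diff defines:

```python
from paddle_serving_client import Client  # standard Paddle Serving Python client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # placeholder config path
client.connect(["127.0.0.1:9292"])                         # placeholder endpoint

word_ids = [8, 233, 52, 601]  # placeholder feed data
# The log_id value travels with the request, is copied into the brpc controller on the
# server side, and shows up in every "(logid=...)" log line added by this change.
fetch_map = client.predict(feed={"words": word_ids}, fetch=["prediction"], log_id=1234)
print(fetch_map)
```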
...@@ -33,6 +33,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -33,6 +33,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
- The default function of a single OP is to access a single Paddle Serving Service based on the input Channel data and put the result into the output Channel. - The default function of a single OP is to access a single Paddle Serving Service based on the input Channel data and put the result into the output Channel.
- OP supports user customization, including preprocess, process, postprocess functions that can be inherited and implemented by the user. - OP supports user customization, including preprocess, process, postprocess functions that can be inherited and implemented by the user.
- OP can set the number of concurrencies to increase the number of concurrencies processed. - OP can set the number of concurrencies to increase the number of concurrencies processed.
- OP can obtain data from multiple different RPC requests for Auto-Batching.
- OP can be started by a thread or process. - OP can be started by a thread or process.
### Channel Design ### Channel Design
...@@ -46,6 +47,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -46,6 +47,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
</center> </center>
### Extreme Case Consideration ### Extreme Case Consideration
- Request timeout - Request timeout
...@@ -59,9 +61,9 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -59,9 +61,9 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
- Whether input buffers and output buffers in Channel will increase indefinitely - Whether input buffers and output buffers in Channel will increase indefinitely
- It will not increase indefinitely. The input to the entire graph execution engine is placed inside a Channel's internal queue, directly acting as a traffic control buffer queue for the entire service. - It will not increase indefinitely. The input to the entire graph execution engine is placed inside a Channel's internal queue, directly acting as a traffic control buffer queue for the entire service.
- For input buffer, adjust the number of concurrencies of OP1 and OP2 according to the amount of computation, so that the number of input buffers from each input OP is relatively balanced. - For input buffer, adjust the number of concurrencies of OP1 and OP2 according to the amount of computation, so that the number of input buffers from each input OP is relatively balanced. (The length of the input buffer depends on the speed at which each item in the internal queue is ready)
- For output buffer, you can use a similar process as input buffer, which adjusts the concurrency of OP3 and OP4 to control the buffer length of output buffer. - For output buffer, you can use a similar process as input buffer, which adjusts the concurrency of OP3 and OP4 to control the buffer length of output buffer. (The length of the output buffer depends on the speed at which downstream OPs obtain data from the output buffer)
- Note: The length of the input buffer depends on the speed at which each item in the internal queue is ready, and the length of the output buffer depends on the speed at which downstream OPs obtain data from the output buffer. - The amount of data in the Channel will not exceed `worker_num` of gRPC, that is, it will not exceed the thread pool size.
## Detailed Design ## Detailed Design
...@@ -79,31 +81,36 @@ def __init__(name=None, ...@@ -79,31 +81,36 @@ def __init__(name=None,
client_config=None, client_config=None,
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
``` ```
The meaning of each parameter is as follows: The meaning of each parameter is as follows:
| Parameter | Meaning | | Parameter | Meaning |
| :--------------: | :----------------------------------------------------------: | | :-------------------: | :----------------------------------------------------------: |
| name | (str) String used to identify the OP type, which must be globally unique. | | name | (str) String used to identify the OP type, which must be globally unique. |
| input_ops | (list) A list of all previous OPs of the current Op. | | input_ops | (list) A list of all previous OPs of the current Op. |
| server_endpoints | (list) List of endpoints for remote Paddle Serving Service. If this parameter is not set, the OP will not access the remote Paddle Serving Service, that is, the process operation will not be performed. | | server_endpoints | (list) List of endpoints for remote Paddle Serving Service. If this parameter is not set, the OP will not access the remote Paddle Serving Service, that is, the process operation will not be performed. |
| fetch_list | (list) List of fetch variable names for remote Paddle Serving Service. | | fetch_list | (list) List of fetch variable names for remote Paddle Serving Service. |
| client_config | (str) The path of the client configuration file corresponding to the Paddle Serving Service. | | client_config | (str) The path of the client configuration file corresponding to the Paddle Serving Service. |
| concurrency | (int) The number of concurrent OPs. | | concurrency | (int) The number of concurrent OPs. |
| timeout | (int) The timeout time of the process operation, in seconds. If the value is less than zero, no timeout is considered. | | timeout | (int) The timeout time of the process operation, in ms. If the value is less than zero, no timeout is considered. |
| retry | (int) Timeout number of retries. When the value is 1, no retries are made. | | retry | (int) Timeout number of retries. When the value is 1, no retries are made. |
| batch_size | (int) The expected batch_size of Auto-Batching, since building batches may time out, the actual batch_size may be less than the set value. |
| auto_batching_timeout | (float) Timeout for building batches of Auto-Batching (the unit is ms). |
#### 2. General OP Secondary Development Interface #### 2. General OP Secondary Development Interface
| Interface or Variable | Explain | | Interface or Variable | Explain |
| :--------------------------------------------: | :----------------------------------------------------------: | | :----------------------------------------------: | :----------------------------------------------------------: |
| def preprocess(self, input_dicts) | Process the data obtained from the channel, and the processed data will be used as the input of the **process** function. | | def preprocess(self, input_dicts) | Process the data obtained from the channel, and the processed data will be used as the input of the **process** function. (This function handles a **sample**) |
| def process(self, feed_dict) | The RPC prediction process is based on the Paddle Serving Client, and the processed data will be used as the input of the **postprocess** function. | | def process(self, feed_dict_list, typical_logid) | The RPC prediction process is based on the Paddle Serving Client, and the processed data will be used as the input of the **postprocess** function. (This function handles a **batch**) |
| def postprocess(self, input_dicts, fetch_dict) | After processing the prediction results, the processed data will be put into the subsequent Channel to be obtained by the subsequent OP. | | def postprocess(self, input_dicts, fetch_dict) | After processing the prediction results, the processed data will be put into the subsequent Channel to be obtained by the subsequent OP. (This function handles a **sample**) |
| def init_op(self) | Used to load resources (such as word dictionary). | | def init_op(self) | Used to load resources (such as word dictionary). |
| self.concurrency_idx | Concurrency index of current thread / process (different kinds of OP are calculated separately). | | self.concurrency_idx | Concurrency index of current process(not thread) (different kinds of OP are calculated separately). |
In a running cycle, OP will execute three operations: preprocess, process, and postprocess (when the `server_endpoints` parameter is not set, the process operation is not executed). Users can rewrite these three functions. The default implementation is as follows: In a running cycle, OP will execute three operations: preprocess, process, and postprocess (when the `server_endpoints` parameter is not set, the process operation is not executed). Users can rewrite these three functions. The default implementation is as follows:
...@@ -117,24 +124,28 @@ def preprocess(self, input_dicts): ...@@ -117,24 +124,28 @@ def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
return input_dict return input_dict
def process(self, feed_dict): def process(self, feed_dict_list, typical_logid):
err, err_info = ChannelData.check_npdata(feed_dict) err, err_info = ChannelData.check_batch_npdata(feed_dict_list)
if err != 0: if err != 0:
raise NotImplementedError( raise NotImplementedError(
"{} Please override preprocess func.".format(err_info)) "{} Please override preprocess func.".format(err_info))
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_dict, fetch=self._fetch_names) feed=feed_dict_list, fetch=self._fetch_names, log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result return call_result
def postprocess(self, input_dicts, fetch_dict): def postprocess(self, input_dicts, fetch_dict):
return fetch_dict return fetch_dict
``` ```
The parameter of **preprocess** is the data `input_dicts` in the previous Channel. This variable is a dictionary with the name of the previous OP as key and the output of the corresponding OP as value. The parameter of **preprocess** is the data `input_dicts` in the previous Channel. This variable (as a **sample**) is a dictionary with the name of the previous OP as key and the output of the corresponding OP as value.
The parameter of **process** is the input variable `feed_dict` (the return value of the preprocess function) of the Paddle Serving Client prediction interface. This variable is a dictionary with feed_name as the key and the data in the ndarray format as the value. The parameter of **process** is the input variable `feed_dict_list` (a list of the return values of the preprocess function) of the Paddle Serving Client prediction interface. This variable (as a **batch**) is a list of dictionaries with feed_name as the key and the data in the ndarray format as the value. `typical_logid` is used as the logid that is passed through to the Paddle Serving Service.
The parameters of **postprocess** are `input_dicts` and `fetch_dict`. `input_dicts` is consistent with the parameter of preprocess, and `fetch_dict` is the return value of the process function (if process is not executed, this value is the return value of preprocess). The parameters of **postprocess** are `input_dicts` and `fetch_dict`. `input_dicts` is consistent with the parameter of preprocess, and `fetch_dict` (as a **sample**) is a sample of the return batch of the process function (if process is not executed, this value is the return value of preprocess).
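Putting the three hooks together, a user-defined OP that reshapes a sample before prediction and trims the returned fields afterwards could look roughly like this (a sketch; the `words` and `prediction` field names are illustrative, not defined by this document):

```python
import numpy as np
from paddle_serving_server.pipeline import Op  # import path assumed


class ExampleOp(Op):
    def preprocess(self, input_dicts):
        # input_dicts maps each upstream OP name to one sample; a single upstream OP is assumed.
        (_, input_dict), = input_dicts.items()
        # Convert the raw field into the ndarray feed expected by process().
        return {"words": np.array(input_dict["words"])}

    def postprocess(self, input_dicts, fetch_dict):
        # fetch_dict is one sample of the batch returned by process(); forward only
        # the field the downstream OP needs.
        return {"prediction": fetch_dict["prediction"]}
```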
Users can also rewrite the **init_op** function to load some custom resources (such as word dictionary). The default implementation is as follows: Users can also rewrite the **init_op** function to load some custom resources (such as word dictionary). The default implementation is as follows:
...@@ -143,7 +154,7 @@ def init_op(self): ...@@ -143,7 +154,7 @@ def init_op(self):
pass pass
``` ```
It should be noted that in the threaded version of OP, each OP will only call this function once, so the loaded resources must be thread safe. It should be **noted** that in the threaded version of OP, each OP will only call this function once, so the loaded resources must be thread safe.
#### 3. RequestOp Definition #### 3. RequestOp Definition
...@@ -248,6 +259,8 @@ dag: ...@@ -248,6 +259,8 @@ dag:
client_type: brpc # Use brpc or grpc client. The default is brpc client_type: brpc # Use brpc or grpc client. The default is brpc
retry: 1 # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying retry: 1 # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying
use_profile: false # Whether to print the log on the server side. The default is false use_profile: false # Whether to print the log on the server side. The default is false
tracer:
interval_s: 600 # Monitoring time interval of Tracer (in seconds). Do not start monitoring when the value is less than 1. The default value is -1
``` ```
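Such a `config.yml` is then handed to the pipeline server at startup. The sketch below assumes the `PipelineServer` interface used by the examples in this repository (`set_response_op`, `prepare_server`, `run_server`) and a trivial placeholder DAG:

```python
from paddle_serving_server.pipeline import PipelineServer, RequestOp, ResponseOp  # import path assumed

read_op = RequestOp()                            # placeholder DAG: request -> response
response_op = ResponseOp(input_ops=[read_op])

server = PipelineServer()
server.set_response_op(response_op)              # the tail OP implicitly defines the whole DAG
server.prepare_server("config.yml")              # the file containing the dag section above
server.run_server()
```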
...@@ -282,14 +295,8 @@ from paddle_serving_server.pipeline import PipelineServer ...@@ -282,14 +295,8 @@ from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger()
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
def init_op(self): def init_op(self):
self.imdb_dataset = IMDBDataset() self.imdb_dataset = IMDBDataset()
...@@ -390,15 +397,23 @@ dag: ...@@ -390,15 +397,23 @@ dag:
use_profile: true use_profile: true
``` ```
After the function is enabled, the server will print the corresponding log information to the standard output in the process of prediction. In order to show the time consumption of each stage more intuitively, scripts are provided for further analysis and processing of log files. After the function is enabled, the server will print the corresponding log information to the standard output in the process of prediction. In order to show the time consumption of each stage more intuitively, an Analyst module is provided for further analysis and processing of log files.
The output of the server is first saved to a file. Taking profile as an example, the script converts the time monitoring information in the log into JSON format and saves it to the trace file. The trace file can be visualized through the tracing function of Chrome browser. The output of the server is first saved to a file. Taking `profile.txt` as an example, the script converts the time monitoring information in the log into JSON format and saves it to the `trace` file. The `trace` file can be visualized through the tracing function of Chrome browser.
```shell ```shell
python timeline_trace.py profile trace from paddle_serving_server.pipeline import Analyst
import json
import sys
if __name__ == "__main__":
log_filename = "profile.txt"
trace_filename = "trace"
analyst = Analyst(log_filename)
analyst.save_trace(trace_filename)
``` ```
Specific operation: open Chrome browser, input in the address bar `chrome://tracing/` , jump to the tracing page, click the load button, open the saved trace file, and then visualize the time information of each stage of the prediction service. Specific operation: open Chrome browser, input in the address bar `chrome://tracing/` , jump to the tracing page, click the load button, open the saved `trace` file, and then visualize the time information of each stage of the prediction service.
### Output profile information on client side ### Output profile information on client side
......
...@@ -6,6 +6,7 @@ Paddle Serving 通常用于单模型的一键部署,但端到端的深度学 ...@@ -6,6 +6,7 @@ Paddle Serving 通常用于单模型的一键部署,但端到端的深度学
Paddle Serving 提供了用户友好的多模型组合服务编程框架,Pipeline Serving,旨在降低编程门槛,提高资源使用率(尤其是GPU设备),提升整体的预估效率。 Paddle Serving 提供了用户友好的多模型组合服务编程框架,Pipeline Serving,旨在降低编程门槛,提高资源使用率(尤其是GPU设备),提升整体的预估效率。
## 整体架构设计 ## 整体架构设计
Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示。 Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示。
...@@ -30,9 +31,10 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示 ...@@ -30,9 +31,10 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示
### OP的设计 ### OP的设计
- 单个OP默认的功能是根据输入的 Channel 数据,访问一个 Paddle Serving 的单模型服务,并将结果存在输出的 Channel - 单个 OP 默认的功能是根据输入的 Channel 数据,访问一个 Paddle Serving 的单模型服务,并将结果存在输出的 Channel
- 单个 OP 可以支持用户自定义,包括 preprocess,process,postprocess 三个函数都可以由用户继承和实现 - 单个 OP 可以支持用户自定义,包括 preprocess,process,postprocess 三个函数都可以由用户继承和实现
- 单个 OP 可以控制并发数,从而增加处理并发数 - 单个 OP 可以控制并发数,从而增加处理并发数
- 单个 OP 可以获取多个不同 RPC 请求的数据,以实现 Auto-Batching
- OP 可以由线程或进程启动 - OP 可以由线程或进程启动
### Channel的设计 ### Channel的设计
...@@ -59,11 +61,9 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示 ...@@ -59,11 +61,9 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示
- Channel 设计中的 input buffer 和 output buffer 是否会无限增加 - Channel 设计中的 input buffer 和 output buffer 是否会无限增加
- 不会。整个图执行引擎的输入会放到一个 Channel 的 internal queue 里面,直接作为整个服务的流量控制缓冲队列 - 不会。整个图执行引擎的输入会放到一个 Channel 的 internal queue 里面,直接作为整个服务的流量控制缓冲队列
- 对于 input buffer,根据计算量的情况调整 OP1 和 OP2 的并发数,使得 input buffer 来自各个输入 OP 的数量相对平衡 - 对于 input buffer,根据计算量的情况调整 OP1 和 OP2 的并发数,使得 input buffer 来自各个输入 OP 的数量相对平衡(input buffer 的长度取决于 internal queue 中每个 item 完全 ready 的速度)
- 对于 output buffer,可以采用和 input buffer 类似的处理方法,即调整 OP3 和 OP4 的并发数,使得 output buffer 的缓冲长度得到控制 - 对于 output buffer,可以采用和 input buffer 类似的处理方法,即调整 OP3 和 OP4 的并发数,使得 output buffer 的缓冲长度得到控制(output buffer 的长度取决于下游 OP 从 output buffer 获取数据的速度)
- 注:input buffer 的长度取决于 internal queue 中每个 item 完全 ready 的速度,output buffer 的长度取决于下游 OP 从 output buffer 获取数据的速度 - 同时 Channel 中数据量不会超过 gRPC 的 `worker_num`,即线程池大小
## 详细设计
### 用户接口设计 ### 用户接口设计
...@@ -79,31 +79,36 @@ def __init__(name=None, ...@@ -79,31 +79,36 @@ def __init__(name=None,
client_config=None, client_config=None,
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
``` ```
各参数含义如下 各参数含义如下
| 参数名 | 含义 | | 参数名 | 含义 |
| :--------------: | :----------------------------------------------------------: | | :-------------------: | :----------------------------------------------------------: |
| name | (str)用于标识 OP 类型的字符串,该字段必须全局唯一。 | | name | (str)用于标识 OP 类型的字符串,该字段必须全局唯一。 |
| input_ops | (list)当前 OP 的所有前继 OP 的列表。 | | input_ops | (list)当前 OP 的所有前继 OP 的列表。 |
| server_endpoints | (list)远程 Paddle Serving Service 的 endpoints 列表。如果不设置该参数,则不访问远程 Paddle Serving Service,即 不会执行 process 操作。 | | server_endpoints | (list)远程 Paddle Serving Service 的 endpoints 列表。如果不设置该参数,则不访问远程 Paddle Serving Service,即 不会执行 process 操作。 |
| fetch_list | (list)远程 Paddle Serving Service 的 fetch 列表。 | | fetch_list | (list)远程 Paddle Serving Service 的 fetch 列表。 |
| client_config | (str)Paddle Serving Service 对应的 Client 端配置文件路径。 | | client_config | (str)Paddle Serving Service 对应的 Client 端配置文件路径。 |
| concurrency | (int)OP 的并发数。 | | concurrency | (int)OP 的并发数。 |
| timeout | (int)process 操作的超时时间,单位为秒。若该值小于零,则视作不超时。 | | timeout | (int)process 操作的超时时间,单位为毫秒。若该值小于零,则视作不超时。 |
| retry | (int)超时重试次数。当该值为 1 时,不进行重试。 | | retry | (int)超时重试次数。当该值为 1 时,不进行重试。 |
| batch_size | (int)进行 Auto-Batching 的期望 batch_size 大小,由于构建 batch 可能超时,实际 batch_size 可能小于设定值。 |
| auto_batching_timeout | (float)进行 Auto-Batching 构建 batch 的超时时间,单位为毫秒。 |
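To make the parameters above concrete, here is a minimal sketch of constructing an Op with Auto-Batching enabled; the endpoint, client config path, fetch name and the upstream `read_op` are placeholders borrowed from the IMDB example later in this document.

```python
from paddle_serving_server.pipeline import Op

# Minimal sketch: a "bow" Op that calls a remote Paddle Serving service with
# Auto-Batching enabled (endpoint, config path and fetch name are placeholders).
bow_op = Op(
    name="bow",
    input_ops=[read_op],                  # read_op is assumed to be the RequestOp of the DAG
    server_endpoints=["127.0.0.1:9393"],
    fetch_list=["prediction"],
    client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
    concurrency=1,
    timeout=-1,                           # no timeout for the process step
    retry=1,
    batch_size=3,                         # try to gather 3 samples per batch
    auto_batching_timeout=1000)           # wait at most 1000 ms to build a batch
```

With `batch_size=3` and `auto_batching_timeout=1000`, the Op waits up to 1000 ms to gather 3 samples before calling the remote service; as the table notes, a smaller batch is sent if the timeout expires first.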
#### 2. 普通 OP二次开发接口 #### 2. 普通 OP二次开发接口
| 变量或接口 | 说明 | | 变量或接口 | 说明 |
| :--------------------------------------------: | :----------------------------------------------------------: | | :----------------------------------------------: | :----------------------------------------------------------: |
| def preprocess(self, input_dicts) | 对从 Channel 中获取的数据进行处理,处理完的数据将作为 **process** 函数的输入。 | | def preprocess(self, input_dicts) | 对从 Channel 中获取的数据进行处理,处理完的数据将作为 **process** 函数的输入。(该函数对一个 **sample** 进行处理) |
| def process(self, feed_dict) | 基于 Paddle Serving Client 进行 RPC 预测,处理完的数据将作为 **postprocess** 函数的输入。 | | def process(self, feed_dict_list, typical_logid) | 基于 Paddle Serving Client 进行 RPC 预测,处理完的数据将作为 **postprocess** 函数的输入。(该函数对一个 **batch** 进行处理) |
| def postprocess(self, input_dicts, fetch_dict) | 处理预测结果,处理完的数据将被放入后继 Channel 中,以被后继 OP 获取。 | | def postprocess(self, input_dicts, fetch_dict) | 处理预测结果,处理完的数据将被放入后继 Channel 中,以被后继 OP 获取。(该函数对一个 **sample** 进行处理) |
| def init_op(self) | 用于加载资源(如字典等)。 | | def init_op(self) | 用于加载资源(如字典等)。 |
| self.concurrency_idx | 当前线程(进程)的并发数索引(不同种类的 OP 单独计算)。 | | self.concurrency_idx | 当前进程(非线程)的并发数索引(不同种类的 OP 单独计算)。 |
OP 在一个运行周期中会依次执行 preprocess,process,postprocess 三个操作(当不设置 `server_endpoints` 参数时,不执行 process 操作),用户可以对这三个函数进行重写,默认实现如下: OP 在一个运行周期中会依次执行 preprocess,process,postprocess 三个操作(当不设置 `server_endpoints` 参数时,不执行 process 操作),用户可以对这三个函数进行重写,默认实现如下:
...@@ -117,25 +122,28 @@ def preprocess(self, input_dicts): ...@@ -117,25 +122,28 @@ def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
return input_dict return input_dict
def process(self, feed_dict): def process(self, feed_dict_list, typical_logid):
err, err_info = ChannelData.check_npdata(feed_dict) err, err_info = ChannelData.check_batch_npdata(feed_dict_list)
if err != 0: if err != 0:
raise NotImplementedError( raise NotImplementedError(
"{} Please override preprocess func.".format(err_info)) "{} Please override preprocess func.".format(err_info))
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_dict, fetch=self._fetch_names) feed=feed_dict_list, fetch=self._fetch_names, log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result return call_result
def postprocess(self, input_dicts, fetch_dict): def postprocess(self, input_dicts, fetch_dict):
return fetch_dict return fetch_dict
``` ```
**preprocess** 的参数是前继 Channel 中的数据 `input_dicts`,该变量(作为一个 **sample**)是一个以前继 OP 的 name 为 Key,对应 OP 的输出为 Value 的字典。
**preprocess** 的参数是前继 Channel 中的数据 `input_dicts`,该变量是一个以前继 OP 的 name 为 Key,对应 OP 的输出为 Value 的字典 **process** 的参数是 Paddle Serving Client 预测接口的输入变量 `feed_dict_list`(preprocess 函数的返回值的列表),该变量(作为一个 **batch**)是一个列表,列表中的元素为以 feed_name 为 Key,对应 ndarray 格式的数据为 Value 的字典。`typical_logid` 作为向 PaddleServingService 穿透的 logid。
**process** 的参数是 Paddle Serving Client 预测接口的输入变量 `feed_dict`(preprocess 函数的返回值),该变量是一个以 feed_name 为 Key,对应 ndarray 格式的数据为 Value 的字典。 **postprocess** 的参数是 `input_dicts` 和 `fetch_dict`,`input_dicts` 与 preprocess 的参数一致,`fetch_dict`(作为一个 **sample**)是 process 函数返回的 batch 中的一个 sample(如果没有执行 process,则该值为 preprocess 的返回值)。
**postprocess** 的参数是 `input_dicts` 和 `fetch_dict`,`input_dicts` 与 preprocess 的参数一致,`fetch_dict` 是 process 函数的返回值(如果没有执行 process,则该值为 preprocess 的返回值)。
用户还可以对 **init_op** 函数进行重写,以加载自定义的一些资源(比如字典等),默认实现如下: 用户还可以对 **init_op** 函数进行重写,以加载自定义的一些资源(比如字典等),默认实现如下:
...@@ -144,7 +152,7 @@ def init_op(self): ...@@ -144,7 +152,7 @@ def init_op(self):
pass pass
``` ```
需要注意的是,在线程版 OP 中,每个 OP 只会调用一次该函数,故加载的资源必须要求是线程安全的。 需要**注意**的是,在线程版 OP 中,每个 OP 只会调用一次该函数,故加载的资源必须要求是线程安全的。
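As a rough sketch of the secondary-development interface described above (the vocabulary file and field names below are hypothetical), a custom Op usually loads its resources in `init_op`, maps one sample to a feed dict in `preprocess`, and trims one sample of the fetch results in `postprocess`, leaving `process` to the default implementation so the remote Paddle Serving service is still called:

```python
from paddle_serving_server.pipeline import Op

class MyOp(Op):
    def init_op(self):
        # Load resources once per Op; in thread mode this must be thread-safe.
        # "vocab.txt" is a hypothetical resource file.
        with open("vocab.txt") as f:
            self.vocab = [line.strip() for line in f]

    def preprocess(self, input_dicts):
        # One sample: a dict keyed by the names of the preceding Ops.
        (_, input_dict), = input_dicts.items()
        # Build the feed dict expected by the remote Paddle Serving service.
        return {"words": input_dict["words"]}

    def postprocess(self, input_dicts, fetch_dict):
        # One sample of the fetch results produced by process.
        return {"prediction": fetch_dict["prediction"]}
```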
#### 3. RequestOp 定义 #### 3. RequestOp 定义
...@@ -249,6 +257,8 @@ dag: ...@@ -249,6 +257,8 @@ dag:
client_type: brpc # 使用 brpc 或 grpc client,默认为 brpc client_type: brpc # 使用 brpc 或 grpc client,默认为 brpc
retry: 1 # DAG Executor 在失败后重试次数,默认为 1,即不重试 retry: 1 # DAG Executor 在失败后重试次数,默认为 1,即不重试
use_profile: false # 是否在 Server 端打印日志,默认为 false use_profile: false # 是否在 Server 端打印日志,默认为 false
tracer:
interval_s: 600 # Tracer 监控的时间间隔,单位为秒。当该值小于 1 时不启动监控,默认为 -1
``` ```
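A configuration like the one above is then handed to the pipeline server; the sketch below assumes `response_op` has already been built from the user-defined Ops, as in the IMDB example that follows.

```python
from paddle_serving_server.pipeline import PipelineServer

# Minimal sketch: response_op is assumed to be the ResponseOp at the end of the DAG.
server = PipelineServer()
server.set_response_op(response_op)
server.prepare_server("config.yml")  # reads the port / worker_num / dag options shown above
server.run_server()
```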
...@@ -283,14 +293,8 @@ from paddle_serving_server.pipeline import PipelineServer ...@@ -283,14 +293,8 @@ from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger()
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
def init_op(self): def init_op(self):
self.imdb_dataset = IMDBDataset() self.imdb_dataset = IMDBDataset()
...@@ -311,7 +315,6 @@ class CombineOp(Op): ...@@ -311,7 +315,6 @@ class CombineOp(Op):
def preprocess(self, input_data): def preprocess(self, input_data):
combined_prediction = 0 combined_prediction = 0
for op_name, data in input_data.items(): for op_name, data in input_data.items():
_LOGGER.info("{}: {}".format(op_name, data["prediction"]))
combined_prediction += data["prediction"] combined_prediction += data["prediction"]
data = {"prediction": combined_prediction / 2} data = {"prediction": combined_prediction / 2}
return data return data
...@@ -391,15 +394,23 @@ dag: ...@@ -391,15 +394,23 @@ dag:
use_profile: true use_profile: true
``` ```
开启该功能后,Server 端在预测的过程中会将对应的日志信息打印到标准输出,为了更直观地展现各阶段的耗时,提供脚本对日志文件做进一步的分析处理。 开启该功能后,Server 端在预测的过程中会将对应的日志信息打印到标准输出,为了更直观地展现各阶段的耗时,提供 Analyst 模块对日志文件做进一步的分析处理。
使用时先将 Server 的输出保存到文件,以 profile 为例,脚本将日志中的时间打点信息转换成 json 格式保存到trace 文件,trace 文件可以通过 chrome 浏览器的 tracing 功能进行可视化。 使用时先将 Server 的输出保存到文件,以 `profile.txt` 为例,脚本将日志中的时间打点信息转换成 json 格式保存到 `trace` 文件,`trace` 文件可以通过 chrome 浏览器的 tracing 功能进行可视化。
```shell ```python
python timeline_trace.py profile trace from paddle_serving_server.pipeline import Analyst
import json
import sys
if __name__ == "__main__":
log_filename = "profile.txt"
trace_filename = "trace"
analyst = Analyst(log_filename)
analyst.save_trace(trace_filename)
``` ```
具体操作:打开 chrome 浏览器,在地址栏输入 chrome://tracing/ ,跳转至 tracing 页面,点击 load 按钮,打开保存的 trace 文件,即可将预测服务的各阶段时间信息可视化。 具体操作:打开 chrome 浏览器,在地址栏输入 `chrome://tracing/` ,跳转至 tracing 页面,点击 load 按钮,打开保存的 `trace` 文件,即可将预测服务的各阶段时间信息可视化。
### 在 Client 端输出 Profile 信息 ### 在 Client 端输出 Profile 信息
......
doc/pipeline_serving-image1.png (image updated: 96.0 KB → 107.7 KB)
...@@ -192,14 +192,16 @@ public class Client { ...@@ -192,14 +192,16 @@ public class Client {
private InferenceRequest _packInferenceRequest( private InferenceRequest _packInferenceRequest(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) throws IllegalArgumentException { Iterable<String> fetch,
long log_id) throws IllegalArgumentException {
List<String> feed_var_names = new ArrayList<String>(); List<String> feed_var_names = new ArrayList<String>();
feed_var_names.addAll(feed_batch.get(0).keySet()); feed_var_names.addAll(feed_batch.get(0).keySet());
InferenceRequest.Builder req_builder = InferenceRequest.newBuilder() InferenceRequest.Builder req_builder = InferenceRequest.newBuilder()
.addAllFeedVarNames(feed_var_names) .addAllFeedVarNames(feed_var_names)
.addAllFetchVarNames(fetch) .addAllFetchVarNames(fetch)
.setIsPython(false); .setIsPython(false)
.setLogId(log_id);
for (HashMap<String, INDArray> feed_data: feed_batch) { for (HashMap<String, INDArray> feed_data: feed_batch) {
FeedInst.Builder inst_builder = FeedInst.newBuilder(); FeedInst.Builder inst_builder = FeedInst.newBuilder();
for (String name: feed_var_names) { for (String name: feed_var_names) {
...@@ -332,76 +334,151 @@ public class Client { ...@@ -332,76 +334,151 @@ public class Client {
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return predict(feed, fetch, false); return predict(feed, fetch, false, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return predict(feed, fetch, false, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return ensemble_predict(feed, fetch, false); return ensemble_predict(feed, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed, fetch, false, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return asyn_predict(feed, fetch, false); return asyn_predict(feed, fetch, false, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed, fetch, false, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return predict(feed_batch, fetch, need_variant_tag); return predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return ensemble_predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return ensemble_predict(feed_batch, fetch, need_variant_tag); return ensemble_predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return asyn_predict(feed, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return asyn_predict(feed_batch, fetch, need_variant_tag); return asyn_predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return predict(feed_batch, fetch, false); return predict(feed_batch, fetch, false, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return predict(feed_batch, fetch, false, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return ensemble_predict(feed_batch, fetch, false); return ensemble_predict(feed_batch, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed_batch, fetch, false, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return asyn_predict(feed_batch, fetch, false); return asyn_predict(feed_batch, fetch, false, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed_batch, fetch, false, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try { try {
profiler_.record("java_prepro_0"); profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1"); profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0"); profiler_.record("java_client_infer_0");
...@@ -415,7 +492,7 @@ public class Client { ...@@ -415,7 +492,7 @@ public class Client {
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>( = new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet()); ensemble_result.entrySet());
if (list.size() != 1) { if (list.size() != 1) {
System.out.format("predict failed: please use ensemble_predict impl.\n"); System.out.format("Failed to predict: please use ensemble_predict impl.\n");
return null; return null;
} }
profiler_.record("java_postpro_1"); profiler_.record("java_postpro_1");
...@@ -423,7 +500,7 @@ public class Client { ...@@ -423,7 +500,7 @@ public class Client {
return list.get(0).getValue(); return list.get(0).getValue();
} catch (StatusRuntimeException e) { } catch (StatusRuntimeException e) {
System.out.format("predict failed: %s\n", e.toString()); System.out.format("Failed to predict: %s\n", e.toString());
return null; return null;
} }
} }
...@@ -432,9 +509,18 @@ public class Client { ...@@ -432,9 +509,18 @@ public class Client {
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return ensemble_predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try { try {
profiler_.record("java_prepro_0"); profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1"); profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0"); profiler_.record("java_client_infer_0");
...@@ -449,7 +535,7 @@ public class Client { ...@@ -449,7 +535,7 @@ public class Client {
return ensemble_result; return ensemble_result;
} catch (StatusRuntimeException e) { } catch (StatusRuntimeException e) {
System.out.format("predict failed: %s\n", e.toString()); System.out.format("Failed to predict: %s\n", e.toString());
return null; return null;
} }
} }
...@@ -458,7 +544,16 @@ public class Client { ...@@ -458,7 +544,16 @@ public class Client {
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); return asyn_predict(feed_batch, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
ListenableFuture<InferenceResponse> future = futureStub_.inference(req); ListenableFuture<InferenceResponse> future = futureStub_.inference(req);
PredictFuture predict_future = new PredictFuture(future, PredictFuture predict_future = new PredictFuture(future,
(InferenceResponse resp) -> { (InferenceResponse resp) -> {
......
...@@ -37,6 +37,7 @@ message InferenceRequest { ...@@ -37,6 +37,7 @@ message InferenceRequest {
repeated string feed_var_names = 2; repeated string feed_var_names = 2;
repeated string fetch_var_names = 3; repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ]; required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
}; };
message InferenceResponse { message InferenceResponse {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model blazeface python -m paddle_serving_app.package --get_model blazeface
tar -xzvf blazeface.tar.gz tar -xf blazeface.tar.gz
``` ```
## RPC Service ## RPC Service
......
port: 18080 port: 18080
worker_num: 1 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: true is_thread_op: false
client_type: brpc client_type: brpc
retry: 1 retry: 1
use_profile: false use_profile: false
tracer:
interval_s: 10
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
client = PipelineClient() client = PipelineClient()
...@@ -20,12 +20,16 @@ client.connect(['127.0.0.1:18080']) ...@@ -20,12 +20,16 @@ client.connect(['127.0.0.1:18080'])
words = 'i am very sad | 0' words = 'i am very sad | 0'
futures = [] futures = []
for i in range(100): for i in range(4):
futures.append( futures.append(
client.predict( client.predict(
feed_dict={"words": words}, fetch=["prediction"], asyn=True)) feed_dict={"words": words},
fetch=["prediction"],
asyn=True,
profile=False))
for f in futures: for f in futures:
res = f.result() res = f.result()
if res["ecode"] != 0: if res["ecode"] != 0:
print("predict failed: {}".format(res)) print("predict failed: {}".format(res))
print(res)
...@@ -12,18 +12,21 @@ ...@@ -12,18 +12,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
import logging
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
user_handler.setLevel(logging.INFO)
user_handler.setFormatter(
logging.Formatter(
"%(levelname)s %(asctime)s [%(filename)s:%(lineno)d] %(message)s"))
_LOGGER.addHandler(user_handler)
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
...@@ -76,7 +79,9 @@ bow_op = Op(name="bow", ...@@ -76,7 +79,9 @@ bow_op = Op(name="bow",
client_config="imdb_bow_client_conf/serving_client_conf.prototxt", client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=3,
auto_batching_timeout=1000)
cnn_op = Op(name="cnn", cnn_op = Op(name="cnn",
input_ops=[read_op], input_ops=[read_op],
server_endpoints=["127.0.0.1:9292"], server_endpoints=["127.0.0.1:9292"],
...@@ -84,13 +89,17 @@ cnn_op = Op(name="cnn", ...@@ -84,13 +89,17 @@ cnn_op = Op(name="cnn",
client_config="imdb_cnn_client_conf/serving_client_conf.prototxt", client_config="imdb_cnn_client_conf/serving_client_conf.prototxt",
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
combine_op = CombineOp( combine_op = CombineOp(
name="combine", name="combine",
input_ops=[bow_op, cnn_op], input_ops=[bow_op, cnn_op],
concurrency=5, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=2,
auto_batching_timeout=None)
# fetch output of bow_op # fetch output of bow_op
# response_op = ImdbResponseOp(input_ops=[bow_op]) # response_op = ImdbResponseOp(input_ops=[bow_op])
......
...@@ -233,7 +233,7 @@ class Client(object): ...@@ -233,7 +233,7 @@ class Client(object):
# key)) # key))
pass pass
def predict(self, feed=None, fetch=None, need_variant_tag=False): def predict(self, feed=None, fetch=None, need_variant_tag=False, log_id=0):
self.profile_.record('py_prepro_0') self.profile_.record('py_prepro_0')
if feed is None or fetch is None: if feed is None or fetch is None:
...@@ -319,12 +319,12 @@ class Client(object): ...@@ -319,12 +319,12 @@ class Client(object):
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch, float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle, int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid) self.pid, log_id)
elif self.has_numpy_input == False: elif self.has_numpy_input == False:
res = self.client_handle_.batch_predict( res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch, float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle, int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid) self.pid, log_id)
else: else:
raise ValueError( raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type" "Please make sure the inputs are all in list type or all in numpy.array type"
...@@ -347,6 +347,11 @@ class Client(object): ...@@ -347,6 +347,11 @@ class Client(object):
result_map[name] = result_batch_handle.get_int64_by_name( result_map[name] = result_batch_handle.get_int64_by_name(
mi, name) mi, name)
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
result_map["{}.lod".format( result_map["{}.lod".format(
...@@ -354,6 +359,11 @@ class Client(object): ...@@ -354,6 +359,11 @@ class Client(object):
elif self.fetch_names_to_type_[name] == float32_type: elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name( result_map[name] = result_batch_handle.get_float_by_name(
mi, name) mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
...@@ -364,6 +374,11 @@ class Client(object): ...@@ -364,6 +374,11 @@ class Client(object):
# result_map[name] will be py::array(numpy array) # result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name( result_map[name] = result_batch_handle.get_int32_by_name(
mi, name) mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
...@@ -466,10 +481,11 @@ class MultiLangClient(object): ...@@ -466,10 +481,11 @@ class MultiLangClient(object):
if var.is_lod_tensor: if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name) self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python): def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest() req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch) req.fetch_var_names.extend(fetch)
req.is_python = is_python req.is_python = is_python
req.log_id = log_id
feed_batch = None feed_batch = None
if isinstance(feed, dict): if isinstance(feed, dict):
feed_batch = [feed] feed_batch = [feed]
...@@ -602,12 +618,13 @@ class MultiLangClient(object): ...@@ -602,12 +618,13 @@ class MultiLangClient(object):
fetch, fetch,
need_variant_tag=False, need_variant_tag=False,
asyn=False, asyn=False,
is_python=True): is_python=True,
log_id=0):
if not asyn: if not asyn:
try: try:
self.profile_.record('py_prepro_0') self.profile_.record('py_prepro_0')
req = self._pack_inference_request( req = self._pack_inference_request(
feed, fetch, is_python=is_python) feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0') self.profile_.record('py_client_infer_0')
...@@ -626,7 +643,8 @@ class MultiLangClient(object): ...@@ -626,7 +643,8 @@ class MultiLangClient(object):
except grpc.RpcError as e: except grpc.RpcError as e:
return {"serving_status_code": e.code()} return {"serving_status_code": e.code()}
else: else:
req = self._pack_inference_request(feed, fetch, is_python=is_python) req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future( call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_) req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture( return MultiLangPredictFuture(
......
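As a rough illustration of the new `log_id` argument added above (the endpoint, feed name, fetch name and shape are placeholders), a gRPC client can now attach a request-level id that is packed into `InferenceRequest.log_id` and forwarded to the server:

```python
from paddle_serving_client import MultiLangClient
import numpy as np

# Minimal sketch: endpoint, feed/fetch names and the input shape are placeholders.
client = MultiLangClient()
client.connect(["127.0.0.1:9393"])

fetch_map = client.predict(
    feed={"x": np.ones((1, 13), dtype="float32")},
    fetch=["price"],
    log_id=1234)   # new argument, carried as InferenceRequest.log_id
print(fetch_map)
```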
...@@ -502,6 +502,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -502,6 +502,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
feed_names = list(request.feed_var_names) feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names) fetch_names = list(request.fetch_var_names)
is_python = request.is_python is_python = request.is_python
log_id = request.log_id
feed_batch = [] feed_batch = []
for feed_inst in request.insts: for feed_inst in request.insts:
feed_dict = {} feed_dict = {}
...@@ -530,7 +531,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -530,7 +531,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
data.shape = list(feed_inst.tensor_array[idx].shape) data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data feed_dict[name] = data
feed_batch.append(feed_dict) feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python): def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse() resp = multi_lang_general_model_service_pb2.InferenceResponse()
...@@ -583,10 +584,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -583,10 +584,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
return resp return resp
def Inference(self, request, context): def Inference(self, request, context):
feed_dict, fetch_names, is_python = self._unpack_inference_request( feed_dict, fetch_names, is_python, log_id = \
request) self._unpack_inference_request(request)
ret = self.bclient_.predict( ret = self.bclient_.predict(
feed=feed_dict, fetch=fetch_names, need_variant_tag=True) feed=feed_dict,
fetch=fetch_names,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python) return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context): def GetClientConfig(self, request, context):
......
...@@ -552,6 +552,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -552,6 +552,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
feed_names = list(request.feed_var_names) feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names) fetch_names = list(request.fetch_var_names)
is_python = request.is_python is_python = request.is_python
log_id = request.log_id
feed_batch = [] feed_batch = []
for feed_inst in request.insts: for feed_inst in request.insts:
feed_dict = {} feed_dict = {}
...@@ -580,7 +581,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -580,7 +581,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
data.shape = list(feed_inst.tensor_array[idx].shape) data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data feed_dict[name] = data
feed_batch.append(feed_dict) feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python): def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse() resp = multi_lang_general_model_service_pb2.InferenceResponse()
...@@ -633,10 +634,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -633,10 +634,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
return resp return resp
def Inference(self, request, context): def Inference(self, request, context):
feed_dict, fetch_names, is_python = self._unpack_inference_request( feed_dict, fetch_names, is_python, log_id \
request) = self._unpack_inference_request(request)
ret = self.bclient_.predict( ret = self.bclient_.predict(
feed=feed_dict, fetch=fetch_names, need_variant_tag=True) feed=feed_dict,
fetch=fetch_names,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python) return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context): def GetClientConfig(self, request, context):
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logger # this module must be the first to import
from operator import Op, RequestOp, ResponseOp from operator import Op, RequestOp, ResponseOp
from pipeline_server import PipelineServer from pipeline_server import PipelineServer
from pipeline_client import PipelineClient from pipeline_client import PipelineClient
......
...@@ -17,7 +17,7 @@ import copy ...@@ -17,7 +17,7 @@ import copy
import re import re
import logging import logging
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class Analyst(object): class Analyst(object):
...@@ -69,7 +69,7 @@ class Analyst(object): ...@@ -69,7 +69,7 @@ class Analyst(object):
with open(self._profile_file) as f: with open(self._profile_file) as f:
for line in f.readlines(): for line in f.readlines():
line = line.strip().split("\t") line = line.strip().split("\t")
if line[0] == "PROFILE": if line[0] == "PROFILE" and len(line) >= 3:
trace_list = self._prase_line(line[1], line[2], counter) trace_list = self._prase_line(line[1], line[2], counter)
counter += 1 counter += 1
for trace in trace_list: for trace in trace_list:
...@@ -164,7 +164,7 @@ class OpAnalyst(object): ...@@ -164,7 +164,7 @@ class OpAnalyst(object):
def add(self, name_str, ts_list): def add(self, name_str, ts_list):
if self._close: if self._close:
_LOGGER.error("OpAnalyst is closed.") _LOGGER.error("Failed to add item: OpAnalyst is closed.")
return return
op_name, curr_idx, step = self._parse(name_str) op_name, curr_idx, step = self._parse(name_str)
if op_name not in self.op_time_list_dict: if op_name not in self.op_time_list_dict:
......
This diff is collapsed.
This diff is collapsed.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import logging.config
import os
class SectionLevelFilter(object):
def __init__(self, levels):
self._levels = levels
def filter(self, logRecord):
return logRecord.levelno in self._levels
log_dir = "PipelineServingLogs"
if not os.path.exists(log_dir):
os.makedirs(log_dir)
logger_config = {
"version": 1,
"formatters": {
"normal_fmt": {
"format":
"%(levelname)s %(asctime)s [%(filename)s:%(lineno)d] %(message)s",
},
"tracer_fmt": {
"format": "%(asctime)s %(message)s",
},
},
"handlers": {
"f_pipeline.log": {
"class": "logging.FileHandler",
"level": "INFO",
"formatter": "normal_fmt",
"filename": os.path.join(log_dir, "pipeline.log"),
},
"f_pipeline.log.wf": {
"class": "logging.FileHandler",
"level": "WARNING",
"formatter": "normal_fmt",
"filename": os.path.join(log_dir, "pipeline.log.wf"),
},
"f_tracer.log": {
"class": "logging.FileHandler",
"level": "INFO",
"formatter": "tracer_fmt",
"filename": os.path.join(log_dir, "pipeline.tracer"),
},
},
"loggers": {
# propagate = True
".".join(__name__.split(".")[:-1] + ["profiler"]): {
"level": "INFO",
"handlers": ["f_tracer.log"],
},
},
"root": {
"level": "DEBUG",
"handlers": ["f_pipeline.log", "f_pipeline.log.wf"],
},
}
logging.config.dictConfig(logger_config)
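A minimal sketch of how this configuration is consumed (module and message names are illustrative): a pipeline module creates its logger with `logging.getLogger(__name__)`, and records propagate to the root handlers above, so INFO goes to `PipelineServingLogs/pipeline.log`, WARNING and above also go to `pipeline.log.wf`, and the profiler module's logger is routed to `pipeline.tracer`.

```python
import logging

# Illustrative only: a module under the pipeline package obtains its logger by name;
# records propagate to the root handlers configured in the dict above.
_LOGGER = logging.getLogger(__name__)
_LOGGER.info("op [bow] started")          # -> PipelineServingLogs/pipeline.log
_LOGGER.warning("channel size exceeded")  # -> pipeline.log and pipeline.log.wf
```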
This diff is collapsed.
...@@ -18,10 +18,11 @@ import numpy as np ...@@ -18,10 +18,11 @@ import numpy as np
from numpy import * from numpy import *
import logging import logging
import functools import functools
from .channel import ChannelDataEcode
from .proto import pipeline_service_pb2 from .proto import pipeline_service_pb2
from .proto import pipeline_service_pb2_grpc from .proto import pipeline_service_pb2_grpc
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class PipelineClient(object): class PipelineClient(object):
...@@ -59,7 +60,11 @@ class PipelineClient(object): ...@@ -59,7 +60,11 @@ class PipelineClient(object):
def _unpack_response_package(self, resp, fetch): def _unpack_response_package(self, resp, fetch):
if resp.ecode != 0: if resp.ecode != 0:
return {"ecode": resp.ecode, "error_info": resp.error_info} return {
"ecode": resp.ecode,
"ecode_desc": ChannelDataEcode(resp.ecode),
"error_info": resp.error_info,
}
fetch_map = {"ecode": resp.ecode} fetch_map = {"ecode": resp.ecode}
for idx, key in enumerate(resp.key): for idx, key in enumerate(resp.key):
if key == self._profile_key: if key == self._profile_key:
......
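With `ecode_desc` added above, a failed pipeline call now carries a human-readable error code; below is a minimal sketch of checking it (the address and feed/fetch names are placeholders following the IMDB example):

```python
from paddle_serving_server.pipeline import PipelineClient

# Minimal sketch: address and feed/fetch names are placeholders.
client = PipelineClient()
client.connect(["127.0.0.1:18080"])

ret = client.predict(feed_dict={"words": "i am very sad | 0"}, fetch=["prediction"])
if ret["ecode"] != 0:
    # ecode_desc is the human-readable ChannelDataEcode added in the diff above.
    print("predict failed: {} ({})".format(ret["error_info"], ret.get("ecode_desc")))
else:
    print(ret["prediction"])
```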
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -776,7 +776,7 @@ function python_test_pipeline(){ ...@@ -776,7 +776,7 @@ function python_test_pipeline(){
# test: thread servicer & thread op # test: thread servicer & thread op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: true is_thread_op: true
...@@ -793,7 +793,7 @@ EOF ...@@ -793,7 +793,7 @@ EOF
# test: thread servicer & process op # test: thread servicer & process op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: false is_thread_op: false
...@@ -807,13 +807,13 @@ EOF ...@@ -807,13 +807,13 @@ EOF
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
# test: process servicer & thread op # test: process servicer & process op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: true build_dag_each_worker: false
dag: dag:
is_thread_op: flase is_thread_op: false
client_type: brpc client_type: brpc
retry: 1 retry: 1
use_profile: false use_profile: false
...@@ -823,12 +823,14 @@ EOF ...@@ -823,12 +823,14 @@ EOF
check_cmd "python test_pipeline_client.py" check_cmd "python test_pipeline_client.py"
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
# test: process servicer & process op # test: process servicer & thread op
pip uninstall grpcio -y
pip install grpcio --no-binary=grpcio
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: true
dag: dag:
is_thread_op: false is_thread_op: false
client_type: brpc client_type: brpc
...@@ -840,7 +842,7 @@ EOF ...@@ -840,7 +842,7 @@ EOF
check_cmd "python test_pipeline_client.py" check_cmd "python test_pipeline_client.py"
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
kill_server_process kill_server_process
kill_process_by_port 9292 kill_process_by_port 9292
kill_process_by_port 9393 kill_process_by_port 9393
...@@ -851,7 +853,7 @@ EOF ...@@ -851,7 +853,7 @@ EOF
sleep 5 sleep 5
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: false is_thread_op: false
......