Commit 6bf50b78 authored by barriery

add log_id to InferService::inference/debug

Parent 99781bce
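In short, the commit threads the client-supplied log_id through the whole serving path: the generated RPC handlers read it off the request, pass it to InferService::inference/debug as a new parameter, and the ops stamp it onto their GeneralBlob outputs so that every log line can carry a "(logid=...)" prefix. A minimal sketch of the new call shape, assembled from the hunks below (request, cntl and svr stand in for the real handler objects and are not set up here):

    // Sketch only: how a generated handler drives the new signature.
    uint64_t log_id = request->log_id();  // client-supplied id from the proto request
    cntl->set_log_id(log_id);             // mirror it into the brpc controller
    int err_code = svr->inference(request, response, log_id);  // new third parameter
    if (err_code != 0) {
      LOG(WARNING) << "(logid=" << log_id << ") Failed call inferservice"
                   << ", error_code: " << err_code;
    }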
......@@ -35,6 +35,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
......@@ -44,16 +45,18 @@ int GeneralTextReaderOp::inference() {
std::vector<int64_t> capacity;
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
if (!res) {
LOG(ERROR) << "Failed get op tls reader object output";
LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op tls reader object output";
}
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (batch_size <= 0) {
LOG(ERROR) << "Batch size < 0";
LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
return -1;
}
......@@ -61,17 +64,18 @@ int GeneralTextReaderOp::inference() {
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num;
VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
VLOG(2) << "start to call load general model_conf op";
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "get resource pointer done.";
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
VLOG(2) << "print general model config done.";
VLOG(2) << "(logid=" << log_id << ") print general model config done.";
elem_type.resize(var_num);
elem_size.resize(var_num);
......@@ -79,7 +83,8 @@ int GeneralTextReaderOp::inference() {
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
......@@ -91,17 +96,19 @@ int GeneralTextReaderOp::inference() {
if (req->insts(0).tensor_array(i).shape(0) == -1) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "var[" << i << "] is lod_tensor";
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "shape for var[" << i << "]: " << dim;
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
}
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
......@@ -117,11 +124,11 @@ int GeneralTextReaderOp::inference() {
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
VLOG(2) << "var[" << i
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
}
}
......@@ -163,7 +170,7 @@ int GeneralTextReaderOp::inference() {
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success";
VLOG(2) << "(logid=" << log_id << ") read data from client success";
return 0;
}
DEFINE_OP(GeneralTextReaderOp);
......
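Besides prefixing the logs, the hunk above reorders GeneralTextReaderOp::inference() so that the null check on res runs before the blob is dereferenced, then stamps the id on the output blob. A condensed sketch of the resulting sequence (the early return on a null blob is an assumption; the hunk only shows the log line):

    GeneralBlob *res = mutable_data<GeneralBlob>();
    if (!res) {  // check before touching the blob
      LOG(ERROR) << "(logid=" << log_id
                 << ") Failed get op tls reader object output";
      return -1;  // assumed; not visible in the hunk
    }
    TensorVector *out = &res->tensor_vector;
    res->SetBatchSize(batch_size);
    res->SetLogId(log_id);  // downstream ops read this back via GetLogId()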
......@@ -89,12 +89,13 @@ int GeneralTextResponseOp::inference() {
// currently only response float tensor or lod_tensor
tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << " is lod_tensor";
VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1);
} else {
VLOG(2) << "out[" << idx << "] is tensor";
VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k];
VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
}
}
......@@ -138,7 +139,9 @@ int GeneralTextResponseOp::inference() {
// a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "p size for input blob: " << input_blob->p_size;
uint64_t log_id = input_blob->GetLogId();
VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1;
if (pi == 0) {
profile_time_idx = 0;
......
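On the response side the op does not re-read the request; it recovers the id from the upstream blob, which is what lets the "(logid=...)" prefix survive across ops in a workflow. A minimal sketch (the null guard is an added assumption, not part of the hunk):

    input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
    if (!input_blob) {  // hypothetical guard
      LOG(ERROR) << "Failed to get upstream blob: " << pre_node_names[pi];
      return -1;
    }
    uint64_t log_id = input_blob->GetLogId();  // id stamped by the reader op
    VLOG(2) << "(logid=" << log_id
            << ") p size for input blob: " << input_blob->p_size;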
......@@ -280,26 +280,29 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_log_id(request->log_id());\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response);\n"
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
......@@ -307,7 +310,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name",
class_name,
"service",
......@@ -318,27 +322,31 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" cntl->set_log_id(request->log_id());\n"
" uint64_t log_id = equest->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n"
" int err_code = svr->inference(request, response, &debug_os);\n"
" int err_code = svr->inference(request, response, log_id, "
"&debug_os);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
......@@ -347,9 +355,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n"
" LOG(INFO)\n"
" << \"TC=[\" << (end - start) << \"] Received debug "
" << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
"\"] Received debug "
"request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n",
......@@ -1013,26 +1023,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_log_id(request->log_id());\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") "
"remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") "
"local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response);\n"
" LOG(INFO) << \"(logid=\" << log_id << \") "
"service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
......@@ -1040,7 +1055,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name",
class_name,
"service",
......@@ -1051,27 +1067,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n"
" cntl->set_log_id(request->log_id());\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n"
" "
"::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n"
" if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n"
" LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n"
" }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
" << cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
"<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n"
" int err_code = svr->inference(request, response, &debug_os);\n"
" int err_code = svr->inference(request, response, log_id, "
"&debug_os);\n"
" if (err_code != 0) {\n"
" LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n"
" << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n"
......@@ -1080,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT
" LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n" // NOLINT
" LOG(INFO)\n"
" << \"TC=[\" << (end - start) << \"] Received debug "
" << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
"\"] Received debug "
"request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n",
......
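Unescaped, the handler that PdsCodeGenerator emits for a debug-enabled service now reads roughly as follows ($name$ and $service$ are the generator's substitution placeholders; namespaces are shortened and the gettimeofday bookkeeping for start/end is elided here, as it is in the hunks above):

    baidu::rpc::ClosureGuard done_guard(done);
    baidu::rpc::Controller* cntl = static_cast<baidu::rpc::Controller*>(cntl_base);
    uint64_t log_id = request->log_id();
    cntl->set_log_id(log_id);
    InferService* svr = InferServiceManager::instance().item("$service$");
    if (svr == NULL) {
      LOG(ERROR) << "(logid=" << log_id << ") Not found service: $service$";
      cntl->SetFailed(404, "Not found service: $service$");
      return;
    }
    butil::IOBufBuilder debug_os;
    int err_code = svr->inference(request, response, log_id, &debug_os);
    if (err_code != 0) {
      LOG(WARNING) << "(logid=" << log_id
                   << ") Failed call inferservice[$name$], name[$service$]"
                   << ", error_code: " << err_code;
      cntl->SetFailed(err_code, "InferService inference failed!");
    }
    LOG(INFO) << "(logid=" << log_id << ") tc=[" << (end - start) << "]";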
......@@ -19,6 +19,7 @@
#include <butil/time.h> // butil::Timer
#endif
#include <inttypes.h>
#include <list>
#include <string>
#include <vector>
......@@ -135,50 +136,60 @@ const std::string& InferService::name() const { return _infer_service_format; }
// Execute each workflow serially
int InferService::inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
TRACEPRINTF("start to inference");
TRACEPRINTF("(logid=%" PRIu64 ") start to inference", log_id);
// when function call begins, framework will reset
// thread local variables & resources automatically.
if (Resource::instance().thread_clear() != 0) {
LOG(ERROR) << "Failed thread clear whole resource";
LOG(ERROR) << "(logid=" << log_id << ") Failed thread clear whole resource";
return ERR_INTERNAL_FAILURE;
}
TRACEPRINTF("finish to thread clear");
TRACEPRINTF("(logid=%" PRIu64 ") finish to thread clear", log_id);
if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True";
LOG(INFO) << "(logid=" << log_id << ") enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow";
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to map request to workflow";
return ERR_INTERNAL_FAILURE;
}
size_t fsize = workflows->size();
for (size_t fi = 0; fi < fsize; ++fi) {
Workflow* workflow = (*workflows)[fi];
if (workflow == NULL) {
LOG(ERROR) << "Failed to get valid workflow at: " << fi;
LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get valid workflow at: " << fi;
return ERR_INTERNAL_FAILURE;
}
TRACEPRINTF("start to execute workflow[%s]", workflow->name().c_str());
TRACEPRINTF("(logid=%" PRIu64 ") start to execute workflow[%s]",
log_id,
workflow->name().c_str());
int errcode = _execute_workflow(workflow, request, response, debug_os);
TRACEPRINTF("finish to execute workflow[%s]", workflow->name().c_str());
TRACEPRINTF("(logid=%" PRIu64 ") finish to execute workflow[%s]",
log_id,
workflow->name().c_str());
if (errcode < 0) {
LOG(ERROR) << "Failed execute workflow[" << workflow->name()
<< "] in:" << name();
LOG(ERROR) << "(logid=" << log_id << ") Failed execute workflow["
<< workflow->name() << "] in:" << name();
return errcode;
}
}
} else {
LOG(INFO) << "enable map request == False";
TRACEPRINTF("start to execute one workflow");
LOG(INFO) << "(logid=" << log_id << ") enable map request == False";
TRACEPRINTF("(logid=%" PRIu64 ") start to execute one workflow", log_id);
size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) {
TRACEPRINTF("start to execute one workflow-%lu", fi);
TRACEPRINTF(
"(logid=%" PRIu64 ") start to execute one workflow-%lu", log_id, fi);
int errcode = execute_one_workflow(fi, request, response, debug_os);
TRACEPRINTF("finish to execute one workflow-%lu", fi);
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute one workflow-%lu", log_id, fi);
if (errcode < 0) {
LOG(ERROR) << "Failed execute 0-th workflow in:" << name();
LOG(ERROR) << "(logid=" << log_id
<< ") Failed execute 0-th workflow in:" << name();
return errcode;
}
}
......@@ -188,8 +199,9 @@ int InferService::inference(const google::protobuf::Message* request,
int InferService::debug(const google::protobuf::Message* request,
google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
return inference(request, response, debug_os);
return inference(request, response, log_id, debug_os);
}
int InferService::execute_one_workflow(uint32_t index,
......
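The #include <inttypes.h> added at the top of this file exists for the PRIu64 macro used in the TRACEPRINTF calls: TRACEPRINTF is printf-style, and "%" PRIu64 concatenates at compile time into the correct conversion specifier for uint64_t on any platform. A self-contained example:

    #include <inttypes.h>  // PRIu64; also pulls in uint64_t via <stdint.h>
    #include <stdio.h>

    int main() {
      uint64_t log_id = 42;
      // "(logid=%" PRIu64 ")" becomes e.g. "(logid=%llu)" after concatenation.
      printf("(logid=%" PRIu64 ") start to inference\n", log_id);
      return 0;
    }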
......@@ -52,10 +52,12 @@ class InferService {
// Execute each workflow serially
virtual int inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os = NULL);
int debug(const google::protobuf::Message* request,
google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os);
int execute_one_workflow(uint32_t index,
......@@ -88,6 +90,7 @@ class ParallelInferService : public InferService {
// Execute workflows in parallel
int inference(const google::protobuf::Message* request,
google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) {
return 0;
}
......
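Every override has to absorb the extra parameter, including ParallelInferService, which accepts and ignores it. At the call site, debug() is now a thin forwarder that preserves the id, so the two calls below are equivalent (svr and friends assumed, as above):

    // With debug output collected into debug_os:
    int err_code = svr->debug(request, response, log_id, &debug_os);
    // ...which simply forwards to:
    int err_code2 = svr->inference(request, response, log_id, &debug_os);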