提交 9afdca0a 编写于 作者: G guru4elephant

add profiling timeline

上级 8dcee4e9
...@@ -26,5 +26,5 @@ endif() ...@@ -26,5 +26,5 @@ endif()
if (NOT CLIENT_ONLY) if (NOT CLIENT_ONLY)
add_subdirectory(predictor) add_subdirectory(predictor)
add_subdirectory(general-server) add_subdirectory(general-server)
add_subdirectory(util)
endif() endif()
add_subdirectory(util)
if(CLIENT_ONLY) if(CLIENT_ONLY)
add_subdirectory(pybind11) add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp) pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
endif() endif()
...@@ -87,6 +87,7 @@ class PredictorClient { ...@@ -87,6 +87,7 @@ class PredictorClient {
std::map<std::string, std::string> _fetch_name_to_var_name; std::map<std::string, std::string> _fetch_name_to_var_name;
std::vector<std::vector<int>> _shape; std::vector<std::vector<int>> _shape;
std::vector<int> _type; std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
}; };
} // namespace general_model } // namespace general_model
......
...@@ -17,7 +17,9 @@ ...@@ -17,7 +17,9 @@
#include "core/sdk-cpp/builtin_format.pb.h" #include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/include/common.h" #include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h" #include "core/sdk-cpp/include/predictor_sdk.h"
#include "core/util/include/timer.h"
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
...@@ -117,15 +119,20 @@ std::vector<std::vector<float>> PredictorClient::predict( ...@@ -117,15 +119,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
return fetch_result; return fetch_result;
} }
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// we save infer_us at fetch_result[fetch_name.size()] // we save infer_us at fetch_result[fetch_name.size()]
fetch_result.resize(fetch_name.size() + 1); fetch_result.resize(fetch_name.size() + 1);
_api.thrd_clear(); _api.thrd_clear();
_predictor = _api.fetch_predictor("general_model"); _predictor = _api.fetch_predictor("general_model");
VLOG(2) << "fetch general model predictor done."; VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size(); VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size(); VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "fetch name size: " << fetch_name.size(); VLOG(2) << "fetch name size: " << fetch_name.size();
Request req; Request req;
for (auto & name : fetch_name) { for (auto & name : fetch_name) {
req.add_fetch_var_names(name); req.add_fetch_var_names(name);
...@@ -175,16 +182,21 @@ std::vector<std::vector<float>> PredictorClient::predict( ...@@ -175,16 +182,21 @@ std::vector<std::vector<float>> PredictorClient::predict(
vec_idx++; vec_idx++;
} }
VLOG(2) << "feed int feed var done."; int64_t preprocess_end = timeline.TimeStampUS();
// std::map<std::string, std::vector<float> > result; int64_t client_infer_start = timeline.TimeStampUS();
Response res; Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
res.Clear(); res.Clear();
if (_predictor->inference(&req, &res) != 0) { if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString(); LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
exit(-1); exit(-1);
} else { } else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
for (auto &name : fetch_name) { for (auto &name : fetch_name) {
int idx = _fetch_name_to_idx[name]; int idx = _fetch_name_to_idx[name];
int len = res.insts(0).tensor_array(idx).data_size(); int len = res.insts(0).tensor_array(idx).data_size();
...@@ -196,10 +208,24 @@ std::vector<std::vector<float>> PredictorClient::predict( ...@@ -196,10 +208,24 @@ std::vector<std::vector<float>> PredictorClient::predict(
*(const float *)res.insts(0).tensor_array(idx).data(i).c_str(); *(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
} }
} }
fetch_result[fetch_name.size()].resize(1); postprocess_end = timeline.TimeStampUS();
fetch_result[fetch_name.size()][0] = res.mean_infer_us();
} }
int op_num = res.profile_time_size() / 2;
VLOG(2) << "preprocess start: " << preprocess_start;
VLOG(2) << "preprocess end: " << preprocess_end;
VLOG(2) << "client infer start: " << client_infer_start;
VLOG(2) << "op1 start: " << res.profile_time(0);
VLOG(2) << "op1 end: " << res.profile_time(1);
VLOG(2) << "op2 start: " << res.profile_time(2);
VLOG(2) << "op2 end: " << res.profile_time(3);
VLOG(2) << "op3 start: " << res.profile_time(4);
VLOG(2) << "op3 end: " << res.profile_time(5);
VLOG(2) << "client infer end: " << client_infer_end;
VLOG(2) << "client postprocess start: " << postprocess_start;
VLOG(2) << "client postprocess end: " << postprocess_end;
return fetch_result; return fetch_result;
} }
...@@ -308,10 +334,10 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict( ...@@ -308,10 +334,10 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
} }
} }
} }
//last index for infer time // last index for infer time
fetch_result_batch[batch_size].resize(1); // fetch_result_batch[batch_size].resize(1);
fetch_result_batch[batch_size][0].resize(1); // fetch_result_batch[batch_size][0].resize(1);
fetch_result_batch[batch_size][0][0] = res.mean_infer_us(); // fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
} }
return fetch_result_batch; return fetch_result_batch;
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include <string.h>
#include <vector> #include <vector>
#ifdef BCLOUD #ifdef BCLOUD
#ifdef WITH_GPU #ifdef WITH_GPU
...@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model"; ...@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
struct GeneralBlob { struct GeneralBlob {
std::vector<paddle::PaddleTensor> tensor_vector; std::vector<paddle::PaddleTensor> tensor_vector;
double infer_time; int64_t time_stamp[20];
std::vector<std::string> fetch_name_vector; int p_size = 0;
void Clear() { void Clear() {
size_t tensor_count = tensor_vector.size(); size_t tensor_count = tensor_vector.size();
...@@ -60,6 +61,18 @@ struct GeneralBlob { ...@@ -60,6 +61,18 @@ struct GeneralBlob {
std::string ShortDebugString() const { return "Not implemented!"; } std::string ShortDebugString() const { return "Not implemented!"; }
}; };
// Append one profiling timestamp (microseconds) to the blob's trace.
// time_stamp is a fixed-size array of 20 entries; guard against overflow
// so a long op chain cannot corrupt the members that follow it.
static void AddBlobInfo(GeneralBlob* blob, int64_t init_value) {
  if (blob->p_size <
      static_cast<int>(sizeof(blob->time_stamp) / sizeof(blob->time_stamp[0]))) {
    blob->time_stamp[blob->p_size] = init_value;
    blob->p_size++;
  }
}
static void CopyBlobInfo(const GeneralBlob * src,
GeneralBlob * tgt) {
memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
src->p_size * sizeof(int64_t));
}
} // namespace serving } // namespace serving
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
...@@ -54,15 +54,19 @@ int GeneralInferOp::inference() { ...@@ -54,15 +54,19 @@ int GeneralInferOp::inference() {
VLOG(2) << "infer batch size: " << batch_size; VLOG(2) << "infer batch size: " << batch_size;
// infer // infer
Timer timeline; Timer timeline;
double infer_time = 0.0; // double infer_time = 0.0;
int64_t start = timeline.TimeStampUS();
timeline.Start(); timeline.Start();
if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) { if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME; LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1; return -1;
} }
timeline.Pause(); // timeline.Pause();
infer_time = timeline.ElapsedUS(); // infer_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0; return 0;
} }
DEFINE_OP(GeneralInferOp); DEFINE_OP(GeneralInferOp);
......
...@@ -20,11 +20,13 @@ ...@@ -20,11 +20,13 @@
#include "core/general-server/op/general_reader_op.h" #include "core/general-server/op/general_reader_op.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace serving { namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
...@@ -85,6 +87,10 @@ int GeneralReaderOp::inference() { ...@@ -85,6 +87,10 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "Failed get op tls reader object output"; LOG(ERROR) << "Failed get op tls reader object output";
} }
Timer timeline;
// double read_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size(); int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num; VLOG(2) << "var num: " << var_num;
// read config // read config
...@@ -196,6 +202,13 @@ int GeneralReaderOp::inference() { ...@@ -196,6 +202,13 @@ int GeneralReaderOp::inference() {
} }
} }
timeline.Pause();
// read_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success"; VLOG(2) << "read data from client success";
return 0; return 0;
} }
......
...@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() { ...@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message()); const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op"; VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
...@@ -118,6 +123,18 @@ int GeneralResponseOp::inference() { ...@@ -118,6 +123,18 @@ int GeneralResponseOp::inference() {
} }
var_idx++; var_idx++;
} }
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
VLOG(2) << "p size for input blob: " << input_blob->p_size;
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
return 0; return 0;
} }
......
...@@ -19,11 +19,13 @@ ...@@ -19,11 +19,13 @@
#include "core/general-server/op/general_text_reader_op.h" #include "core/general-server/op/general_text_reader_op.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h" #include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace serving { namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Request;
...@@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() { ...@@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() {
return -1; return -1;
} }
Timer timeline;
// double read_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size(); int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num; VLOG(2) << "var num: " << var_num;
// read config // read config
...@@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() { ...@@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() {
} }
} }
// timeline.Pause();
// read_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success"; VLOG(2) << "read data from client success";
return 0; return 0;
} }
......
...@@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() { ...@@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message()); const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op"; VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance(); baidu::paddle_serving::predictor::Resource::instance();
...@@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() { ...@@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() {
} }
var_idx++; var_idx++;
} }
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
return 0; return 0;
} }
DEFINE_OP(GeneralTextResponseOp); DEFINE_OP(GeneralTextResponseOp);
......
...@@ -42,7 +42,7 @@ message Request { ...@@ -42,7 +42,7 @@ message Request {
message Response { message Response {
repeated FetchInst insts = 1; repeated FetchInst insts = 1;
optional float mean_infer_us = 2; repeated int64 profile_time = 2;
}; };
service GeneralModelService { service GeneralModelService {
......
...@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request, ...@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
TRACEPRINTF("finish to thread clear"); TRACEPRINTF("finish to thread clear");
if (_enable_map_request_to_workflow) { if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request); std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
if (!workflows || workflows->size() == 0) { if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow"; LOG(ERROR) << "Failed to map request to workflow";
...@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request, ...@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request,
} }
} }
} else { } else {
LOG(INFO) << "enable map request == False";
TRACEPRINTF("start to execute one workflow"); TRACEPRINTF("start to execute one workflow");
size_t fsize = _flows.size(); size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) { for (size_t fi = 0; fi < fsize; ++fi) {
...@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow, ...@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow,
TRACEPRINTF("finish to copy from"); TRACEPRINTF("finish to copy from");
workflow_time.stop(); workflow_time.stop();
LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric( PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed()); WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
......
...@@ -42,7 +42,7 @@ message Request { ...@@ -42,7 +42,7 @@ message Request {
message Response { message Response {
repeated FetchInst insts = 1; repeated FetchInst insts = 1;
optional float mean_infer_us = 2; repeated int64 profile_time = 2;
}; };
service GeneralModelService { service GeneralModelService {
......
include(src/CMakeLists.txt) include(src/CMakeLists.txt)
add_library(utils ${util_srcs}) add_library(utils ${util_srcs})
...@@ -38,6 +38,7 @@ class Timer { ...@@ -38,6 +38,7 @@ class Timer {
double ElapsedMS(); double ElapsedMS();
// return elapsed time in sec // return elapsed time in sec
double ElapsedSec(); double ElapsedSec();
int64_t TimeStampUS();
private: private:
struct timeval _start; struct timeval _start;
......
FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc) FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc)
LIST(APPEND util_srcs ${srcs}) LIST(APPEND util_srcs ${srcs})
...@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; } ...@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double Timer::ElapsedSec() { return _elapsed / 1000000.0; } double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
// Return the current wall-clock time as microseconds since the Unix epoch.
// The seconds component must be included: returning tv_usec alone wraps
// every second, making cross-call differences and orderings (the whole
// point of the profiling timeline) meaningless. Same arithmetic as Tickus().
int64_t Timer::TimeStampUS() {
  gettimeofday(&_now, NULL);
  return _now.tv_sec * 1000 * 1000L + _now.tv_usec;
}
int64_t Timer::Tickus() { int64_t Timer::Tickus() {
gettimeofday(&_now, NULL); gettimeofday(&_now, NULL);
return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
......
...@@ -143,9 +143,6 @@ class Client(object): ...@@ -143,9 +143,6 @@ class Client(object):
for i, name in enumerate(fetch_names): for i, name in enumerate(fetch_names):
result_map[name] = result[i] result_map[name] = result[i]
if profile:
result_map["infer_time"] = result[-1][0]
return result_map return result_map
def batch_predict(self, feed_batch=[], fetch=[], profile=False): def batch_predict(self, feed_batch=[], fetch=[], profile=False):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册