Commit 9afdca0a authored by guru4elephant

add profiling timeline

Parent 8dcee4e9
......@@ -26,5 +26,5 @@ endif()
if (NOT CLIENT_ONLY)
add_subdirectory(predictor)
add_subdirectory(general-server)
add_subdirectory(util)
endif()
add_subdirectory(util)
if(CLIENT_ONLY)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
endif()
......@@ -87,6 +87,7 @@ class PredictorClient {
std::map<std::string, std::string> _fetch_name_to_var_name;
std::vector<std::vector<int>> _shape;
std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
};
} // namespace general_model
......
......@@ -17,7 +17,9 @@
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#include "core/util/include/timer.h"
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
......@@ -117,15 +119,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
return fetch_result;
}
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// we save infer_us at fetch_result[fetch_name.size()]
fetch_result.resize(fetch_name.size() + 1);
_api.thrd_clear();
_predictor = _api.fetch_predictor("general_model");
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "fetch name size: " << fetch_name.size();
Request req;
for (auto & name : fetch_name) {
req.add_fetch_var_names(name);
......@@ -175,16 +182,21 @@ std::vector<std::vector<float>> PredictorClient::predict(
vec_idx++;
}
VLOG(2) << "feed int feed var done.";
int64_t preprocess_end = timeline.TimeStampUS();
// std::map<std::string, std::vector<float> > result;
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
exit(-1);
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
for (auto &name : fetch_name) {
int idx = _fetch_name_to_idx[name];
int len = res.insts(0).tensor_array(idx).data_size();
......@@ -196,10 +208,24 @@ std::vector<std::vector<float>> PredictorClient::predict(
*(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
}
}
fetch_result[fetch_name.size()].resize(1);
fetch_result[fetch_name.size()][0] = res.mean_infer_us();
postprocess_end = timeline.TimeStampUS();
}
int op_num = res.profile_time_size() / 2;
VLOG(2) << "preprocess start: " << preprocess_start;
VLOG(2) << "preprocess end: " << preprocess_end;
VLOG(2) << "client infer start: " << client_infer_start;
VLOG(2) << "op1 start: " << res.profile_time(0);
VLOG(2) << "op1 end: " << res.profile_time(1);
VLOG(2) << "op2 start: " << res.profile_time(2);
VLOG(2) << "op2 end: " << res.profile_time(3);
VLOG(2) << "op3 start: " << res.profile_time(4);
VLOG(2) << "op3 end: " << res.profile_time(5);
VLOG(2) << "client infer end: " << client_infer_end;
VLOG(2) << "client postprocess start: " << postprocess_start;
VLOG(2) << "client postprocess end: " << postprocess_end;
return fetch_result;
}
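The six server-side timestamps logged above come back in Response.profile_time as start/end pairs, one pair per server op in execution order (here op1 = reader, op2 = infer, op3 = response), and the client adds its own preprocess, RPC, and postprocess stamps around them. A minimal sketch, not part of the commit, of how those numbers could be folded into per-stage latencies; the helper name PrintTimeline and the long-long casts are assumptions made for illustration:

#include <cstdint>
#include <cstdio>

void PrintTimeline(const Response &res,
                   int64_t preprocess_start, int64_t preprocess_end,
                   int64_t client_infer_start, int64_t client_infer_end,
                   int64_t postprocess_start, int64_t postprocess_end) {
  printf("client preprocess: %lld us\n",
         static_cast<long long>(preprocess_end - preprocess_start));
  // Each server op contributes a (start, end) pair, in execution order.
  for (int i = 0; i + 1 < res.profile_time_size(); i += 2) {
    printf("server op%d: %lld us\n", i / 2 + 1,
           static_cast<long long>(res.profile_time(i + 1) - res.profile_time(i)));
  }
  printf("client round trip: %lld us\n",
         static_cast<long long>(client_infer_end - client_infer_start));
  printf("client postprocess: %lld us\n",
         static_cast<long long>(postprocess_end - postprocess_start));
}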
......@@ -308,10 +334,10 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
}
}
}
//last index for infer time
fetch_result_batch[batch_size].resize(1);
fetch_result_batch[batch_size][0].resize(1);
fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
// last index for infer time
// fetch_result_batch[batch_size].resize(1);
// fetch_result_batch[batch_size][0].resize(1);
// fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
}
return fetch_result_batch;
......
......@@ -14,6 +14,7 @@
#pragma once
#include <string.h>
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
......@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
struct GeneralBlob {
std::vector<paddle::PaddleTensor> tensor_vector;
double infer_time;
std::vector<std::string> fetch_name_vector;
int64_t time_stamp[20];
int p_size = 0;
void Clear() {
size_t tensor_count = tensor_vector.size();
......@@ -60,6 +61,18 @@ struct GeneralBlob {
std::string ShortDebugString() const { return "Not implemented!"; }
};
static void AddBlobInfo(GeneralBlob * blob,
int64_t init_value) {
blob->time_stamp[blob->p_size] = init_value;
blob->p_size++;
}
static void CopyBlobInfo(const GeneralBlob * src,
GeneralBlob * tgt) {
memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
src->p_size * sizeof(int64_t));
}
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
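A minimal, self-contained usage sketch, not part of the commit, showing how the two helpers above accumulate a timeline across chained ops; the blob names and the literal microsecond values are hypothetical:

void TimelineExample() {
  GeneralBlob reader_out;                  // p_size starts at 0
  AddBlobInfo(&reader_out, 100);           // reader op start (us)
  AddBlobInfo(&reader_out, 250);           // reader op end   (us)

  GeneralBlob infer_out;
  infer_out.p_size = reader_out.p_size;    // keep the slot count in sync,
  CopyBlobInfo(&reader_out, &infer_out);   // since CopyBlobInfo copies only the raw timestamps
  AddBlobInfo(&infer_out, 260);            // infer op start
  AddBlobInfo(&infer_out, 900);            // infer op end
  // infer_out.time_stamp now holds {100, 250, 260, 900} with p_size == 4,
  // which is what the response op later copies into Response.profile_time.
}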
......@@ -54,15 +54,19 @@ int GeneralInferOp::inference() {
VLOG(2) << "infer batch size: " << batch_size;
// infer
Timer timeline;
double infer_time = 0.0;
// double infer_time = 0.0;
int64_t start = timeline.TimeStampUS();
timeline.Start();
if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1;
}
timeline.Pause();
infer_time = timeline.ElapsedUS();
// timeline.Pause();
// infer_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralInferOp);
......
......@@ -20,11 +20,13 @@
#include "core/general-server/op/general_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
......@@ -85,6 +87,10 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "Failed get op tls reader object output";
}
Timer timeline;
// double read_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num;
// read config
......@@ -196,6 +202,13 @@ int GeneralReaderOp::inference() {
}
}
timeline.Pause();
// read_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success";
return 0;
}
......
......@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
......@@ -118,6 +123,18 @@ int GeneralResponseOp::inference() {
}
var_idx++;
}
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
VLOG(2) << "p size for input blob: " << input_blob->p_size;
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
return 0;
}
......
......@@ -19,11 +19,13 @@
#include "core/general-server/op/general_text_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
......@@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() {
return -1;
}
Timer timeline;
// double read_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num;
// read config
......@@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() {
}
}
// timeline.Pause();
// read_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success";
return 0;
}
......
......@@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
......@@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() {
}
var_idx++;
}
// timeline.Pause();
// response_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
......
......@@ -42,7 +42,7 @@ message Request {
message Response {
repeated FetchInst insts = 1;
optional float mean_infer_us = 2;
repeated int64 profile_time = 2;
};
service GeneralModelService {
......
......@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
TRACEPRINTF("finish to thread clear");
if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow";
......@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request,
}
}
} else {
LOG(INFO) << "enable map request == False";
TRACEPRINTF("start to execute one workflow");
size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) {
......@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow,
TRACEPRINTF("finish to copy from");
workflow_time.stop();
LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
......
......@@ -42,7 +42,7 @@ message Request {
message Response {
repeated FetchInst insts = 1;
optional float mean_infer_us = 2;
repeated int64 profile_time = 2;
};
service GeneralModelService {
......
include(src/CMakeLists.txt)
add_library(utils ${util_srcs})
......@@ -38,6 +38,7 @@ class Timer {
double ElapsedMS();
// return elapsed time in sec
double ElapsedSec();
int64_t TimeStampUS();
private:
struct timeval _start;
......
FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc)
LIST(APPEND util_srcs ${srcs})
......@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
int64_t Timer::TimeStampUS() {
gettimeofday(&_now, NULL);
return _now.tv_usec;
}
int64_t Timer::Tickus() {
gettimeofday(&_now, NULL);
return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
......
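One hedged aside on the helper added above: TimeStampUS() returns only _now.tv_usec, i.e. the microsecond fraction of the current second, so two stamps taken on either side of a second boundary can yield a wrapped or negative delta. A full-resolution variant, sketched here with the same gettimeofday pattern as Tickus and not part of this commit (the name FullTimeStampUS is hypothetical), could look like:

int64_t Timer::FullTimeStampUS() {
  gettimeofday(&_now, NULL);
  // combine seconds and microseconds so deltas stay meaningful across seconds
  return static_cast<int64_t>(_now.tv_sec) * 1000000 + _now.tv_usec;
}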
......@@ -143,9 +143,6 @@ class Client(object):
for i, name in enumerate(fetch_names):
result_map[name] = result[i]
if profile:
result_map["infer_time"] = result[-1][0]
return result_map
def batch_predict(self, feed_batch=[], fetch=[], profile=False):
......