未验证 提交 fd672554 编写于 作者: M MRXLT 提交者: GitHub

Merge pull request #185 from guru4elephant/add_timeline

add profiling timeline
......@@ -26,5 +26,5 @@ endif()
# Server-side components are only added when this is not a client-only build.
if (NOT CLIENT_ONLY)
add_subdirectory(predictor)
add_subdirectory(general-server)
# NOTE(review): util is also added unconditionally right after this block.
# This looks like the pre-change line of the diff; confirm and keep only one,
# since the same directory would otherwise be added twice for server builds.
add_subdirectory(util)
endif()
# util is added regardless of CLIENT_ONLY; the client link line below lists "utils".
add_subdirectory(util)
# Client-only build: create the serving_client pybind11 module.
if(CLIENT_ONLY)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
# NOTE(review): the two target_link_libraries calls differ only by "utils";
# presumably the first is the pre-change line and should be dropped. Confirm.
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
endif()
......@@ -31,6 +31,9 @@
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
DECLARE_bool(profile_client);
DECLARE_bool(profile_server);
// given some input data, pack into pb, and send request
namespace baidu {
namespace paddle_serving {
......@@ -45,6 +48,8 @@ class PredictorClient {
PredictorClient() {}
~PredictorClient() {}
void init_gflags(std::vector<std::string> argv);
int init(const std::string& client_conf);
void set_predictor_conf(const std::string& conf_path,
......@@ -87,6 +92,7 @@ class PredictorClient {
std::map<std::string, std::string> _fetch_name_to_var_name;
std::vector<std::vector<int>> _shape;
std::vector<int> _type;
std::vector<int64_t> _last_request_ts;
};
} // namespace general_model
......
make: *** No targets specified and no makefile found. Stop.
......@@ -17,18 +17,42 @@
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#include "core/util/include/timer.h"
DEFINE_bool(profile_client, false, "");
DEFINE_bool(profile_server, false, "");
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::general_model::FetchInst;
std::once_flag gflags_init_flag;
namespace baidu {
namespace paddle_serving {
namespace general_model {
using configure::GeneralModelConfig;
// Parses the given flag strings with gflags, at most once per process.
//
// argv: flag strings to hand to gflags. A placeholder "dummy" entry is
//       prepended so that the caller's strings start at index 1.
//
// The work is guarded by the file-level gflags_init_flag, so later calls
// (from any PredictorClient instance) do nothing.
void PredictorClient::init_gflags(std::vector<std::string> argv) {
  std::call_once(gflags_init_flag, [&]() {
    FLAGS_logtostderr = true;
    argv.insert(argv.begin(), "dummy");
    int argc = static_cast<int>(argv.size());

    // Keep the C-style pointer array in a vector so it is released
    // automatically. ParseCommandLineFlags is given the address of `arr` and
    // may change it, so a raw new[] pointer could not be delete[]d reliably
    // afterwards (the original code leaked it).
    std::vector<char *> arg_ptrs;
    arg_ptrs.reserve(argv.size() + 1);
    std::string line;
    for (auto &arg : argv) {
      arg_ptrs.push_back(&arg[0]);
      line += arg;
      line += ' ';
    }
    // Conventional argv[argc] == nullptr terminator; argc does not count it.
    arg_ptrs.push_back(nullptr);

    char **arr = arg_ptrs.data();
    google::ParseCommandLineFlags(&argc, &arr, true);
    VLOG(2) << "Init commandline: " << line;
  });
}
int PredictorClient::init(const std::string &conf_file) {
try {
GeneralModelConfig model_config;
......@@ -117,15 +141,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
return fetch_result;
}
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
// we save infer_us at fetch_result[fetch_name.size()]
fetch_result.resize(fetch_name.size() + 1);
_api.thrd_clear();
_predictor = _api.fetch_predictor("general_model");
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "fetch name size: " << fetch_name.size();
Request req;
for (auto & name : fetch_name) {
req.add_fetch_var_names(name);
......@@ -175,16 +204,28 @@ std::vector<std::vector<float>> PredictorClient::predict(
vec_idx++;
}
VLOG(2) << "feed int feed var done.";
int64_t preprocess_end = timeline.TimeStampUS();
// std::map<std::string, std::vector<float> > result;
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_client) {
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
exit(-1);
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
for (auto &name : fetch_name) {
int idx = _fetch_name_to_idx[name];
int len = res.insts(0).tensor_array(idx).data_size();
......@@ -196,8 +237,29 @@ std::vector<std::vector<float>> PredictorClient::predict(
*(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
}
}
fetch_result[fetch_name.size()].resize(1);
fetch_result[fetch_name.size()][0] = res.mean_infer_us();
postprocess_end = timeline.TimeStampUS();
}
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
return fetch_result;
......@@ -308,10 +370,6 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
}
}
}
//last index for infer time
fetch_result_batch[batch_size].resize(1);
fetch_result_batch[batch_size][0].resize(1);
fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
}
return fetch_result_batch;
......
......@@ -31,6 +31,10 @@ PYBIND11_MODULE(serving_client, m) {
)pddoc";
py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
.def(py::init())
.def("init_gflags",
[](PredictorClient &self, std::vector<std::string> argv) {
self.init_gflags(argv);
})
.def("init",
[](PredictorClient &self, const std::string &conf) {
return self.init(conf);
......
......@@ -14,6 +14,7 @@
#pragma once
#include <string.h>
#include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
......@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
struct GeneralBlob {
std::vector<paddle::PaddleTensor> tensor_vector;
double infer_time;
std::vector<std::string> fetch_name_vector;
int64_t time_stamp[20];
int p_size = 0;
void Clear() {
size_t tensor_count = tensor_vector.size();
......@@ -60,6 +61,18 @@ struct GeneralBlob {
std::string ShortDebugString() const { return "Not implemented!"; }
};
// Appends one timestamp to the blob's profiling record and advances p_size.
//
// blob:       blob whose time_stamp array receives the value.
// init_value: timestamp to store at index blob->p_size.
//
// time_stamp is a fixed-size array, so a value that would fall outside it is
// dropped instead of being written past the end of the array.
static void AddBlobInfo(GeneralBlob* blob, int64_t init_value) {
  const int capacity = static_cast<int>(sizeof(blob->time_stamp) /
                                        sizeof(blob->time_stamp[0]));
  if (blob->p_size < 0 || blob->p_size >= capacity) {
    return;
  }
  blob->time_stamp[blob->p_size] = init_value;
  blob->p_size++;
}
static void CopyBlobInfo(const GeneralBlob * src,
GeneralBlob * tgt) {
memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
src->p_size * sizeof(int64_t));
}
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
......@@ -52,17 +52,18 @@ int GeneralInferOp::inference() {
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "infer batch size: " << batch_size;
// infer
Timer timeline;
double infer_time = 0.0;
int64_t start = timeline.TimeStampUS();
timeline.Start();
if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
return -1;
}
timeline.Pause();
infer_time = timeline.ElapsedUS();
int64_t end = timeline.TimeStampUS();
CopyBlobInfo(input_blob, output_blob);
AddBlobInfo(output_blob, start);
AddBlobInfo(output_blob, end);
return 0;
}
DEFINE_OP(GeneralInferOp);
......
......@@ -20,11 +20,13 @@
#include "core/general-server/op/general_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
......@@ -85,9 +87,10 @@ int GeneralReaderOp::inference() {
LOG(ERROR) << "Failed get op tls reader object output";
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num;
// read config
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
......@@ -196,6 +199,12 @@ int GeneralReaderOp::inference() {
}
}
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success";
return 0;
}
......
......@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
// double response_time = 0.0;
// timeline.Start();
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
......@@ -67,11 +72,8 @@ int GeneralResponseOp::inference() {
fetch_index[i] =
model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
}
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
// res->set_mean_infer_us(infer_time);
Response *res = mutable_data<Response>();
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = res->add_insts();
......@@ -118,6 +120,18 @@ int GeneralResponseOp::inference() {
}
var_idx++;
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
VLOG(2) << "p size for input blob: " << input_blob->p_size;
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find a more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
......
......@@ -19,11 +19,13 @@
#include "core/general-server/op/general_text_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::MempoolWrapper;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
......@@ -54,9 +56,11 @@ int GeneralTextReaderOp::inference() {
return -1;
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
int var_num = req->insts(0).tensor_array_size();
VLOG(2) << "var num: " << var_num;
// read config
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
......@@ -157,6 +161,10 @@ int GeneralTextReaderOp::inference() {
}
}
int64_t end = timeline.TimeStampUS();
AddBlobInfo(res, start);
AddBlobInfo(res, end);
VLOG(2) << "read data from client success";
return 0;
}
......
......@@ -49,10 +49,11 @@ int GeneralTextResponseOp::inference() {
int batch_size = input_blob->GetBatchSize();
VLOG(2) << "infer batch size: " << batch_size;
// infer
const Request *req = dynamic_cast<const Request *>(get_request_message());
Timer timeline;
int64_t start = timeline.TimeStampUS();
VLOG(2) << "start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
......@@ -71,8 +72,6 @@ int GeneralTextResponseOp::inference() {
// response inst with only fetch_var_names
Response *res = mutable_data<Response>();
// res->set_mean_infer_us(infer_time);
for (int i = 0; i < batch_size; ++i) {
FetchInst *fetch_inst = res->add_insts();
for (auto & idx : fetch_index) {
......@@ -118,6 +117,18 @@ int GeneralTextResponseOp::inference() {
}
var_idx++;
}
if (req->profile_server()) {
int64_t end = timeline.TimeStampUS();
for (int i = 0; i < input_blob->p_size; ++i) {
res->add_profile_time(input_blob->time_stamp[i]);
}
// TODO(guru4elephant): find a more elegant way to do this
res->add_profile_time(start);
res->add_profile_time(end);
}
return 0;
}
DEFINE_OP(GeneralTextResponseOp);
......
......@@ -38,11 +38,12 @@ message FetchInst {
// Inference request message.
message Request {
// Input instances to run inference on.
repeated FeedInst insts = 1;
// Names of the variables the caller wants returned in the response.
repeated string fetch_var_names = 2;
// When true, the response ops append the recorded op timestamps to
// Response.profile_time. Defaults to false.
optional bool profile_server = 3 [ default = false ];
};
// Inference response message.
message Response {
// Fetch results; the response ops add one FetchInst per batch element.
repeated FetchInst insts = 1;
// NOTE(review): mean_infer_us and profile_time both use field number 2, and
// field numbers must be unique within a message. mean_infer_us looks like the
// pre-change line that profile_time replaces; confirm and keep only one.
optional float mean_infer_us = 2;
// Flat list of timestamps stored as (start, end) pairs, one pair per op;
// filled only when Request.profile_server is true.
repeated int64 profile_time = 2;
};
service GeneralModelService {
......
......@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
TRACEPRINTF("finish to thread clear");
if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow";
......@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request,
}
}
} else {
LOG(INFO) << "enable map request == False";
TRACEPRINTF("start to execute one workflow");
size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) {
......@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow,
TRACEPRINTF("finish to copy from");
workflow_time.stop();
LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
......
......@@ -38,11 +38,12 @@ message FetchInst {
// Inference request message.
message Request {
// Input instances to run inference on.
repeated FeedInst insts = 1;
// Names of the variables the caller wants returned in the response.
repeated string fetch_var_names = 2;
// When true, the response ops append the recorded op timestamps to
// Response.profile_time. Defaults to false.
optional bool profile_server = 3 [ default = false ];
};
// Inference response message.
message Response {
// Fetch results; the response ops add one FetchInst per batch element.
repeated FetchInst insts = 1;
// NOTE(review): mean_infer_us and profile_time both use field number 2, and
// field numbers must be unique within a message. mean_infer_us looks like the
// pre-change line that profile_time replaces; confirm and keep only one.
optional float mean_infer_us = 2;
// Flat list of timestamps stored as (start, end) pairs, one pair per op;
// filled only when Request.profile_server is true.
repeated int64 profile_time = 2;
};
service GeneralModelService {
......
# Pull in src/CMakeLists.txt before util_srcs is used below.
# Presumably that is where util_srcs gets populated; confirm.
include(src/CMakeLists.txt)
# Build the "utils" library from the collected sources.
add_library(utils ${util_srcs})
......@@ -38,6 +38,7 @@ class Timer {
double ElapsedMS();
// return elapsed time in sec
double ElapsedSec();
int64_t TimeStampUS();
private:
struct timeval _start;
......
# Collect every .cc file that sits next to this list file...
FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc)
# ...and append them to util_srcs.
LIST(APPEND util_srcs ${srcs})
......@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
// Returns the current wall-clock time in microseconds, as reported by
// gettimeofday().
//
// Both the seconds and the microseconds part are combined. Returning tv_usec
// alone would wrap around every second and make timestamps taken in
// different seconds incomparable.
int64_t Timer::TimeStampUS() {
  gettimeofday(&_now, NULL);
  return static_cast<int64_t>(_now.tv_sec) * 1000 * 1000L + _now.tv_usec;
}
int64_t Timer::Tickus() {
gettimeofday(&_now, NULL);
return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
......
......@@ -17,6 +17,7 @@ from .proto import sdk_configure_pb2 as sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import time
import sys
int_type = 0
float_type = 1
......@@ -87,6 +88,9 @@ class Client(object):
# map feed names to index
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
read_env_flags = ["profile_client", "profile_server"]
self.client_handle_.init_gflags([sys.argv[0]] +
["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_shapes_ = [var.shape for var in model_conf.feed_var]
......@@ -143,9 +147,6 @@ class Client(object):
for i, name in enumerate(fetch_names):
result_map[name] = result[i]
if profile:
result_map["infer_time"] = result[-1][0]
return result_map
def batch_predict(self, feed_batch=[], fetch=[], profile=False):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册