From 9afdca0a8cc87234c4d937b2f3443ae15292e9e5 Mon Sep 17 00:00:00 2001 From: guru4elephant Date: Tue, 18 Feb 2020 16:45:02 +0800 Subject: [PATCH] add profiling timeline --- core/CMakeLists.txt | 2 +- core/general-client/CMakeLists.txt | 2 +- core/general-client/include/general_model.h | 1 + core/general-client/src/general_model.cpp | 42 +++++++++++++++---- core/general-server/op/general_infer_helper.h | 17 +++++++- core/general-server/op/general_infer_op.cpp | 12 ++++-- core/general-server/op/general_reader_op.cpp | 13 ++++++ .../general-server/op/general_response_op.cpp | 19 ++++++++- .../op/general_text_reader_op.cpp | 13 ++++++ .../op/general_text_response_op.cpp | 17 ++++++++ .../proto/general_model_service.proto | 2 +- core/predictor/framework/service.cpp | 3 ++ .../sdk-cpp/proto/general_model_service.proto | 2 +- core/util/CMakeLists.txt | 1 + core/util/include/timer.h | 1 + core/util/src/CMakeLists.txt | 1 + core/util/src/timer.cc | 5 +++ python/paddle_serving_client/__init__.py | 3 -- 18 files changed, 134 insertions(+), 22 deletions(-) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index e574627f..3c775a9a 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -26,5 +26,5 @@ endif() if (NOT CLIENT_ONLY) add_subdirectory(predictor) add_subdirectory(general-server) -add_subdirectory(util) endif() +add_subdirectory(util) diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt index 8f654ee5..f3bb666a 100644 --- a/core/general-client/CMakeLists.txt +++ b/core/general-client/CMakeLists.txt @@ -1,5 +1,5 @@ if(CLIENT_ONLY) add_subdirectory(pybind11) pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp) -target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) +target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) endif() diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index 8f0a8247..e1820ac2 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -87,6 +87,7 @@ class PredictorClient { std::map _fetch_name_to_var_name; std::vector> _shape; std::vector _type; + std::vector _last_request_ts; }; } // namespace general_model diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index c78e63c9..cf730835 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -17,7 +17,9 @@ #include "core/sdk-cpp/builtin_format.pb.h" #include "core/sdk-cpp/include/common.h" #include "core/sdk-cpp/include/predictor_sdk.h" +#include "core/util/include/timer.h" +using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::general_model::Request; using baidu::paddle_serving::predictor::general_model::Response; using baidu::paddle_serving::predictor::general_model::Tensor; @@ -117,15 +119,20 @@ std::vector> PredictorClient::predict( return fetch_result; } + Timer timeline; + int64_t preprocess_start = timeline.TimeStampUS(); + // we save infer_us at fetch_result[fetch_name.size()] fetch_result.resize(fetch_name.size() + 1); _api.thrd_clear(); _predictor = _api.fetch_predictor("general_model"); + VLOG(2) << "fetch general model predictor done."; VLOG(2) << "float feed name size: " << float_feed_name.size(); VLOG(2) << "int 
feed name size: " << int_feed_name.size(); VLOG(2) << "fetch name size: " << fetch_name.size(); + Request req; for (auto & name : fetch_name) { req.add_fetch_var_names(name); @@ -175,16 +182,21 @@ std::vector> PredictorClient::predict( vec_idx++; } - VLOG(2) << "feed int feed var done."; + int64_t preprocess_end = timeline.TimeStampUS(); - // std::map > result; + int64_t client_infer_start = timeline.TimeStampUS(); Response res; + int64_t client_infer_end = 0; + int64_t postprocess_start = 0; + int64_t postprocess_end = 0; res.Clear(); if (_predictor->inference(&req, &res) != 0) { LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString(); exit(-1); } else { + client_infer_end = timeline.TimeStampUS(); + postprocess_start = client_infer_end; for (auto &name : fetch_name) { int idx = _fetch_name_to_idx[name]; int len = res.insts(0).tensor_array(idx).data_size(); @@ -196,10 +208,24 @@ std::vector> PredictorClient::predict( *(const float *)res.insts(0).tensor_array(idx).data(i).c_str(); } } - fetch_result[fetch_name.size()].resize(1); - fetch_result[fetch_name.size()][0] = res.mean_infer_us(); + postprocess_end = timeline.TimeStampUS(); } + int op_num = res.profile_time_size() / 2; + + VLOG(2) << "preprocess start: " << preprocess_start; + VLOG(2) << "preprocess end: " << preprocess_end; + VLOG(2) << "client infer start: " << client_infer_start; + VLOG(2) << "op1 start: " << res.profile_time(0); + VLOG(2) << "op1 end: " << res.profile_time(1); + VLOG(2) << "op2 start: " << res.profile_time(2); + VLOG(2) << "op2 end: " << res.profile_time(3); + VLOG(2) << "op3 start: " << res.profile_time(4); + VLOG(2) << "op3 end: " << res.profile_time(5); + VLOG(2) << "client infer end: " << client_infer_end; + VLOG(2) << "client postprocess start: " << postprocess_start; + VLOG(2) << "client postprocess end: " << postprocess_end; + return fetch_result; } @@ -308,10 +334,10 @@ std::vector>> PredictorClient::batch_predict( } } } - //last index for infer time - fetch_result_batch[batch_size].resize(1); - fetch_result_batch[batch_size][0].resize(1); - fetch_result_batch[batch_size][0][0] = res.mean_infer_us(); + // last index for infer time + // fetch_result_batch[batch_size].resize(1); + // fetch_result_batch[batch_size][0].resize(1); + // fetch_result_batch[batch_size][0][0] = res.mean_infer_us(); } return fetch_result_batch; diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h index b5d6d7f2..0fdda98d 100644 --- a/core/general-server/op/general_infer_helper.h +++ b/core/general-server/op/general_infer_helper.h @@ -14,6 +14,7 @@ #pragma once +#include #include #ifdef BCLOUD #ifdef WITH_GPU @@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model"; struct GeneralBlob { std::vector tensor_vector; - double infer_time; - std::vector fetch_name_vector; + int64_t time_stamp[20]; + int p_size = 0; void Clear() { size_t tensor_count = tensor_vector.size(); @@ -60,6 +61,18 @@ struct GeneralBlob { std::string ShortDebugString() const { return "Not implemented!"; } }; +static void AddBlobInfo(GeneralBlob * blob, + int64_t init_value) { + blob->time_stamp[blob->p_size] = init_value; + blob->p_size++; +} + +static void CopyBlobInfo(const GeneralBlob * src, + GeneralBlob * tgt) { + memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]), + src->p_size * sizeof(int64_t)); +} + } // namespace serving } // namespace paddle_serving } // namespace baidu diff --git a/core/general-server/op/general_infer_op.cpp 
b/core/general-server/op/general_infer_op.cpp index d3761000..e298ef4b 100644 --- a/core/general-server/op/general_infer_op.cpp +++ b/core/general-server/op/general_infer_op.cpp @@ -54,15 +54,19 @@ int GeneralInferOp::inference() { VLOG(2) << "infer batch size: " << batch_size; // infer Timer timeline; - double infer_time = 0.0; + // double infer_time = 0.0; + int64_t start = timeline.TimeStampUS(); timeline.Start(); if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) { LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME; return -1; } - timeline.Pause(); - infer_time = timeline.ElapsedUS(); - + // timeline.Pause(); + // infer_time = timeline.ElapsedUS(); + int64_t end = timeline.TimeStampUS(); + CopyBlobInfo(input_blob, output_blob); + AddBlobInfo(output_blob, start); + AddBlobInfo(output_blob, end); return 0; } DEFINE_OP(GeneralInferOp); diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index 3020dac3..afb593d7 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -20,11 +20,13 @@ #include "core/general-server/op/general_reader_op.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" +#include "core/util/include/timer.h" namespace baidu { namespace paddle_serving { namespace serving { +using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Request; @@ -85,6 +87,10 @@ int GeneralReaderOp::inference() { LOG(ERROR) << "Failed get op tls reader object output"; } + Timer timeline; + // double read_time = 0.0; + // timeline.Start(); + int64_t start = timeline.TimeStampUS(); int var_num = req->insts(0).tensor_array_size(); VLOG(2) << "var num: " << var_num; // read config @@ -196,6 +202,13 @@ int GeneralReaderOp::inference() { } } + timeline.Pause(); + // read_time = timeline.ElapsedUS(); + int64_t end = timeline.TimeStampUS(); + res->p_size = 0; + AddBlobInfo(res, start); + AddBlobInfo(res, end); + VLOG(2) << "read data from client success"; return 0; } diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 39624dcd..8ffc9a7d 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -53,6 +53,11 @@ int GeneralResponseOp::inference() { const Request *req = dynamic_cast(get_request_message()); + Timer timeline; + // double response_time = 0.0; + // timeline.Start(); + int64_t start = timeline.TimeStampUS(); + VLOG(2) << "start to call load general model_conf op"; baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource::instance(); @@ -67,7 +72,7 @@ int GeneralResponseOp::inference() { fetch_index[i] = model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)]; } - + // response inst with only fetch_var_names Response *res = mutable_data(); @@ -118,6 +123,18 @@ int GeneralResponseOp::inference() { } var_idx++; } + + // timeline.Pause(); + // response_time = timeline.ElapsedUS(); + int64_t end = timeline.TimeStampUS(); + VLOG(2) << "p size for input blob: " << input_blob->p_size; + for (int i = 0; i < input_blob->p_size; ++i) { + res->add_profile_time(input_blob->time_stamp[i]); + } + // TODO(guru4elephant): find more elegant way to do this + res->add_profile_time(start); + 
res->add_profile_time(end); + return 0; } diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp index bc6d96a7..d927dd0f 100644 --- a/core/general-server/op/general_text_reader_op.cpp +++ b/core/general-server/op/general_text_reader_op.cpp @@ -19,11 +19,13 @@ #include "core/general-server/op/general_text_reader_op.h" #include "core/predictor/framework/infer.h" #include "core/predictor/framework/memory.h" +#include "core/util/include/timer.h" namespace baidu { namespace paddle_serving { namespace serving { +using baidu::paddle_serving::Timer; using baidu::paddle_serving::predictor::MempoolWrapper; using baidu::paddle_serving::predictor::general_model::Tensor; using baidu::paddle_serving::predictor::general_model::Request; @@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() { return -1; } + Timer timeline; + // double read_time = 0.0; + // timeline.Start(); + int64_t start = timeline.TimeStampUS(); + int var_num = req->insts(0).tensor_array_size(); VLOG(2) << "var num: " << var_num; // read config @@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() { } } + // timeline.Pause(); + // read_time = timeline.ElapsedUS(); + int64_t end = timeline.TimeStampUS(); + AddBlobInfo(res, start); + AddBlobInfo(res, end); + VLOG(2) << "read data from client success"; return 0; } diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp index 16f47635..544edaf3 100644 --- a/core/general-server/op/general_text_response_op.cpp +++ b/core/general-server/op/general_text_response_op.cpp @@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() { const Request *req = dynamic_cast(get_request_message()); + Timer timeline; + // double response_time = 0.0; + // timeline.Start(); + int64_t start = timeline.TimeStampUS(); + VLOG(2) << "start to call load general model_conf op"; baidu::paddle_serving::predictor::Resource &resource = baidu::paddle_serving::predictor::Resource::instance(); @@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() { } var_idx++; } + + // timeline.Pause(); + // response_time = timeline.ElapsedUS(); + int64_t end = timeline.TimeStampUS(); + + for (int i = 0; i < input_blob->p_size; ++i) { + res->add_profile_time(input_blob->time_stamp[i]); + } + // TODO(guru4elephant): find more elegant way to do this + res->add_profile_time(start); + res->add_profile_time(end); + return 0; } DEFINE_OP(GeneralTextResponseOp); diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 36b63ec6..06b08438 100644 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -42,7 +42,7 @@ message Request { message Response { repeated FetchInst insts = 1; - optional float mean_infer_us = 2; + repeated int64 profile_time = 2; }; service GeneralModelService { diff --git a/core/predictor/framework/service.cpp b/core/predictor/framework/service.cpp index 0a2af4e4..95c7db9f 100644 --- a/core/predictor/framework/service.cpp +++ b/core/predictor/framework/service.cpp @@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request, TRACEPRINTF("finish to thread clear"); if (_enable_map_request_to_workflow) { + LOG(INFO) << "enable map request == True"; std::vector* workflows = _map_request_to_workflow(request); if (!workflows || workflows->size() == 0) { LOG(ERROR) << "Failed to map request to workflow"; @@ -169,6 +170,7 @@ int 
InferService::inference(const google::protobuf::Message* request, } } } else { + LOG(INFO) << "enable map request == False"; TRACEPRINTF("start to execute one workflow"); size_t fsize = _flows.size(); for (size_t fi = 0; fi < fsize; ++fi) { @@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow, TRACEPRINTF("finish to copy from"); workflow_time.stop(); + LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed(); PredictorMetric::GetInstance()->update_latency_metric( WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed()); diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index 1d62c25f..28f17d5e 100644 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -42,7 +42,7 @@ message Request { message Response { repeated FetchInst insts = 1; - optional float mean_infer_us = 2; + repeated int64 profile_time = 2; }; service GeneralModelService { diff --git a/core/util/CMakeLists.txt b/core/util/CMakeLists.txt index 290f2bbc..7d3b2fd5 100644 --- a/core/util/CMakeLists.txt +++ b/core/util/CMakeLists.txt @@ -1,2 +1,3 @@ include(src/CMakeLists.txt) add_library(utils ${util_srcs}) + diff --git a/core/util/include/timer.h b/core/util/include/timer.h index 91732f07..9d547bc1 100644 --- a/core/util/include/timer.h +++ b/core/util/include/timer.h @@ -38,6 +38,7 @@ class Timer { double ElapsedMS(); // return elapsed time in sec double ElapsedSec(); + int64_t TimeStampUS(); private: struct timeval _start; diff --git a/core/util/src/CMakeLists.txt b/core/util/src/CMakeLists.txt index 8b1d4f5e..0a45dc9e 100644 --- a/core/util/src/CMakeLists.txt +++ b/core/util/src/CMakeLists.txt @@ -1,2 +1,3 @@ FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc) LIST(APPEND util_srcs ${srcs}) + diff --git a/core/util/src/timer.cc b/core/util/src/timer.cc index 891dbe33..836a1002 100644 --- a/core/util/src/timer.cc +++ b/core/util/src/timer.cc @@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; } double Timer::ElapsedSec() { return _elapsed / 1000000.0; } +int64_t Timer::TimeStampUS() { + gettimeofday(&_now, NULL); + return _now.tv_usec; +} + int64_t Timer::Tickus() { gettimeofday(&_now, NULL); return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 85d50863..0b2433d0 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -143,9 +143,6 @@ class Client(object): for i, name in enumerate(fetch_names): result_map[name] = result[i] - if profile: - result_map["infer_time"] = result[-1][0] - return result_map def batch_predict(self, feed_batch=[], fetch=[], profile=False): -- GitLab
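
Note on interpreting the new profile_time field: general_reader_op and general_infer_op stamp a start/end timestamp pair into the request's GeneralBlob via AddBlobInfo() (the infer op first carries the upstream stamps forward with CopyBlobInfo()), and general_response_op copies the accumulated stamps into Response.profile_time and appends its own pair, so the client sees consecutive (start, end) pairs in op execution order alongside its own preprocess/infer/postprocess marks. The sketch below is illustrative only and not part of this patch; the op names reader/infer/response and the now_us() helper are assumptions. In particular, now_us() combines tv_sec and tv_usec into a full-resolution microsecond timestamp, whereas the Timer::TimeStampUS() added in this patch returns tv_usec alone.

// Standalone sketch (not part of the patch): fold the paired timestamps in
// Response.profile_time into a readable per-op timeline on the client side.
#include <sys/time.h>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: full-resolution wall-clock timestamp in microseconds
// (seconds * 1e6 + microseconds), unlike Timer::TimeStampUS() above.
static int64_t now_us() {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return static_cast<int64_t>(tv.tv_sec) * 1000000L + tv.tv_usec;
}

// profile_time is assumed to hold consecutive (start, end) pairs, one pair
// per server op (reader, infer, response), in execution order.
static void PrintTimeline(const std::vector<int64_t>& profile_time) {
  const char* names[] = {"reader", "infer", "response"};
  int op_num = static_cast<int>(profile_time.size()) / 2;
  for (int i = 0; i < op_num; ++i) {
    int64_t start = profile_time[2 * i];
    int64_t end = profile_time[2 * i + 1];
    const char* name = (i < 3) ? names[i] : "op";
    std::printf("%-8s start=%ld end=%ld cost=%ld us\n",
                name, static_cast<long>(start), static_cast<long>(end),
                static_cast<long>(end - start));
  }
}

int main() {
  // Fabricated stand-in for Response.profile_time: three ops, two stamps each.
  int64_t t0 = now_us();
  std::vector<int64_t> profile_time = {t0,        t0 + 120,    // reader
                                       t0 + 130,  t0 + 5130,   // infer
                                       t0 + 5140, t0 + 5200};  // response
  PrintTimeline(profile_time);
  return 0;
}

Compiled as a standalone program with any C++11 compiler, this prints one line per op with its start, end, and cost in microseconds, which is the shape of timeline the client-side VLOG statements in general_model.cpp are logging.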