diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index e574627f7ef70a74609e3de7125c2f9e3e5c4ba0..3c775a9acb2ac17a99557bfd28c4e8a3cf20a8c5 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -26,5 +26,5 @@ endif()
 if (NOT CLIENT_ONLY)
 add_subdirectory(predictor)
 add_subdirectory(general-server)
-add_subdirectory(util)
 endif()
+add_subdirectory(util)
diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt
index 8f654ee52e80e974c421ae73473aba304f6f304a..f3bb666a3a0eb114463e941fdaa1ba8134337bb1 100644
--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
@@ -1,5 +1,5 @@
 if(CLIENT_ONLY)
 add_subdirectory(pybind11)
 pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
 endif()
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 8f0a824718c89a3b4d64d6e75bb5642e20d32688..e1820ac2cef6b85a731709e2bc2e3c63494995ce 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -87,6 +87,7 @@ class PredictorClient {
   std::map<std::string, std::string> _fetch_name_to_var_name;
   std::vector<std::vector<int>> _shape;
   std::vector<int> _type;
+  std::vector<int64_t> _last_request_ts;
 };
 
 }  // namespace general_model
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index c78e63c9abc2f6271c136fb811553ef3e74f1901..cf73083517673fd8e311f94888808c49d80734ff 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -17,7 +17,9 @@
 #include "core/sdk-cpp/builtin_format.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
+#include "core/util/include/timer.h"
 
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
@@ -117,15 +119,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
     return fetch_result;
   }
 
+  Timer timeline;
+  int64_t preprocess_start = timeline.TimeStampUS();
+
   // we save infer_us at fetch_result[fetch_name.size()]
   fetch_result.resize(fetch_name.size() + 1);
 
   _api.thrd_clear();
   _predictor = _api.fetch_predictor("general_model");
+  VLOG(2) << "fetch general model predictor done.";
 
   VLOG(2) << "float feed name size: " << float_feed_name.size();
   VLOG(2) << "int feed name size: " << int_feed_name.size();
   VLOG(2) << "fetch name size: " << fetch_name.size();
+
   Request req;
   for (auto & name : fetch_name) {
     req.add_fetch_var_names(name);
@@ -175,16 +182,21 @@ std::vector<std::vector<float>> PredictorClient::predict(
     vec_idx++;
   }
 
-  VLOG(2) << "feed int feed var done.";
+  int64_t preprocess_end = timeline.TimeStampUS();
 
-  // std::map<std::string, std::vector<float>> result;
+  int64_t client_infer_start = timeline.TimeStampUS();
   Response res;
 
+  int64_t client_infer_end = 0;
+  int64_t postprocess_start = 0;
+  int64_t postprocess_end = 0;
   res.Clear();
   if (_predictor->inference(&req, &res) != 0) {
     LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
     exit(-1);
   } else {
+    client_infer_end = timeline.TimeStampUS();
+    postprocess_start = client_infer_end;
     for (auto &name : fetch_name) {
      int idx = _fetch_name_to_idx[name];
      int len = res.insts(0).tensor_array(idx).data_size();
@@ -196,10 +208,24 @@ std::vector<std::vector<float>> PredictorClient::predict(
             *(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
       }
     }
-    fetch_result[fetch_name.size()].resize(1);
-    fetch_result[fetch_name.size()][0] = res.mean_infer_us();
+    postprocess_end = timeline.TimeStampUS();
   }
 
+  int op_num = res.profile_time_size() / 2;
+
+  VLOG(2) << "preprocess start: " << preprocess_start;
+  VLOG(2) << "preprocess end: " << preprocess_end;
+  VLOG(2) << "client infer start: " << client_infer_start;
+  VLOG(2) << "op1 start: " << res.profile_time(0);
+  VLOG(2) << "op1 end: " << res.profile_time(1);
+  VLOG(2) << "op2 start: " << res.profile_time(2);
+  VLOG(2) << "op2 end: " << res.profile_time(3);
+  VLOG(2) << "op3 start: " << res.profile_time(4);
+  VLOG(2) << "op3 end: " << res.profile_time(5);
+  VLOG(2) << "client infer end: " << client_infer_end;
+  VLOG(2) << "client postprocess start: " << postprocess_start;
+  VLOG(2) << "client postprocess end: " << postprocess_end;
+
   return fetch_result;
 }
 
@@ -308,10 +334,10 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
       }
     }
   }
-  //last index for infer time
-  fetch_result_batch[batch_size].resize(1);
-  fetch_result_batch[batch_size][0].resize(1);
-  fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
+  // last index for infer time
+  // fetch_result_batch[batch_size].resize(1);
+  // fetch_result_batch[batch_size][0].resize(1);
+  // fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
   }
 
   return fetch_result_batch;
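
The client-side logging above hard-codes a three-op timeline (op1..op3) even though op_num = res.profile_time_size() / 2 is already computed. A minimal sketch of walking the server-side (start, end) pairs generically, assuming only the Response message as changed in general_model_service.proto below; this loop is illustrative and not part of the patch:

int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
  int64_t op_start = res.profile_time(2 * i);      // i-th op entry timestamp (us)
  int64_t op_end = res.profile_time(2 * i + 1);    // i-th op exit timestamp (us)
  VLOG(2) << "op" << i + 1 << " start: " << op_start;
  VLOG(2) << "op" << i + 1 << " end: " << op_end;
  VLOG(2) << "op" << i + 1 << " cost (us): " << op_end - op_start;
}
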
diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
index b5d6d7f23f6474cd1b77889ce6192d890920f85e..0fdda98dfdbb1c274f9106982190ef1ca527eeec 100644
--- a/core/general-server/op/general_infer_helper.h
+++ b/core/general-server/op/general_infer_helper.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <string.h>
 #include <vector>
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
 
 struct GeneralBlob {
   std::vector<paddle::PaddleTensor> tensor_vector;
-  double infer_time;
-  std::vector<std::string> fetch_name_vector;
+  int64_t time_stamp[20];
+  int p_size = 0;
 
   void Clear() {
     size_t tensor_count = tensor_vector.size();
@@ -60,6 +61,18 @@ struct GeneralBlob {
   std::string ShortDebugString() const { return "Not implemented!"; }
 };
 
+static void AddBlobInfo(GeneralBlob * blob,
+                        int64_t init_value) {
+  blob->time_stamp[blob->p_size] = init_value;
+  blob->p_size++;
+}
+
+static void CopyBlobInfo(const GeneralBlob * src,
+                         GeneralBlob * tgt) {
+  memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
+         src->p_size * sizeof(int64_t));
+}
+
 }  // namespace serving
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index d3761000fba5ce0663b9c9b1759cbf6a64c912e8..e298ef4bcb754499dc4c219ab88e41e5444f575b 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -54,15 +54,19 @@ int GeneralInferOp::inference() {
   VLOG(2) << "infer batch size: " << batch_size;
   // infer
   Timer timeline;
-  double infer_time = 0.0;
+  // double infer_time = 0.0;
+  int64_t start = timeline.TimeStampUS();
   timeline.Start();
   if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
     LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
     return -1;
   }
-  timeline.Pause();
-  infer_time = timeline.ElapsedUS();
-
+  // timeline.Pause();
+  // infer_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
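
As defined above, CopyBlobInfo() copies only the time_stamp array and not p_size, so a blob that starts with p_size == 0 would have the copied marks overwritten by subsequent AddBlobInfo() calls unless the count is carried across as well. A self-contained sketch of the intended accumulation, with a reduced GeneralBlob and made-up microsecond values (not the server runtime):

#include <stdint.h>
#include <string.h>
#include <cstdio>

struct GeneralBlob {                       // reduced to the profiling fields only
  int64_t time_stamp[20];
  int p_size = 0;
};

static void AddBlobInfo(GeneralBlob *blob, int64_t v) {
  blob->time_stamp[blob->p_size] = v;
  blob->p_size++;
}

static void CopyBlobInfo(const GeneralBlob *src, GeneralBlob *tgt) {
  memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
         src->p_size * sizeof(int64_t));
}

int main() {
  GeneralBlob reader_out;                  // filled by the reader op
  reader_out.p_size = 0;
  AddBlobInfo(&reader_out, 1000);          // reader start (us)
  AddBlobInfo(&reader_out, 1200);          // reader end (us)

  GeneralBlob infer_out;                   // filled by the infer op
  CopyBlobInfo(&reader_out, &infer_out);   // forward upstream marks
  infer_out.p_size = reader_out.p_size;    // carry the count across before appending
  AddBlobInfo(&infer_out, 1210);           // infer start (us)
  AddBlobInfo(&infer_out, 4210);           // infer end (us)

  // the response op forwards all accumulated marks plus its own pair
  for (int i = 0; i < infer_out.p_size; ++i) {
    printf("time_stamp[%d] = %lld\n", i, (long long)infer_out.time_stamp[i]);
  }
  return 0;
}
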
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 3020dac3bde5ed484b3db4870f35d025ae6a2396..afb593d733b8564acf85fddaae7dc184099b004f 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -20,11 +20,13 @@
 #include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/util/include/timer.h"
 
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
@@ -85,6 +87,10 @@ int GeneralReaderOp::inference() {
     LOG(ERROR) << "Failed get op tls reader object output";
   }
 
+  Timer timeline;
+  // double read_time = 0.0;
+  // timeline.Start();
+  int64_t start = timeline.TimeStampUS();
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
   // read config
@@ -196,6 +202,13 @@ int GeneralReaderOp::inference() {
     }
   }
 
+  timeline.Pause();
+  // read_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+  res->p_size = 0;
+  AddBlobInfo(res, start);
+  AddBlobInfo(res, end);
+
   VLOG(2) << "read data from client success";
   return 0;
 }
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 39624dcd7bb9455f1268c5d2d464b02fdeb16be4..8ffc9a7d6adbcf054b3448c239e574bc444738b0 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
+  Timer timeline;
+  // double response_time = 0.0;
+  // timeline.Start();
+  int64_t start = timeline.TimeStampUS();
+
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
@@ -67,7 +72,7 @@ int GeneralResponseOp::inference() {
     fetch_index[i] =
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
-
+  // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
@@ -118,6 +123,18 @@ int GeneralResponseOp::inference() {
     }
     var_idx++;
   }
+
+  // timeline.Pause();
+  // response_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+  VLOG(2) << "p size for input blob: " << input_blob->p_size;
+  for (int i = 0; i < input_blob->p_size; ++i) {
+    res->add_profile_time(input_blob->time_stamp[i]);
+  }
+  // TODO(guru4elephant): find more elegant way to do this
+  res->add_profile_time(start);
+  res->add_profile_time(end);
+
   return 0;
 }
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index bc6d96a7bb507dcc1c74dac28bfe9a908067ffed..d927dd0fbb1e5ce6cb317a8b3ee22361a5eabd3e 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -19,11 +19,13 @@
 #include "core/general-server/op/general_text_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/util/include/timer.h"
 
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
@@ -54,6 +56,11 @@ int GeneralTextReaderOp::inference() {
     return -1;
   }
 
+  Timer timeline;
+  // double read_time = 0.0;
+  // timeline.Start();
+  int64_t start = timeline.TimeStampUS();
+
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
   // read config
@@ -157,6 +164,12 @@ int GeneralTextReaderOp::inference() {
     }
   }
 
+  // timeline.Pause();
+  // read_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+  AddBlobInfo(res, start);
+  AddBlobInfo(res, end);
+
   VLOG(2) << "read data from client success";
   return 0;
 }
diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp
index 16f476353965384c0d2b0092f2a42efa9c5289e7..544edaf3b2be912d9257d1bd9c7d82bd14b09996 100644
--- a/core/general-server/op/general_text_response_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -53,6 +53,11 @@ int GeneralTextResponseOp::inference() {
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
+  Timer timeline;
+  // double response_time = 0.0;
+  // timeline.Start();
+  int64_t start = timeline.TimeStampUS();
+
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
@@ -118,6 +123,18 @@ int GeneralTextResponseOp::inference() {
     }
     var_idx++;
   }
+
+  // timeline.Pause();
+  // response_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+
+  for (int i = 0; i < input_blob->p_size; ++i) {
+    res->add_profile_time(input_blob->time_stamp[i]);
+  }
+  // TODO(guru4elephant): find more elegant way to do this
+  res->add_profile_time(start);
+  res->add_profile_time(end);
+
   return 0;
 }
 DEFINE_OP(GeneralTextResponseOp);
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 36b63ec68bc704c1f5e1eb7a8ed4d3e4fb456d4f..06b08438ab648275d291778a92b73460d56058a2 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -42,7 +42,7 @@ message Request {
 
 message Response {
   repeated FetchInst insts = 1;
-  optional float mean_infer_us = 2;
+  repeated int64 profile_time = 2;
 };
 
 service GeneralModelService {
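
The profile_time entries are absolute gettimeofday-based microsecond marks appended in op order, with the response op's own (start, end) pair appended last, so durations come from pairwise differences taken within one clock. A sketch only, assuming that ordering and reusing the client-side markers (client_infer_start / client_infer_end) recorded in predict() above, of estimating the rpc and (de)serialization overhead:

int64_t server_span_us =
    res.profile_time(res.profile_time_size() - 1) - res.profile_time(0);
int64_t client_rtt_us = client_infer_end - client_infer_start;
VLOG(2) << "rpc + (de)serialization overhead (us): "
        << client_rtt_us - server_span_us;

Each of the two spans is measured against a single machine's clock, so clock skew between client and server does not enter the comparison.
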
diff --git a/core/predictor/framework/service.cpp b/core/predictor/framework/service.cpp
index 0a2af4e42413d3ebfa4d84faf3bac668efe12676..95c7db9f96a6e78522190e3f522d38669423475b 100644
--- a/core/predictor/framework/service.cpp
+++ b/core/predictor/framework/service.cpp
@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
   TRACEPRINTF("finish to thread clear");
 
   if (_enable_map_request_to_workflow) {
+    LOG(INFO) << "enable map request == True";
     std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
     if (!workflows || workflows->size() == 0) {
       LOG(ERROR) << "Failed to map request to workflow";
@@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request,
       }
     }
   } else {
+    LOG(INFO) << "enable map request == False";
     TRACEPRINTF("start to execute one workflow");
     size_t fsize = _flows.size();
     for (size_t fi = 0; fi < fsize; ++fi) {
@@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow,
   TRACEPRINTF("finish to copy from");
 
   workflow_time.stop();
+  LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed();
   PredictorMetric::GetInstance()->update_latency_metric(
       WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index 1d62c25f1ce40ecc23e98310e7834d7bcc847022..28f17d5e145488c6df4dd67d9b3a4d02f698a19d 100644
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -42,7 +42,7 @@ message Request {
 
 message Response {
   repeated FetchInst insts = 1;
-  optional float mean_infer_us = 2;
+  repeated int64 profile_time = 2;
 };
 
 service GeneralModelService {
diff --git a/core/util/CMakeLists.txt b/core/util/CMakeLists.txt
index 290f2bbc7cafded2ea06f15cf06da750e481f821..7d3b2fd56a44be5d06f2ee16f0f111a01ceeb9d2 100644
--- a/core/util/CMakeLists.txt
+++ b/core/util/CMakeLists.txt
@@ -1,2 +1,3 @@
 include(src/CMakeLists.txt)
 add_library(utils ${util_srcs})
+
diff --git a/core/util/include/timer.h b/core/util/include/timer.h
index 91732f07d4822cf04e05491f5138f1a475e68f43..9d547bc102b987f898887b0b2659b1d8e6942c24 100644
--- a/core/util/include/timer.h
+++ b/core/util/include/timer.h
@@ -38,6 +38,7 @@ class Timer {
   double ElapsedMS();
   // return elapsed time in sec
   double ElapsedSec();
+  int64_t TimeStampUS();
 
  private:
   struct timeval _start;
diff --git a/core/util/src/CMakeLists.txt b/core/util/src/CMakeLists.txt
index 8b1d4f5e81adba4ba6ebed90977850ea8bc572c1..0a45dc9e9319f7a3df04ad08eda432616bc363f7 100644
--- a/core/util/src/CMakeLists.txt
+++ b/core/util/src/CMakeLists.txt
@@ -1,2 +1,3 @@
 FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc)
 LIST(APPEND util_srcs ${srcs})
+
diff --git a/core/util/src/timer.cc b/core/util/src/timer.cc
index 891dbe33d06e9530c9c64a8f09bfd4415e2d114b..836a100231f97e639c60caa2508f0f85887cd155 100644
--- a/core/util/src/timer.cc
+++ b/core/util/src/timer.cc
@@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; }
 
 double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
 
+int64_t Timer::TimeStampUS() {
+  gettimeofday(&_now, NULL);
+  return _now.tv_usec;
+}
+
 int64_t Timer::Tickus() {
   gettimeofday(&_now, NULL);
   return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L +
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index 85d50863b634e89031995f6051d6da0a94a8c55d..0b2433d0f8daf0133c417404cc0e6d9182737465 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -143,9 +143,6 @@ class Client(object):
         for i, name in enumerate(fetch_names):
             result_map[name] = result[i]
 
-        if profile:
-            result_map["infer_time"] = result[-1][0]
-
         return result_map
 
     def batch_predict(self, feed_batch=[], fetch=[], profile=False):
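
One caveat in the new timer helper: Timer::TimeStampUS() as added in core/util/src/timer.cc returns only _now.tv_usec, i.e. the microsecond fraction of the current second, so differences between two marks (and the profile_time values sent back to the client) wrap whenever a second boundary is crossed. A full-precision variant, mirroring the arithmetic Tickus() already uses, would look like the sketch below (not part of this patch):

int64_t Timer::TimeStampUS() {
  gettimeofday(&_now, NULL);
  // microseconds since the epoch, immune to second-boundary wraparound
  return static_cast<int64_t>(_now.tv_sec) * 1000 * 1000 + _now.tv_usec;
}
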