diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index e574627f7ef70a74609e3de7125c2f9e3e5c4ba0..3c775a9acb2ac17a99557bfd28c4e8a3cf20a8c5 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -26,5 +26,5 @@ endif()
 if (NOT CLIENT_ONLY)
 add_subdirectory(predictor)
 add_subdirectory(general-server)
-add_subdirectory(util)
 endif()
+add_subdirectory(util)
diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt
index 8f654ee52e80e974c421ae73473aba304f6f304a..f3bb666a3a0eb114463e941fdaa1ba8134337bb1 100644
--- a/core/general-client/CMakeLists.txt
+++ b/core/general-client/CMakeLists.txt
@@ -1,5 +1,5 @@
 if(CLIENT_ONLY)
 add_subdirectory(pybind11)
 pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-target_link_libraries(serving_client PRIVATE -Wl,--whole-archive sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
 endif()
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 8f0a824718c89a3b4d64d6e75bb5642e20d32688..75c1b53bae08878d530f69c25220d77eb8ba42df 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -31,6 +31,9 @@
 using baidu::paddle_serving::sdk_cpp::Predictor;
 using baidu::paddle_serving::sdk_cpp::PredictorApi;
 
+DECLARE_bool(profile_client);
+DECLARE_bool(profile_server);
+
 // given some input data, pack into pb, and send request
 namespace baidu {
 namespace paddle_serving {
@@ -45,6 +48,8 @@ class PredictorClient {
   PredictorClient() {}
   ~PredictorClient() {}
 
+  void init_gflags(std::vector<std::string> argv);
+
   int init(const std::string& client_conf);
 
   void set_predictor_conf(const std::string& conf_path,
@@ -87,6 +92,7 @@ class PredictorClient {
   std::map<std::string, std::string> _fetch_name_to_var_name;
   std::vector<std::vector<int>> _shape;
   std::vector<int> _type;
+  std::vector<int64_t> _last_request_ts;
 };
 
 }  // namespace general_model
diff --git a/core/general-client/src/elog b/core/general-client/src/elog
new file mode 100644
index 0000000000000000000000000000000000000000..33edf65aab6e7a4778f160dde6585216181c4cfb
--- /dev/null
+++ b/core/general-client/src/elog
@@ -0,0 +1 @@
+make: *** No targets specified and no makefile found. Stop.
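The header above only declares the two new flags; they are defined once in general_model.cpp (next file) and parsed lazily through init_gflags(). As a reference, here is a minimal, self-contained sketch of that gflags pattern. It is not part of the patch: only the flag names come from the diff, the main() and sample usage are invented.

```cpp
// Minimal sketch of the DEFINE/DECLARE + ParseCommandLineFlags pattern used in this patch.
// Only the flag names come from the patch; main() is illustrative.
#include <cstdio>
#include <gflags/gflags.h>

DEFINE_bool(profile_client, false, "print a PROFILE line per request");
DEFINE_bool(profile_server, false, "ask the server to return per-op timestamps");
// Any other translation unit can see the same flags via:
//   DECLARE_bool(profile_client);

int main(int argc, char** argv) {
  // "--tryfromenv=profile_client,profile_server" (what the Python wrapper in this
  // patch passes) makes gflags read FLAGS_profile_client / FLAGS_profile_server
  // from the environment if set, instead of requiring command-line switches.
  google::ParseCommandLineFlags(&argc, &argv, true);
  printf("profile_client=%d profile_server=%d\n",
         FLAGS_profile_client ? 1 : 0,
         FLAGS_profile_server ? 1 : 0);
  return 0;
}
```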
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index c78e63c9abc2f6271c136fb811553ef3e74f1901..f8e7634a0a8ae2b90ccb2851491624d6ba0c058e 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -17,18 +17,42 @@
 #include "core/sdk-cpp/builtin_format.pb.h"
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
+#include "core/util/include/timer.h"
+DEFINE_bool(profile_client, false, "");
+DEFINE_bool(profile_server, false, "");
+
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::FeedInst;
 using baidu::paddle_serving::predictor::general_model::FetchInst;
 
+std::once_flag gflags_init_flag;
+
 namespace baidu {
 namespace paddle_serving {
 namespace general_model {
 
 using configure::GeneralModelConfig;
 
+void PredictorClient::init_gflags(std::vector<std::string> argv) {
+  std::call_once(gflags_init_flag, [&]() {
+    FLAGS_logtostderr = true;
+    argv.insert(argv.begin(), "dummy");
+    int argc = argv.size();
+    char **arr = new char *[argv.size()];
+    std::string line;
+    for (size_t i = 0; i < argv.size(); i++) {
+      arr[i] = &argv[i][0];
+      line += argv[i];
+      line += ' ';
+    }
+    google::ParseCommandLineFlags(&argc, &arr, true);
+    VLOG(2) << "Init commandline: " << line;
+  });
+}
+
 int PredictorClient::init(const std::string &conf_file) {
   try {
     GeneralModelConfig model_config;
@@ -117,15 +141,20 @@ std::vector<std::vector<float>> PredictorClient::predict(
     return fetch_result;
   }
 
+  Timer timeline;
+  int64_t preprocess_start = timeline.TimeStampUS();
+
   // we save infer_us at fetch_result[fetch_name.size()]
   fetch_result.resize(fetch_name.size() + 1);
 
   _api.thrd_clear();
   _predictor = _api.fetch_predictor("general_model");
+  VLOG(2) << "fetch general model predictor done.";
 
   VLOG(2) << "float feed name size: " << float_feed_name.size();
   VLOG(2) << "int feed name size: " << int_feed_name.size();
   VLOG(2) << "fetch name size: " << fetch_name.size();
+
   Request req;
   for (auto & name : fetch_name) {
     req.add_fetch_var_names(name);
@@ -175,16 +204,28 @@ std::vector<std::vector<float>> PredictorClient::predict(
     vec_idx++;
   }
 
-  VLOG(2) << "feed int feed var done.";
+  int64_t preprocess_end = timeline.TimeStampUS();
 
-  // std::map<std::string, std::vector<float> > result;
+  int64_t client_infer_start = timeline.TimeStampUS();
   Response res;
+
+  int64_t client_infer_end = 0;
+  int64_t postprocess_start = 0;
+  int64_t postprocess_end = 0;
+
+  if (FLAGS_profile_client) {
+    if (FLAGS_profile_server) {
+      req.set_profile_server(true);
+    }
+  }
+
   res.Clear();
   if (_predictor->inference(&req, &res) != 0) {
     LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
     exit(-1);
   } else {
+    client_infer_end = timeline.TimeStampUS();
+    postprocess_start = client_infer_end;
     for (auto &name : fetch_name) {
       int idx = _fetch_name_to_idx[name];
       int len = res.insts(0).tensor_array(idx).data_size();
@@ -196,8 +237,29 @@ std::vector<std::vector<float>> PredictorClient::predict(
             *(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
       }
     }
-    fetch_result[fetch_name.size()].resize(1);
-    fetch_result[fetch_name.size()][0] = res.mean_infer_us();
+    postprocess_end = timeline.TimeStampUS();
   }
+
+  if (FLAGS_profile_client) {
+    std::ostringstream oss;
+    oss << "PROFILE\t"
+        << "prepro_0:" << preprocess_start << " "
+        << "prepro_1:" << preprocess_end << " "
+        << "client_infer_0:" << client_infer_start << " "
+        << "client_infer_1:" << client_infer_end << " ";
+
+    if (FLAGS_profile_server) {
+      int op_num = res.profile_time_size() / 2;
+      for (int i = 0; i < op_num; ++i) {
+        oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
+        oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
+      }
+    }
+
+    oss << "postpro_0:" << postprocess_start << " ";
+    oss << "postpro_1:" << postprocess_end;
+
+    fprintf(stderr, "%s\n", oss.str().c_str());
+  }
 
   return fetch_result;
@@ -308,10 +370,6 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
       }
     }
   }
-  //last index for infer time
-  fetch_result_batch[batch_size].resize(1);
-  fetch_result_batch[batch_size][0].resize(1);
-  fetch_result_batch[batch_size][0][0] = res.mean_infer_us();
   }
 
   return fetch_result_batch;
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index 898b06356f19627945fa9a1e8115422d327091e3..1ce181c4d0c6207eefeaec77eeacfe63394ed839 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -31,6 +31,10 @@ PYBIND11_MODULE(serving_client, m) {
       )pddoc";
 
   py::class_<PredictorClient>(m, "PredictorClient", py::buffer_protocol())
       .def(py::init<>())
+      .def("init_gflags",
+           [](PredictorClient &self, std::vector<std::string> argv) {
+             self.init_gflags(argv);
+           })
       .def("init",
            [](PredictorClient &self, const std::string &conf) {
              return self.init(conf);
diff --git a/core/general-server/op/general_infer_helper.h b/core/general-server/op/general_infer_helper.h
index b5d6d7f23f6474cd1b77889ce6192d890920f85e..0fdda98dfdbb1c274f9106982190ef1ca527eeec 100644
--- a/core/general-server/op/general_infer_helper.h
+++ b/core/general-server/op/general_infer_helper.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <string.h>
 #include <string>
 #ifdef BCLOUD
 #ifdef WITH_GPU
@@ -34,8 +35,8 @@ static const char* GENERAL_MODEL_NAME = "general_model";
 
 struct GeneralBlob {
   std::vector<paddle::PaddleTensor> tensor_vector;
-  double infer_time;
-  std::vector<std::string> fetch_name_vector;
+  int64_t time_stamp[20];
+  int p_size = 0;
 
   void Clear() {
     size_t tensor_count = tensor_vector.size();
@@ -60,6 +61,18 @@ struct GeneralBlob {
   std::string ShortDebugString() const { return "Not implemented!"; }
 };
 
+static void AddBlobInfo(GeneralBlob *blob,
+                        int64_t init_value) {
+  blob->time_stamp[blob->p_size] = init_value;
+  blob->p_size++;
+}
+
+static void CopyBlobInfo(const GeneralBlob *src,
+                         GeneralBlob *tgt) {
+  memcpy(&(tgt->time_stamp[0]), &(src->time_stamp[0]),
+         src->p_size * sizeof(int64_t));
+}
+
 }  // namespace serving
 }  // namespace paddle_serving
 }  // namespace baidu
diff --git a/core/general-server/op/general_infer_op.cpp b/core/general-server/op/general_infer_op.cpp
index d3761000fba5ce0663b9c9b1759cbf6a64c912e8..3ae84b2047a04ec6c76d8de20d15cde7fbb8b68b 100644
--- a/core/general-server/op/general_infer_op.cpp
+++ b/core/general-server/op/general_infer_op.cpp
@@ -52,17 +52,18 @@ int GeneralInferOp::inference() {
   int batch_size = input_blob->GetBatchSize();
   VLOG(2) << "infer batch size: " << batch_size;
 
-  // infer
   Timer timeline;
-  double infer_time = 0.0;
+  int64_t start = timeline.TimeStampUS();
   timeline.Start();
 
   if (InferManager::instance().infer(GENERAL_MODEL_NAME, in, out, batch_size)) {
     LOG(ERROR) << "Failed do infer in fluid model: " << GENERAL_MODEL_NAME;
     return -1;
   }
-  timeline.Pause();
-  infer_time = timeline.ElapsedUS();
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
   return 0;
 }
 DEFINE_OP(GeneralInferOp);
diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp
index 3020dac3bde5ed484b3db4870f35d025ae6a2396..8db186eaca989e533a825a57802343b6cbc0fb43 100644
--- a/core/general-server/op/general_reader_op.cpp
+++ b/core/general-server/op/general_reader_op.cpp
@@ -20,11 +20,13 @@
 #include "core/general-server/op/general_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/util/include/timer.h"
 
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
@@ -85,9 +87,10 @@ int GeneralReaderOp::inference() {
     LOG(ERROR) << "Failed get op tls reader object output";
   }
 
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
-  // read config
 
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
@@ -196,6 +199,12 @@
     }
   }
 
+  timeline.Pause();
+  int64_t end = timeline.TimeStampUS();
+  res->p_size = 0;
+  AddBlobInfo(res, start);
+  AddBlobInfo(res, end);
+
   VLOG(2) << "read data from client success";
   return 0;
 }
diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp
index 39624dcd7bb9455f1268c5d2d464b02fdeb16be4..caa0185efd7c1794f12fc896777e249308ce5647 100644
--- a/core/general-server/op/general_response_op.cpp
+++ b/core/general-server/op/general_response_op.cpp
@@ -53,6 +53,11 @@ int GeneralResponseOp::inference() {
   const Request *req =
       dynamic_cast<const Request *>(get_request_message());
 
+  Timer timeline;
+  // double response_time = 0.0;
+  // timeline.Start();
+  int64_t start = timeline.TimeStampUS();
+
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
@@ -67,11 +72,8 @@
     fetch_index[i] =
         model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
   }
-
-  // response inst with only fetch_var_names
-  Response *res = mutable_data<Response>();
 
-  // res->set_mean_infer_us(infer_time);
+  Response *res = mutable_data<Response>();
 
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
@@ -118,6 +120,18 @@
     }
     var_idx++;
   }
+
+  if (req->profile_server()) {
+    int64_t end = timeline.TimeStampUS();
+    VLOG(2) << "p size for input blob: " << input_blob->p_size;
+    for (int i = 0; i < input_blob->p_size; ++i) {
+      res->add_profile_time(input_blob->time_stamp[i]);
+    }
+    // TODO(guru4elephant): find more elegant way to do this
+    res->add_profile_time(start);
+    res->add_profile_time(end);
+  }
+
   return 0;
 }
diff --git a/core/general-server/op/general_text_reader_op.cpp b/core/general-server/op/general_text_reader_op.cpp
index bc6d96a7bb507dcc1c74dac28bfe9a908067ffed..77dd299c818cf2af2cee05b7cf14e7b490f79e19 100644
--- a/core/general-server/op/general_text_reader_op.cpp
+++ b/core/general-server/op/general_text_reader_op.cpp
@@ -19,11 +19,13 @@
 #include "core/general-server/op/general_text_reader_op.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
+#include "core/util/include/timer.h"
 
 namespace baidu {
 namespace paddle_serving {
 namespace serving {
 
+using baidu::paddle_serving::Timer;
 using baidu::paddle_serving::predictor::MempoolWrapper;
 using baidu::paddle_serving::predictor::general_model::Tensor;
 using baidu::paddle_serving::predictor::general_model::Request;
@@ -54,9 +56,11 @@ int GeneralTextReaderOp::inference() {
     return -1;
   }
 
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+
   int var_num = req->insts(0).tensor_array_size();
   VLOG(2) << "var num: " << var_num;
-  // read config
 
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
@@ -157,6 +161,10 @@
     }
   }
 
+  int64_t end = timeline.TimeStampUS();
+  AddBlobInfo(res, start);
+  AddBlobInfo(res, end);
+
   VLOG(2) << "read data from client success";
   return 0;
 }
diff --git a/core/general-server/op/general_text_response_op.cpp b/core/general-server/op/general_text_response_op.cpp
index 16f476353965384c0d2b0092f2a42efa9c5289e7..a60aba14aafcad7594fc17527642e88a24d71e2c 100644
--- a/core/general-server/op/general_text_response_op.cpp
+++ b/core/general-server/op/general_text_response_op.cpp
@@ -49,10 +49,11 @@ int GeneralTextResponseOp::inference() {
   int batch_size = input_blob->GetBatchSize();
   VLOG(2) << "infer batch size: " << batch_size;
 
-  // infer
-
   const Request *req = dynamic_cast<const Request *>(get_request_message());
 
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+
   VLOG(2) << "start to call load general model_conf op";
   baidu::paddle_serving::predictor::Resource &resource =
       baidu::paddle_serving::predictor::Resource::instance();
@@ -71,8 +72,6 @@
 
   // response inst with only fetch_var_names
   Response *res = mutable_data<Response>();
-  // res->set_mean_infer_us(infer_time);
-
   for (int i = 0; i < batch_size; ++i) {
     FetchInst *fetch_inst = res->add_insts();
     for (auto & idx : fetch_index) {
@@ -118,6 +117,18 @@
     }
     var_idx++;
   }
+
+  if (req->profile_server()) {
+    int64_t end = timeline.TimeStampUS();
+
+    for (int i = 0; i < input_blob->p_size; ++i) {
+      res->add_profile_time(input_blob->time_stamp[i]);
+    }
+    // TODO(guru4elephant): find more elegant way to do this
+    res->add_profile_time(start);
+    res->add_profile_time(end);
+  }
+
   return 0;
 }
 DEFINE_OP(GeneralTextResponseOp);
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 36b63ec68bc704c1f5e1eb7a8ed4d3e4fb456d4f..9ffc5df7b550b384080c7a5372280f708e01a720 100644
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -38,11 +38,12 @@ message FetchInst {
 message Request {
   repeated FeedInst insts = 1;
   repeated string fetch_var_names = 2;
+  optional bool profile_server = 3 [ default = false ];
 };
 
 message Response {
   repeated FetchInst insts = 1;
-  optional float mean_infer_us = 2;
+  repeated int64 profile_time = 2;
 };
 
 service GeneralModelService {
diff --git a/core/predictor/framework/service.cpp b/core/predictor/framework/service.cpp
index 0a2af4e42413d3ebfa4d84faf3bac668efe12676..95c7db9f96a6e78522190e3f522d38669423475b 100644
--- a/core/predictor/framework/service.cpp
+++ b/core/predictor/framework/service.cpp
@@ -147,6 +147,7 @@ int InferService::inference(const google::protobuf::Message* request,
   TRACEPRINTF("finish to thread clear");
 
   if (_enable_map_request_to_workflow) {
+    LOG(INFO) << "enable map request == True";
    std::vector<Workflow*>* workflows = _map_request_to_workflow(request);
    if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow"; @@ -169,6 +170,7 @@ int InferService::inference(const google::protobuf::Message* request, } } } else { + LOG(INFO) << "enable map request == False"; TRACEPRINTF("start to execute one workflow"); size_t fsize = _flows.size(); for (size_t fi = 0; fi < fsize; ++fi) { @@ -233,6 +235,7 @@ int InferService::_execute_workflow(Workflow* workflow, TRACEPRINTF("finish to copy from"); workflow_time.stop(); + LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed(); PredictorMetric::GetInstance()->update_latency_metric( WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed()); diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index 1d62c25f1ce40ecc23e98310e7834d7bcc847022..ec942706acf3cabcb439e24868ac8ea61809104c 100644 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -38,11 +38,12 @@ message FetchInst { message Request { repeated FeedInst insts = 1; repeated string fetch_var_names = 2; + optional bool profile_server = 3 [ default = false ]; }; message Response { repeated FetchInst insts = 1; - optional float mean_infer_us = 2; + repeated int64 profile_time = 2; }; service GeneralModelService { diff --git a/core/util/CMakeLists.txt b/core/util/CMakeLists.txt index 290f2bbc7cafded2ea06f15cf06da750e481f821..7d3b2fd56a44be5d06f2ee16f0f111a01ceeb9d2 100644 --- a/core/util/CMakeLists.txt +++ b/core/util/CMakeLists.txt @@ -1,2 +1,3 @@ include(src/CMakeLists.txt) add_library(utils ${util_srcs}) + diff --git a/core/util/include/timer.h b/core/util/include/timer.h index 91732f07d4822cf04e05491f5138f1a475e68f43..9d547bc102b987f898887b0b2659b1d8e6942c24 100644 --- a/core/util/include/timer.h +++ b/core/util/include/timer.h @@ -38,6 +38,7 @@ class Timer { double ElapsedMS(); // return elapsed time in sec double ElapsedSec(); + int64_t TimeStampUS(); private: struct timeval _start; diff --git a/core/util/src/CMakeLists.txt b/core/util/src/CMakeLists.txt index 8b1d4f5e81adba4ba6ebed90977850ea8bc572c1..0a45dc9e9319f7a3df04ad08eda432616bc363f7 100644 --- a/core/util/src/CMakeLists.txt +++ b/core/util/src/CMakeLists.txt @@ -1,2 +1,3 @@ FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc) LIST(APPEND util_srcs ${srcs}) + diff --git a/core/util/src/timer.cc b/core/util/src/timer.cc index 891dbe33d06e9530c9c64a8f09bfd4415e2d114b..836a100231f97e639c60caa2508f0f85887cd155 100644 --- a/core/util/src/timer.cc +++ b/core/util/src/timer.cc @@ -54,6 +54,11 @@ double Timer::ElapsedMS() { return _elapsed / 1000.0; } double Timer::ElapsedSec() { return _elapsed / 1000000.0; } +int64_t Timer::TimeStampUS() { + gettimeofday(&_now, NULL); + return _now.tv_usec; +} + int64_t Timer::Tickus() { gettimeofday(&_now, NULL); return (_now.tv_sec - _start.tv_sec) * 1000 * 1000L + diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 85d50863b634e89031995f6051d6da0a94a8c55d..79bf945dbc8c3cac4cb51e8c1856a59b11c37ac5 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -17,6 +17,7 @@ from .proto import sdk_configure_pb2 as sdk from .proto import general_model_config_pb2 as m_config import google.protobuf.text_format import time +import sys int_type = 0 float_type = 1 @@ -87,6 +88,9 @@ class Client(object): # map feed names to index self.client_handle_ = PredictorClient() self.client_handle_.init(path) + read_env_flags = ["profile_client", "profile_server"] + 
+        self.client_handle_.init_gflags([sys.argv[0]] +
+                                        ["--tryfromenv=" + ",".join(read_env_flags)])
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
         self.feed_shapes_ = [var.shape for var in model_conf.feed_var]
@@ -143,9 +147,6 @@ class Client(object):
         for i, name in enumerate(fetch_names):
             result_map[name] = result[i]
 
-        if profile:
-            result_map["infer_time"] = result[-1][0]
-
         return result_map
 
     def batch_predict(self, feed_batch=[], fetch=[], profile=False):
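Because the Python wrapper above forwards `--tryfromenv=profile_client,profile_server`, setting `FLAGS_profile_client=true` (and optionally `FLAGS_profile_server=true`) in the environment before constructing `Client` is enough to get the PROFILE lines. For completeness, here is a hedged sketch, not taken from the patch, of driving the same switches from the C++ client directly; the include path and config file names are placeholders, and only calls that appear in this patch are used.

```cpp
// Hypothetical direct use of the C++ PredictorClient with profiling enabled;
// paths are placeholders, API calls are those declared in general_model.h above.
#include <string>
#include <vector>
#include "core/general-client/include/general_model.h"  // assumption: include path

int main() {
  baidu::paddle_serving::general_model::PredictorClient client;
  // Explicit flags instead of --tryfromenv; either form is parsed by init_gflags().
  std::vector<std::string> flags = {"--profile_client=true", "--profile_server=true"};
  client.init_gflags(flags);
  client.init("serving_client_conf.prototxt");  // placeholder config file
  // ... set_predictor_conf(), then predict()/batch_predict() as usual;
  // with the flags set, each predict() call writes one PROFILE line to stderr.
  return 0;
}
```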