diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8dab01f14a7a82213ae92d5fbcfce619e9939a96..83f9ca4c7dd09b428863f5492996f355fa4b0f07 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -75,6 +75,7 @@ include(generic)
 include(flags)
 
 if (NOT CLIENT_ONLY)
+include(external/cudnn)
 include(paddlepaddle)
 include(external/opencv)
 endif()
diff --git a/cmake/external/cudnn.cmake b/cmake/external/cudnn.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..98466d44fc0dd91ef0cc8e8eac2660c42a19267c
--- /dev/null
+++ b/cmake/external/cudnn.cmake
@@ -0,0 +1,102 @@
+if(NOT WITH_GPU)
+  return()
+endif()
+
+if(WIN32)
+  set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
+else(WIN32)
+  set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
+endif(WIN32)
+
+find_path(CUDNN_INCLUDE_DIR cudnn.h
+  PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
+  $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
+  NO_DEFAULT_PATH
+)
+
+get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
+
+set(TARGET_ARCH "x86_64")
+if(NOT ${CMAKE_SYSTEM_PROCESSOR})
+  set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
+endif()
+
+list(APPEND CUDNN_CHECK_LIBRARY_DIRS
+  ${CUDNN_ROOT}
+  ${CUDNN_ROOT}/lib64
+  ${CUDNN_ROOT}/lib
+  ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu
+  ${CUDNN_ROOT}/local/cuda-${CUDA_VERSION}/targets/${TARGET_ARCH}-linux/lib/
+  $ENV{CUDNN_ROOT}
+  $ENV{CUDNN_ROOT}/lib64
+  $ENV{CUDNN_ROOT}/lib
+  /usr/lib
+  ${CUDA_TOOLKIT_ROOT_DIR}
+  ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
+  )
+set(CUDNN_LIB_NAME "")
+if (LINUX)
+set(CUDNN_LIB_NAME "libcudnn.so")
+endif(LINUX)
+
+if(WIN32)
+# only support cudnn7
+set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll")
+endif(WIN32)
+
+if(APPLE)
+set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
+endif(APPLE)
+
+find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
+  PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
+  NO_DEFAULT_PATH
+  DOC "Path to cuDNN library.")
+
+
+if(CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY)
+  set(CUDNN_FOUND ON)
+else()
+  set(CUDNN_FOUND OFF)
+endif()
+
+if(CUDNN_FOUND)
+  file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
+
+  get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)
+
+  string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
+    CUDNN_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+  string(REGEX REPLACE "define CUDNN_VERSION +([0-9]+)" "\\1"
+    CUDNN_VERSION "${CUDNN_VERSION}")
+
+  if("${CUDNN_VERSION}" STREQUAL "2000")
+    message(STATUS "Current cuDNN version is v2. ")
+  else()
+    string(REGEX MATCH "define CUDNN_MAJOR +([0-9]+)" CUDNN_MAJOR_VERSION
+      "${CUDNN_VERSION_FILE_CONTENTS}")
+    string(REGEX REPLACE "define CUDNN_MAJOR +([0-9]+)" "\\1"
+      CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
+    string(REGEX MATCH "define CUDNN_MINOR +([0-9]+)" CUDNN_MINOR_VERSION
+      "${CUDNN_VERSION_FILE_CONTENTS}")
+    string(REGEX REPLACE "define CUDNN_MINOR +([0-9]+)" "\\1"
+      CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
+    string(REGEX MATCH "define CUDNN_PATCHLEVEL +([0-9]+)"
+      CUDNN_PATCHLEVEL_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
+    string(REGEX REPLACE "define CUDNN_PATCHLEVEL +([0-9]+)" "\\1"
+      CUDNN_PATCHLEVEL_VERSION "${CUDNN_PATCHLEVEL_VERSION}")
+
+    if(NOT CUDNN_MAJOR_VERSION)
+      set(CUDNN_VERSION "???")
+    else()
+      add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"")
+      math(EXPR CUDNN_VERSION
+        "${CUDNN_MAJOR_VERSION} * 1000 +
+         ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
+    endif()
+
+    message(STATUS "Current cuDNN header is ${CUDNN_INCLUDE_DIR}/cudnn.h. "
" + "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}. ") + + endif() +endif() diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake index 3e2d0f742a07a59986a2441d3d56c4202e866961..1cf2c0c867b2ae4b9d8144ebbb25f724882fa3a1 100644 --- a/cmake/paddlepaddle.cmake +++ b/cmake/paddlepaddle.cmake @@ -15,71 +15,70 @@ INCLUDE(ExternalProject) SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle) +SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle) SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/) SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE) SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE) -INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir) # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog") message( "WITH_GPU = ${WITH_GPU}") -# If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF + +# Paddle Version should be one of: +# latest: latest develop build +# version number like 1.5.2 +SET(PADDLE_VERSION "latest") + +if (WITH_GPU) + SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl") +else() + if (AVX_FOUND) + if (WITH_MKLML) + SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-mkl") + else() + SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-openblas") + endif() + else() + SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-noavx-openblas") + endif() +endif() + +SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz") +MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}") + ExternalProject_Add( - extern_paddle + "extern_paddle" ${EXTERNAL_PROJECT_LOG_ARGS} - # TODO(wangguibao): change to de newst repo when they changed. 
- GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle" - GIT_TAG "v1.5.1" - PREFIX ${PADDLE_SOURCES_DIR} - UPDATE_COMMAND "" - BINARY_DIR ${CMAKE_BINARY_DIR}/Paddle - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_INSTALL_PREFIX=${PADDLE_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${PADDLE_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_PREFIX_PATH=${prefix_path} - -DCMAKE_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} - -DWITH_SWIG_PY=OFF - -DWITH_PYTHON=OFF - -DWITH_MKL=${WITH_MKL} - -DWITH_AVX=${WITH_AVX} - -DWITH_MKLDNN=OFF - -DWITH_GPU=${WITH_GPU} - -DWITH_FLUID_ONLY=ON - -DWITH_TESTING=OFF - -DWITH_DISTRIBUTE=OFF - -DON_INFER=ON - ${EXTERNAL_OPTIONAL_ARGS} - LIST_SEPARATOR | - CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PADDLE_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR:PATH=${PADDLE_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON - -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} - BUILD_COMMAND $(MAKE) - INSTALL_COMMAND $(MAKE) fluid_lib_dist + URL "${PADDLE_LIB_PATH}" + PREFIX "${PADDLE_SOURCES_DIR}" + DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + UPDATE_COMMAND "" + INSTALL_COMMAND + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include && + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib && + ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party && + ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so ) -ExternalProject_Get_Property(extern_paddle BINARY_DIR) -SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${BINARY_DIR}/fluid_install_dir/third_party/install/mklml/lib") -LINK_DIRECTORIES(${BINARY_DIR}/fluid_install_dir/third_party/install/mklml/lib) - -ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid.a) +INCLUDE_DIRECTORIES(${PADDLE_INCLUDE_DIR}) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib") +LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib) -LIST(APPEND external_project_dependencies paddle) +SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib") +LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib) -ADD_LIBRARY(snappystream STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET snappystream PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/third_party/install/snappystream/lib/libsnappystream.a) +ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a) ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/third_party/install/xxhash/lib/libxxhash.a) +SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a) + +LIST(APPEND external_project_dependencies paddle) LIST(APPEND paddle_depend_libs - snappystream - snappy xxhash) diff --git a/cube/cube-api/include/cube_api.h b/cube/cube-api/include/cube_api.h index 
diff --git a/cube/cube-api/include/cube_api.h b/cube/cube-api/include/cube_api.h
index aee82f6dd1d009d5b5b53b3f6fe8de0d591e5d51..c5059b9fea3a1d03cf4e50f8cb8dc994307d31ad 100644
--- a/cube/cube-api/include/cube_api.h
+++ b/cube/cube-api/include/cube_api.h
@@ -18,7 +18,12 @@
 #include
 #include
 
+#ifdef BCLOUD
+#include "baidu/rpc/server.h"
+#else
 #include "brpc/server.h"
+#endif
+
 #include "cube/cube-api/cube.pb.h"
 #include "cube/cube-api/include/meta.h"
diff --git a/cube/cube-api/include/meta.h b/cube/cube-api/include/meta.h
index ec891720c55503ad77de24cd178db53b2170023e..69bbb8ccc12e423d286183ed5dd87e90bf2e59de 100644
--- a/cube/cube-api/include/meta.h
+++ b/cube/cube-api/include/meta.h
@@ -19,11 +19,25 @@
 #include
 #include
 
+#ifdef BCLOUD
+#include "baidu/rpc/channel.h"
+#include "baidu/rpc/parallel_channel.h"
+#include "rapidjson/document.h"
+#else
 #include "brpc/channel.h"
 #include "brpc/parallel_channel.h"
 #include "butil/third_party/rapidjson/document.h"
+#endif
+
 #include "bvar/bvar.h"
 
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#ifndef BUTIL_RAPIDJSON_NAMESPACE
+#define BUTIL_RAPIDJSON_NAMESPACE RAPIDJSON_NAMESPACE
+#endif
+#endif
+
 namespace rec {
 namespace mcube {
diff --git a/cube/cube-api/src/cube_api.cpp b/cube/cube-api/src/cube_api.cpp
index 8a9cebb9aa92f1bdb13c47cb7c065eaf2738c73f..c481effae450889bbcf25c4e315edca3e8d88e6b 100644
--- a/cube/cube-api/src/cube_api.cpp
+++ b/cube/cube-api/src/cube_api.cpp
@@ -13,8 +13,14 @@
 // limitations under the License.
 
 #include "cube/cube-api/include/cube_api.h"
+#ifdef BCLOUD
+#include
+#include
+#else
 #include
 #include
+#endif
+
 #include
 #include "cube/cube-api/include/cube_api_bvar.h"
@@ -25,6 +31,10 @@ namespace {
 static ::rec::mcube::CubeAPI* g_ins = NULL;
 }
 
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#endif
+
 namespace rec {
 namespace mcube {
diff --git a/cube/cube-api/src/meta.cpp b/cube/cube-api/src/meta.cpp
index 69ce43a08e0f5460dfa4e440958ff247458f6140..06911e8828685abbe3b0912f22f8dd7a0f807e50 100644
--- a/cube/cube-api/src/meta.cpp
+++ b/cube/cube-api/src/meta.cpp
@@ -26,6 +26,10 @@ namespace {
 static ::rec::mcube::Meta* g_ins = NULL;
 }
 
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#endif
+
 namespace rec {
 namespace mcube {
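Note on the cube-api changes above: the same client sources now build against either open-source brpc or the internal baidu-rpc, selected by the BCLOUD macro and unified through a namespace alias. A condensed sketch of the idiom (assumes BCLOUD is defined by the internal build system; the channel options below are illustrative, not taken from the patch):

```cpp
// Condensed sketch of the BCLOUD portability idiom used in cube-api.
#ifdef BCLOUD
#include "baidu/rpc/channel.h"
namespace brpc = baidu::rpc;  // internal rpc lives in baidu::rpc
#else
#include "brpc/channel.h"     // open-source brpc already provides namespace brpc
#endif

// Everything below is written once against the brpc:: spelling and compiles
// in both environments.
static int init_cube_channel(brpc::Channel* channel, const char* server) {
  brpc::ChannelOptions options;
  options.timeout_ms = 200;  // illustrative values
  options.max_retry = 3;
  return channel->Init(server, &options);
}
```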
diff --git a/demo-client/src/ctr_prediction.cpp b/demo-client/src/ctr_prediction.cpp
index 70b0c841227e411b70ef8c7a6263837804a83b55..92e82a36203e3f39ed871e9f5afc47b619527e90 100644
--- a/demo-client/src/ctr_prediction.cpp
+++ b/demo-client/src/ctr_prediction.cpp
@@ -30,11 +30,17 @@ using baidu::paddle_serving::predictor::ctr_prediction::Response;
 using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance;
 using baidu::paddle_serving::predictor::ctr_prediction::CTRResInstance;
 
-int batch_size = 16;
 int sparse_num = 26;
 int dense_num = 13;
-int thread_num = 1;
 int hash_dim = 1000001;
+
+DEFINE_int32(batch_size, 50, "Set the batch size of test file.");
+DEFINE_int32(concurrency, 1, "Set the max concurrency of requests");
+DEFINE_int32(repeat, 1, "Number of data samples iteration count. Default 1");
+DEFINE_bool(enable_profiling,
+            false,
+            "Enable profiling. Will suppress a lot of normal output");
+
 std::vector cont_min = {0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 std::vector cont_diff = {
     20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50};
@@ -86,7 +92,7 @@ int64_t hash(std::string str) {
 
 int create_req(Request* req,
                const std::vector& data_list,
-               int data_index,
+               int start_index,
               int batch_size) {
   for (int i = 0; i < batch_size; ++i) {
     CTRReqInstance* ins = req->add_instances();
@@ -94,12 +100,14 @@
     if (!ins) {
       LOG(ERROR) << "Failed create req instance";
       return -1;
     }
+
     // add data
     // avoid out of boundary
-    int cur_index = data_index + i;
+    int cur_index = start_index + i;
     if (cur_index >= data_list.size()) {
       cur_index = cur_index % data_list.size();
     }
+
     std::vector feature_list = split(data_list[cur_index], "\t");
     for (int fi = 0; fi < dense_num; fi++) {
       if (feature_list[fi] == "") {
@@ -122,10 +130,10 @@
   }
   return 0;
 }
+
 void print_res(const Request& req,
                const Response& res,
                std::string route_tag,
-               uint64_t mid_ms,
               uint64_t elapse_ms) {
   if (res.err_code() != 0) {
     LOG(ERROR) << "Get result fail :" << res.err_msg();
@@ -138,72 +146,90 @@ void print_res(const Request& req,
     LOG(INFO) << "Receive result " << oss.str();
   }
   LOG(INFO) << "Succ call predictor[ctr_prediction_service], the tag is: "
-            << route_tag << ", mid_ms: " << mid_ms
-            << ", elapse_ms: " << elapse_ms;
+            << route_tag << ", elapse_ms: " << elapse_ms;
 }
 
 void thread_worker(PredictorApi* api,
                    int thread_id,
-                   int batch_size,
-                   int server_concurrency,
                    const std::vector& data_list) {
   // init
   Request req;
   Response res;
-  api->thrd_initialize();
   std::string line;
-  int turns = 0;
-  while (turns < 1000) {
-    timeval start;
-    gettimeofday(&start, NULL);
-    api->thrd_clear();
-    Predictor* predictor = api->fetch_predictor("ctr_prediction_service");
-    if (!predictor) {
-      LOG(ERROR) << "Failed fetch predictor: ctr_prediction_service";
-      return;
-    }
-    req.Clear();
-    res.Clear();
-    timeval mid;
-    gettimeofday(&mid, NULL);
-    uint64_t mid_ms = (mid.tv_sec * 1000 + mid.tv_usec / 1000) -
-                      (start.tv_sec * 1000 + start.tv_usec / 1000);
-    // wait for other thread
-    while (g_concurrency.load() >= server_concurrency) {
-    }
-    g_concurrency++;
-    LOG(INFO) << "Current concurrency " << g_concurrency.load();
-    int data_index = turns * batch_size;
-    if (create_req(&req, data_list, data_index, batch_size) != 0) {
-      return;
-    }
-    timeval start_run;
-    gettimeofday(&start_run, NULL);
-    if (predictor->inference(&req, &res) != 0) {
-      LOG(ERROR) << "failed call predictor with req:" << req.ShortDebugString();
-      return;
-    }
-    timeval end;
-    gettimeofday(&end, NULL);
-    uint64_t elapse_ms = (end.tv_sec * 1000 + end.tv_usec / 1000) -
-                         (start_run.tv_sec * 1000 + start_run.tv_usec / 1000);
-    response_time[thread_id].push_back(elapse_ms);
-    print_res(req, res, predictor->tag(), mid_ms, elapse_ms);
-    g_concurrency--;
-    LOG(INFO) << "Done. Current concurrency " << g_concurrency.load();
-    turns++;
-  }
-  //
+
+  api->thrd_initialize();
+
+  for (int i = 0; i < FLAGS_repeat; ++i) {
+    int start_index = 0;
+
+    while (true) {
+      if (start_index >= data_list.size()) {
+        break;
+      }
+
+      api->thrd_clear();
+
+      Predictor* predictor = api->fetch_predictor("ctr_prediction_service");
+      if (!predictor) {
+        LOG(ERROR) << "Failed fetch predictor: ctr_prediction_service";
+        return;
+      }
+
+      req.Clear();
+      res.Clear();
+
+      // wait for other thread
+      while (g_concurrency.load() >= FLAGS_concurrency) {
+      }
+      g_concurrency++;
+      LOG(INFO) << "Current concurrency " << g_concurrency.load();
+
+      if (create_req(&req, data_list, start_index, FLAGS_batch_size) != 0) {
+        return;
+      }
+      start_index += FLAGS_batch_size;
+      LOG(INFO) << "start_index = " << start_index;
+
+      timeval start;
+      gettimeofday(&start, NULL);
+
+      if (predictor->inference(&req, &res) != 0) {
+        LOG(ERROR) << "failed call predictor with req:"
+                   << req.ShortDebugString();
+        return;
+      }
+      g_concurrency--;
+
+      timeval end;
+      gettimeofday(&end, NULL);
+      uint64_t elapse_ms = (end.tv_sec * 1000 + end.tv_usec / 1000) -
+                           (start.tv_sec * 1000 + start.tv_usec / 1000);
+
+      response_time[thread_id].push_back(elapse_ms);
+
+      if (!FLAGS_enable_profiling) {
+        print_res(req, res, predictor->tag(), elapse_ms);
+      }
+
+      LOG(INFO) << "Done. Current concurrency " << g_concurrency.load();
+    }  // end while
+  }    // end for
+
   api->thrd_finalize();
 }
-void calc_time(int server_concurrency, int batch_size) {
+
+void calc_time() {
   std::vector time_list;
   for (auto a : response_time) {
     time_list.insert(time_list.end(), a.begin(), a.end());
   }
+
   LOG(INFO) << "Total request : " << (time_list.size());
-  LOG(INFO) << "Batch size : " << batch_size;
-  LOG(INFO) << "Max concurrency : " << server_concurrency;
+  LOG(INFO) << "Batch size : " << FLAGS_batch_size;
+  LOG(INFO) << "Max concurrency : " << FLAGS_concurrency;
+  LOG(INFO) << "enable_profiling: " << FLAGS_enable_profiling;
+  LOG(INFO) << "repeat count: " << FLAGS_repeat;
+
   float total_time = 0;
   float max_time = 0;
   float min_time = 1000000;
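Note on the rewritten thread_worker above: the fixed 1000-iteration loop is replaced by FLAGS_repeat passes over the data file, and only the inference() call itself is timed with a pair of gettimeofday() samples. A small helper equivalent to that inline arithmetic (sketch only; the client keeps the arithmetic inline rather than using a helper):

```cpp
// Sketch: millisecond timestamp helper matching the timing code above.
#include <sys/time.h>
#include <stdint.h>

static inline uint64_t now_ms() {
  timeval tv;
  gettimeofday(&tv, NULL);
  return static_cast<uint64_t>(tv.tv_sec) * 1000 + tv.tv_usec / 1000;
}

// Usage mirroring the loop body:
//   uint64_t start = now_ms();
//   predictor->inference(&req, &res);
//   uint64_t elapse_ms = now_ms() - start;
```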
Current concurrency " << g_concurrency.load(); - turns++; - } - // + + api->thrd_initialize(); + + for (int i = 0; i < FLAGS_repeat; ++i) { + int start_index = 0; + + while (true) { + if (start_index >= data_list.size()) { + break; + } + + api->thrd_clear(); + + Predictor* predictor = api->fetch_predictor("ctr_prediction_service"); + if (!predictor) { + LOG(ERROR) << "Failed fetch predictor: ctr_prediction_service"; + return; + } + + req.Clear(); + res.Clear(); + + // wait for other thread + while (g_concurrency.load() >= FLAGS_concurrency) { + } + g_concurrency++; + LOG(INFO) << "Current concurrency " << g_concurrency.load(); + + if (create_req(&req, data_list, start_index, FLAGS_batch_size) != 0) { + return; + } + start_index += FLAGS_batch_size; + LOG(INFO) << "start_index = " << start_index; + + timeval start; + gettimeofday(&start, NULL); + + if (predictor->inference(&req, &res) != 0) { + LOG(ERROR) << "failed call predictor with req:" + << req.ShortDebugString(); + return; + } + g_concurrency--; + + timeval end; + gettimeofday(&end, NULL); + uint64_t elapse_ms = (end.tv_sec * 1000 + end.tv_usec / 1000) - + (start.tv_sec * 1000 + start.tv_usec / 1000); + + response_time[thread_id].push_back(elapse_ms); + + if (!FLAGS_enable_profiling) { + print_res(req, res, predictor->tag(), elapse_ms); + } + + LOG(INFO) << "Done. Current concurrency " << g_concurrency.load(); + } // end while + } // end for + api->thrd_finalize(); } -void calc_time(int server_concurrency, int batch_size) { + +void calc_time() { std::vector time_list; for (auto a : response_time) { time_list.insert(time_list.end(), a.begin(), a.end()); } + LOG(INFO) << "Total request : " << (time_list.size()); - LOG(INFO) << "Batch size : " << batch_size; - LOG(INFO) << "Max concurrency : " << server_concurrency; + LOG(INFO) << "Batch size : " << FLAGS_batch_size; + LOG(INFO) << "Max concurrency : " << FLAGS_concurrency; + LOG(INFO) << "enable_profiling: " << FLAGS_enable_profiling; + LOG(INFO) << "repeat count: " << FLAGS_repeat; + float total_time = 0; float max_time = 0; float min_time = 1000000; @@ -212,21 +238,28 @@ void calc_time(int server_concurrency, int batch_size) { if (time_list[i] > max_time) max_time = time_list[i]; if (time_list[i] < min_time) min_time = time_list[i]; } + float mean_time = total_time / (time_list.size()); float var_time; for (int i = 0; i < time_list.size(); ++i) { var_time += (time_list[i] - mean_time) * (time_list[i] - mean_time); } var_time = var_time / time_list.size(); - LOG(INFO) << "Total time : " << total_time / server_concurrency - << " Variance : " << var_time << " Max time : " << max_time - << " Min time : " << min_time; + + LOG(INFO) << "Total time : " << total_time / FLAGS_concurrency << "ms"; + LOG(INFO) << "Variance : " << var_time << "ms"; + LOG(INFO) << "Max time : " << max_time << "ms"; + LOG(INFO) << "Min time : " << min_time << "ms"; + float qps = 0.0; - if (total_time > 0) - qps = (time_list.size() * 1000) / (total_time / server_concurrency); + if (total_time > 0) { + qps = (time_list.size() * 1000) / (total_time / FLAGS_concurrency); + } LOG(INFO) << "QPS: " << qps << "/s"; + LOG(INFO) << "Latency statistics: "; sort(time_list.begin(), time_list.end()); + int percent_pos_50 = time_list.size() * 0.5; int percent_pos_80 = time_list.size() * 0.8; int percent_pos_90 = time_list.size() * 0.9; @@ -244,11 +277,12 @@ void calc_time(int server_concurrency, int batch_size) { } } int main(int argc, char** argv) { + google::ParseCommandLineFlags(&argc, &argv, true); + // initialize 
diff --git a/demo-serving/CMakeLists.txt b/demo-serving/CMakeLists.txt
index 93650cf13a4e8c7fe3077e1780e15074081b2de0..82875d23d566cb97205cc2acb009f8da2642e460 100644
--- a/demo-serving/CMakeLists.txt
+++ b/demo-serving/CMakeLists.txt
@@ -59,7 +59,7 @@ target_link_libraries(serving kvdb rocksdb)
 if(WITH_GPU)
     target_link_libraries(serving ${CUDA_LIBRARIES})
 endif()
-target_link_libraries(serving -liomp5 -lmklml_intel -lpthread
+target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread
         -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
 
 install(TARGETS serving
@@ -75,7 +75,10 @@
 install(FILES ${inc}
         DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/serving)
 
 if (${WITH_MKL})
-  install(FILES ${CMAKE_BINARY_DIR}/Paddle/third_party/install/mklml/lib/libmklml_intel.so
-        ${CMAKE_BINARY_DIR}/Paddle/third_party/install/mklml/lib/libiomp5.so DESTINATION
+  install(FILES
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mklml/lib/libmklml_intel.so
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mklml/lib/libiomp5.so
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mkldnn/lib/libmkldnn.so
+        DESTINATION
         ${PADDLE_SERVING_INSTALL_DIR}/demo/serving/bin)
 endif()
diff --git a/demo-serving/op/bert_service_op.h b/demo-serving/op/bert_service_op.h
index a0002aac65a5d1531e5eb023805635602a4dbb07..82c366237c2c263a85c2f7728301d8ec317594d3 100644
--- a/demo-serving/op/bert_service_op.h
+++ b/demo-serving/op/bert_service_op.h
@@ -21,7 +21,7 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #endif
 #else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "./paddle_inference_api.h"
 #endif
 
 #include "demo-serving/bert_service.pb.h"
diff --git a/demo-serving/op/classify_op.h b/demo-serving/op/classify_op.h
index 366793cc7c1ec38197912399b06b6e9e8db8e996..c381f032d0e3a7e19e1a711b1ebe1747ee3145d8 100644
--- a/demo-serving/op/classify_op.h
+++ b/demo-serving/op/classify_op.h
@@ -21,7 +21,7 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #endif
 #else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
 #endif
 
 #include "demo-serving/image_class.pb.h"
"paddle_inference_api.h" // NOLINT #endif #include "demo-serving/image_class.pb.h" diff --git a/demo-serving/op/ctr_prediction_op.cpp b/demo-serving/op/ctr_prediction_op.cpp index a904562b6b303134d5198fbbe01ad2cb79c4ba97..b2166819a2a6b213ae008580349e870e97797984 100644 --- a/demo-serving/op/ctr_prediction_op.cpp +++ b/demo-serving/op/ctr_prediction_op.cpp @@ -23,6 +23,9 @@ #include "predictor/framework/kv_manager.h" #include "predictor/framework/memory.h" +// Flag where enable profiling mode +DECLARE_bool(enable_ctr_profiling); + namespace baidu { namespace paddle_serving { namespace serving { @@ -46,6 +49,11 @@ const int CTR_PREDICTION_DENSE_SLOT_ID = 26; const int CTR_PREDICTION_DENSE_DIM = 13; const int CTR_PREDICTION_EMBEDDING_SIZE = 10; +bthread::Mutex CTRPredictionOp::mutex_; +int64_t CTRPredictionOp::cube_time_us_ = 0; +int32_t CTRPredictionOp::cube_req_num_ = 0; +int32_t CTRPredictionOp::cube_req_key_num_ = 0; + void fill_response_with_message(Response *response, int err_code, std::string err_msg) { @@ -135,7 +143,41 @@ int CTRPredictionOp::inference() { return 0; } else if (kvinfo->sparse_param_service_type == configure::EngineDesc::REMOTE) { - int ret = cube->seek(table_name, keys, &values); + struct timeval start; + struct timeval end; + + int ret; + + gettimeofday(&start, NULL); + ret = cube->seek(table_name, keys, &values); + gettimeofday(&end, NULL); + uint64_t usec = + end.tv_sec * 1e6 + end.tv_usec - start.tv_sec * 1e6 - start.tv_usec; + + // Statistics + mutex_.lock(); + cube_time_us_ += usec; + ++cube_req_num_; + cube_req_key_num_ += keys.size(); + + if (cube_req_num_ >= 1000) { + LOG(INFO) << "Cube request count: " << cube_req_num_; + LOG(INFO) << "Cube request key count: " << cube_req_key_num_; + LOG(INFO) << "Cube request total time: " << cube_time_us_ << "us"; + LOG(INFO) << "Average " + << static_cast(cube_time_us_) / cube_req_num_ + << "us/req"; + LOG(INFO) << "Average " + << static_cast(cube_time_us_) / cube_req_key_num_ + << "us/key"; + + cube_time_us_ = 0; + cube_req_num_ = 0; + cube_req_key_num_ = 0; + } + mutex_.unlock(); + // Statistics end + if (ret != 0) { fill_response_with_message(res, -1, "Query cube for embeddings error"); LOG(ERROR) << "Query cube for embeddings error"; diff --git a/demo-serving/op/ctr_prediction_op.h b/demo-serving/op/ctr_prediction_op.h index a12cccab68c06c2238e7205b90b095318b28f3f0..ee648151b4ecf4611502798308c2cd81db923bb3 100644 --- a/demo-serving/op/ctr_prediction_op.h +++ b/demo-serving/op/ctr_prediction_op.h @@ -21,7 +21,7 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #endif #else -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle_inference_api.h" // NOLINT #endif #include "demo-serving/ctr_prediction.pb.h" @@ -55,6 +55,7 @@ static const char* CTR_PREDICTION_MODEL_NAME = "ctr_prediction"; * and modifications we made * */ + class CTRPredictionOp : public baidu::paddle_serving::predictor::OpWithChannel< baidu::paddle_serving::predictor::ctr_prediction::Response> { @@ -64,6 +65,12 @@ class CTRPredictionOp DECLARE_OP(CTRPredictionOp); int inference(); + + private: + static bthread::Mutex mutex_; + static int64_t cube_time_us_; + static int32_t cube_req_num_; + static int32_t cube_req_key_num_; }; } // namespace serving diff --git a/demo-serving/op/reader_op.h b/demo-serving/op/reader_op.h index 484d6f62d0f64bd3efbf7f8de3b4068a344ae048..546ca19667af0161ddb62f354e32791d15d8ae4b 100644 --- a/demo-serving/op/reader_op.h +++ b/demo-serving/op/reader_op.h @@ -35,7 +35,7 @@ #include 
"paddle/fluid/inference/api/paddle_inference_api.h" #endif #else -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle_inference_api.h" // NOLINT #endif namespace baidu { diff --git a/demo-serving/op/text_classification_op.h b/demo-serving/op/text_classification_op.h index bef8ec520dc45e97def913715d714e2c46067429..21ac6991be1b47654618c52c4123a5f99f4bc185 100644 --- a/demo-serving/op/text_classification_op.h +++ b/demo-serving/op/text_classification_op.h @@ -21,7 +21,7 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #endif #else -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle_inference_api.h" // NOLINT #endif #include "demo-serving/text_classification.pb.h" diff --git a/doc/CTR_PREDICTION.md b/doc/CTR_PREDICTION.md index 746b6360f3fb326f3a63561bab1a2163bf30da67..513b4560f025a08f3fc2ffe9a7fb96ada0b076c5 100755 --- a/doc/CTR_PREDICTION.md +++ b/doc/CTR_PREDICTION.md @@ -320,7 +320,7 @@ def prune_program(): ### 2.5 裁剪过程串到一起 -我们提供了完整的裁剪CTR预估模型的脚本文件save_program.py,同[CTR分布式训练任务](doc/DISTRIBUTED_TRAINING_AND_SERVING.md)一起发布,可以在trainer和pserver容器的训练脚本目录下找到 +我们提供了完整的裁剪CTR预估模型的脚本文件save_program.py,同[CTR分布式训练和Serving流程化部署](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md)一起发布,可以在trainer和pserver容器的训练脚本目录下找到,也可以在[这里](https://github.com/PaddlePaddle/Serving/tree/master/doc/resource)下载。 ## 3. 整个预测计算流程 diff --git a/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/inferencer-fluid-cpu/include/fluid_cpu_engine.h index 41ab63992361f70f1434efe2c4982342e34b5525..a7ba3ae13bfa040f3a6f4b05d8d5cd6b93ae36b5 100644 --- a/inferencer-fluid-cpu/include/fluid_cpu_engine.h +++ b/inferencer-fluid-cpu/include/fluid_cpu_engine.h @@ -28,7 +28,7 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #endif #else -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle_inference_api.h" // NOLINT #endif #include "predictor/framework/infer.h" @@ -134,8 +134,7 @@ class FluidCpuAnalysisCore : public FluidFamilyCore { analysis_config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(params.static_optimization(), - params.force_update_static_cache()); + analysis_config.EnableMemoryOptim(); } analysis_config.SwitchSpecifyInputNames(true); @@ -200,8 +199,7 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore { analysis_config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(params.static_optimization(), - params.force_update_static_cache()); + analysis_config.EnableMemoryOptim(); } AutoLock lock(GlobalPaddleCreateMutex::instance()); @@ -519,8 +517,7 @@ class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore { analysis_config.SetCpuMathLibraryNumThreads(1); if (params.enable_memory_optimization()) { - analysis_config.EnableMemoryOptim(params.static_optimization(), - params.force_update_static_cache()); + analysis_config.EnableMemoryOptim(); } AutoLock lock(GlobalPaddleCreateMutex::instance()); diff --git a/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/inferencer-fluid-gpu/include/fluid_gpu_engine.h index 81c20d69121b19e0f43b03630c476dc8c2ae3d4f..667380f7205655a06bedde899168bda6c37d10b1 100644 --- a/inferencer-fluid-gpu/include/fluid_gpu_engine.h +++ b/inferencer-fluid-gpu/include/fluid_gpu_engine.h @@ -28,7 +28,7 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #endif #else -#include "paddle/fluid/inference/paddle_inference_api.h" +#include "paddle_inference_api.h" 
diff --git a/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 81c20d69121b19e0f43b03630c476dc8c2ae3d4f..667380f7205655a06bedde899168bda6c37d10b1 100644
--- a/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -28,7 +28,7 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #endif
 #else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
 #endif
 
 #include "predictor/framework/infer.h"
@@ -136,8 +136,7 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
     analysis_config.SetCpuMathLibraryNumThreads(1);
 
     if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
     }
 
     analysis_config.SwitchSpecifyInputNames(true);
@@ -203,8 +202,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     analysis_config.SwitchIrOptim(true);
 
     if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
     }
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
@@ -522,8 +520,7 @@ class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
     analysis_config.SetCpuMathLibraryNumThreads(1);
 
     if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
     }
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
diff --git a/predictor/common/constant.h b/predictor/common/constant.h
index da44103eb8e6d064a642520bb90dd2c9df293889..72509c8d9187f817cf4dd0dfef1bff06370ce537 100644
--- a/predictor/common/constant.h
+++ b/predictor/common/constant.h
@@ -40,8 +40,6 @@ DECLARE_int32(reload_interval_s);
 DECLARE_bool(enable_model_toolkit);
 DECLARE_string(enable_protocol_list);
 DECLARE_bool(enable_cube);
-DECLARE_string(cube_config_path);
-DECLARE_string(cube_config_file);
 
 // STATIC Variables
 extern const char* START_OP_NAME;
diff --git a/predictor/framework/infer.h b/predictor/framework/infer.h
index c2823f5e3d8cbd2484f02053ffd36e6a3a275846..c479479a271601b0d197d7f4fc4672ccc54c3801 100644
--- a/predictor/framework/infer.h
+++ b/predictor/framework/infer.h
@@ -632,7 +632,6 @@ class VersionedInferEngine : public InferEngine {
       LOG(ERROR) << "Failed thrd clear version engine: " << iter->first;
       return -1;
     }
-    LOG(INFO) << "Succ thrd clear version engine: " << iter->first;
   }
   return 0;
 }
diff --git a/predictor/framework/resource.cpp b/predictor/framework/resource.cpp
index 74e3c95204dfb4fb0dcf32201c244550b6df08c2..15a5022d69458eae76c6b3f75ab3076d365ed333 100644
--- a/predictor/framework/resource.cpp
+++ b/predictor/framework/resource.cpp
@@ -208,7 +208,6 @@ int Resource::thread_clear() {
     return -1;
   }
 
-  LOG(INFO) << bthread_self() << "Resource::thread_clear success";
   // ...
   return 0;
 }
diff --git a/predictor/src/pdserving.cpp b/predictor/src/pdserving.cpp
index a86b39abac7bd007a8fd401bd9a0b8aaaa5c5114..56ffee84aba6338bcd082d12e6bd4c304fe8ca80 100644
--- a/predictor/src/pdserving.cpp
+++ b/predictor/src/pdserving.cpp
@@ -51,8 +51,6 @@ using baidu::paddle_serving::predictor::FLAGS_port;
 using baidu::paddle_serving::configure::InferServiceConf;
 using baidu::paddle_serving::configure::read_proto_conf;
 
-DECLARE_bool(logtostderr);
-
 void print_revision(std::ostream& os, void*) {
 #if defined(PDSERVING_VERSION)
   os << PDSERVING_VERSION;
@@ -217,7 +215,8 @@ int main(int argc, char** argv) {
   }
   LOG(INFO) << "Succ initialize cube";
 
-  FLAGS_logtostderr = false;
+  // FATAL messages are output to stderr
+  FLAGS_stderrthreshold = 3;
 
   if (ServerManager::instance().start_and_wait() != 0) {
     LOG(ERROR) << "Failed start server and wait!";
diff --git a/sdk-cpp/src/endpoint.cpp b/sdk-cpp/src/endpoint.cpp
index d1c66124c6e7657db23905eb681bfa0b957be9d2..3a30a0de6465512e647321c07637692599f1890b 100644
--- a/sdk-cpp/src/endpoint.cpp
+++ b/sdk-cpp/src/endpoint.cpp
@@ -64,7 +64,6 @@ int Endpoint::thrd_clear() {
       return -1;
     }
   }
-  LOG(INFO) << "Succ thrd clear all vars: " << var_size;
   return 0;
 }
diff --git a/sdk-cpp/src/predictor_sdk.cpp b/sdk-cpp/src/predictor_sdk.cpp
index 214473f64204866febb7d842b53551aa1cfe225d..246ac66f2d07f3c1becd7ab6c05be929c5003a03 100644
--- a/sdk-cpp/src/predictor_sdk.cpp
+++ b/sdk-cpp/src/predictor_sdk.cpp
@@ -94,8 +94,6 @@ int PredictorApi::thrd_clear() {
       LOG(ERROR) << "Failed thrd clear endpoint:" << it->first;
       return -1;
     }
-
-    LOG(INFO) << "Succ thrd clear endpoint:" << it->first;
   }
   return 0;
 }
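Note on the pdserving.cpp change above: instead of forcing FLAGS_logtostderr = false, the server now raises the stderr threshold so only FATAL messages reach the terminal while lower severities still go to the log files. Assuming glog-style logging flags (which is where FLAGS_stderrthreshold and the removed FLAGS_logtostderr come from), the literal 3 maps to the following severity scale:

```cpp
// Sketch: the severity scale behind FLAGS_stderrthreshold = 3.
#include <glog/logging.h>

void quiet_stderr_logging() {
  // 0 = INFO, 1 = WARNING, 2 = ERROR, 3 = FATAL. Messages below the
  // threshold still go to the log files, just not to stderr.
  FLAGS_stderrthreshold = google::GLOG_FATAL;  // same effect as the literal 3
}
```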