Commit f84565cb authored by xulongteng

Merge branch 'bert' of https://github.com/MRXLT/Serving into bert

@@ -75,6 +75,7 @@ include(generic)
include(flags)
if (NOT CLIENT_ONLY)
+    include(external/cudnn)
    include(paddlepaddle)
    include(external/opencv)
endif()
......
if(NOT WITH_GPU)
    return()
endif()

if(WIN32)
    set(CUDNN_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
else(WIN32)
    set(CUDNN_ROOT "/usr" CACHE PATH "CUDNN ROOT")
endif(WIN32)

find_path(CUDNN_INCLUDE_DIR cudnn.h
    PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include
    $ENV{CUDNN_ROOT} $ENV{CUDNN_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
    NO_DEFAULT_PATH
)

get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)

set(TARGET_ARCH "x86_64")
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
    set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()

list(APPEND CUDNN_CHECK_LIBRARY_DIRS
    ${CUDNN_ROOT}
    ${CUDNN_ROOT}/lib64
    ${CUDNN_ROOT}/lib
    ${CUDNN_ROOT}/lib/${TARGET_ARCH}-linux-gnu
    ${CUDNN_ROOT}/local/cuda-${CUDA_VERSION}/targets/${TARGET_ARCH}-linux/lib/
    $ENV{CUDNN_ROOT}
    $ENV{CUDNN_ROOT}/lib64
    $ENV{CUDNN_ROOT}/lib
    /usr/lib
    ${CUDA_TOOLKIT_ROOT_DIR}
    ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
)

set(CUDNN_LIB_NAME "")
if (LINUX)
    set(CUDNN_LIB_NAME "libcudnn.so")
endif(LINUX)

if(WIN32)
    # only support cudnn7
    set(CUDNN_LIB_NAME "cudnn.lib" "cudnn64_7.dll")
endif(WIN32)

if(APPLE)
    set(CUDNN_LIB_NAME "libcudnn.dylib" "libcudnn.so")
endif(APPLE)

find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} # libcudnn_static.a
    PATHS ${CUDNN_CHECK_LIBRARY_DIRS} ${CUDNN_INCLUDE_DIR} ${__libpath_hist}
    NO_DEFAULT_PATH
    DOC "Path to cuDNN library.")

if(CUDNN_INCLUDE_DIR AND CUDNN_LIBRARY)
    set(CUDNN_FOUND ON)
else()
    set(CUDNN_FOUND OFF)
endif()

if(CUDNN_FOUND)
    file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)

    get_filename_component(CUDNN_LIB_PATH ${CUDNN_LIBRARY} DIRECTORY)

    string(REGEX MATCH "define CUDNN_VERSION +([0-9]+)"
        CUDNN_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
    string(REGEX REPLACE "define CUDNN_VERSION +([0-9]+)" "\\1"
        CUDNN_VERSION "${CUDNN_VERSION}")

    if("${CUDNN_VERSION}" STREQUAL "2000")
        message(STATUS "Current cuDNN version is v2. ")
    else()
        string(REGEX MATCH "define CUDNN_MAJOR +([0-9]+)" CUDNN_MAJOR_VERSION
            "${CUDNN_VERSION_FILE_CONTENTS}")
        string(REGEX REPLACE "define CUDNN_MAJOR +([0-9]+)" "\\1"
            CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
        string(REGEX MATCH "define CUDNN_MINOR +([0-9]+)" CUDNN_MINOR_VERSION
            "${CUDNN_VERSION_FILE_CONTENTS}")
        string(REGEX REPLACE "define CUDNN_MINOR +([0-9]+)" "\\1"
            CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
        string(REGEX MATCH "define CUDNN_PATCHLEVEL +([0-9]+)"
            CUDNN_PATCHLEVEL_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
        string(REGEX REPLACE "define CUDNN_PATCHLEVEL +([0-9]+)" "\\1"
            CUDNN_PATCHLEVEL_VERSION "${CUDNN_PATCHLEVEL_VERSION}")

        if(NOT CUDNN_MAJOR_VERSION)
            set(CUDNN_VERSION "???")
        else()
            add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"")
            math(EXPR CUDNN_VERSION
                "${CUDNN_MAJOR_VERSION} * 1000 +
                 ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
        endif()

        message(STATUS "Current cuDNN header is ${CUDNN_INCLUDE_DIR}/cudnn.h. "
            "Current cuDNN version is v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}. ")
    endif()
endif()
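As an illustrative aside (not part of this commit), a build can point the search above at a non-default cuDNN prefix by pre-seeding the cache before the include; the install path below is an assumed example, and the script also consults the CUDNN_ROOT environment variable.

# Assumed local install path; requires WITH_GPU=ON, otherwise the script returns early.
set(CUDNN_ROOT "/usr/local/cudnn-7.3" CACHE PATH "CUDNN ROOT")
include(external/cudnn)
if(NOT CUDNN_FOUND)
    message(FATAL_ERROR "cudnn.h / libcudnn.so not found under ${CUDNN_ROOT}")
endif()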
@@ -15,71 +15,70 @@
INCLUDE(ExternalProject)
SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle)
+SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle)
SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/)
SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE)
SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE)
-INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir)
# Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args
set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog")
message( "WITH_GPU = ${WITH_GPU}")
+# If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF
+# Paddle Version should be one of:
+# latest: latest develop build
+# version number like 1.5.2
+SET(PADDLE_VERSION "latest")
+if (WITH_GPU)
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+else()
+    if (AVX_FOUND)
+        if (WITH_MKLML)
+            SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-mkl")
+        else()
+            SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-openblas")
+        endif()
+    else()
+        SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-noavx-openblas")
+    endif()
+endif()
+SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
+MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
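As an illustrative aside (not part of this commit), the selection logic above resolves PADDLE_LIB_PATH to a prebuilt fluid_inference tarball URL; the concrete version string below is an assumed example and depends on the local CUDA/AVX/MKL detection.

set(PADDLE_VERSION "1.5.2")                               # assumed example; "latest" also works per the comment above
set(PADDLE_LIB_VERSION "${PADDLE_VERSION}-cpu-avx-mkl")   # a CPU host with AVX and MKLML detected
set(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
message(STATUS "resolved: ${PADDLE_LIB_PATH}")
# -> http://paddle-inference-lib.bj.bcebos.com/1.5.2-cpu-avx-mkl/fluid_inference.tgz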
ExternalProject_Add(
-    extern_paddle
+    "extern_paddle"
    ${EXTERNAL_PROJECT_LOG_ARGS}
-    # TODO(wangguibao): change to de newst repo when they changed.
-    GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle"
-    GIT_TAG "v1.5.1"
-    PREFIX ${PADDLE_SOURCES_DIR}
-    UPDATE_COMMAND ""
-    BINARY_DIR ${CMAKE_BINARY_DIR}/Paddle
-    CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-               -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-               -DCMAKE_INSTALL_PREFIX=${PADDLE_INSTALL_DIR}
-               -DCMAKE_INSTALL_LIBDIR=${PADDLE_INSTALL_DIR}/lib
-               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-               -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-               -DCMAKE_PREFIX_PATH=${prefix_path}
-               -DCMAKE_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
-               -DWITH_SWIG_PY=OFF
-               -DWITH_PYTHON=OFF
-               -DWITH_MKL=${WITH_MKL}
-               -DWITH_AVX=${WITH_AVX}
-               -DWITH_MKLDNN=OFF
-               -DWITH_GPU=${WITH_GPU}
-               -DWITH_FLUID_ONLY=ON
-               -DWITH_TESTING=OFF
-               -DWITH_DISTRIBUTE=OFF
-               -DON_INFER=ON
-               ${EXTERNAL_OPTIONAL_ARGS}
-    LIST_SEPARATOR |
-    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PADDLE_INSTALL_DIR}
-                     -DCMAKE_INSTALL_LIBDIR:PATH=${PADDLE_INSTALL_DIR}/lib
-                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-    BUILD_COMMAND $(MAKE)
-    INSTALL_COMMAND $(MAKE) fluid_lib_dist
+    URL "${PADDLE_LIB_PATH}"
+    PREFIX "${PADDLE_SOURCES_DIR}"
+    DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    UPDATE_COMMAND ""
+    INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
+        ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
)
-ExternalProject_Get_Property(extern_paddle BINARY_DIR)
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${BINARY_DIR}/fluid_install_dir/third_party/install/mklml/lib")
-LINK_DIRECTORIES(${BINARY_DIR}/fluid_install_dir/third_party/install/mklml/lib)
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid.a)
-LIST(APPEND external_project_dependencies paddle)
-ADD_LIBRARY(snappystream STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET snappystream PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/third_party/install/snappystream/lib/libsnappystream.a)
+INCLUDE_DIRECTORIES(${PADDLE_INCLUDE_DIR})
+SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib")
+LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
+SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
+LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
+ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${BINARY_DIR}/fluid_install_dir/third_party/install/xxhash/lib/libxxhash.a)
+SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
+LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs
-    snappystream
-    snappy
    xxhash)
@@ -18,7 +18,12 @@
#include <string>
#include <vector>
+#ifdef BCLOUD
+#include "baidu/rpc/server.h"
+#else
#include "brpc/server.h"
+#endif
#include "cube/cube-api/cube.pb.h"
#include "cube/cube-api/include/meta.h"
......
@@ -19,11 +19,25 @@
#include <unordered_map>
#include <vector>
+#ifdef BCLOUD
+#include "baidu/rpc/channel.h"
+#include "baidu/rpc/parallel_channel.h"
+#include "rapidjson/document.h"
+#else
#include "brpc/channel.h"
#include "brpc/parallel_channel.h"
#include "butil/third_party/rapidjson/document.h"
+#endif
#include "bvar/bvar.h"
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#ifndef BUTIL_RAPIDJSON_NAMESPACE
+#define BUTIL_RAPIDJSON_NAMESPACE RAPIDJSON_NAMESPACE
+#endif
+#endif
namespace rec {
namespace mcube {
......
@@ -13,8 +13,14 @@
// limitations under the License.
#include "cube/cube-api/include/cube_api.h"
+#ifdef BCLOUD
+#include <baidu/rpc/channel.h>
+#include <baidu/rpc/parallel_channel.h>
+#else
#include <brpc/channel.h>
#include <brpc/parallel_channel.h>
+#endif
#include <google/protobuf/descriptor.h>
#include "cube/cube-api/include/cube_api_bvar.h"
@@ -25,6 +31,10 @@ namespace {
static ::rec::mcube::CubeAPI* g_ins = NULL;
}
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#endif
namespace rec {
namespace mcube {
......
@@ -26,6 +26,10 @@ namespace {
static ::rec::mcube::Meta* g_ins = NULL;
}
+#ifdef BCLOUD
+namespace brpc = baidu::rpc;
+#endif
namespace rec {
namespace mcube {
......
@@ -30,11 +30,17 @@ using baidu::paddle_serving::predictor::ctr_prediction::Response;
using baidu::paddle_serving::predictor::ctr_prediction::CTRReqInstance;
using baidu::paddle_serving::predictor::ctr_prediction::CTRResInstance;
-int batch_size = 16;
int sparse_num = 26;
int dense_num = 13;
-int thread_num = 1;
int hash_dim = 1000001;
+DEFINE_int32(batch_size, 50, "Set the batch size of test file.");
+DEFINE_int32(concurrency, 1, "Set the max concurrency of requests");
+DEFINE_int32(repeat, 1, "Number of data samples iteration count. Default 1");
+DEFINE_bool(enable_profiling,
+            false,
+            "Enable profiling. Will supress a lot normal output");
std::vector<float> cont_min = {0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<float> cont_diff = {
    20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50};
@@ -86,7 +92,7 @@ int64_t hash(std::string str) {
int create_req(Request* req,
               const std::vector<std::string>& data_list,
-               int data_index,
+               int start_index,
               int batch_size) {
  for (int i = 0; i < batch_size; ++i) {
    CTRReqInstance* ins = req->add_instances();
@@ -94,12 +100,14 @@ int create_req(Request* req,
      LOG(ERROR) << "Failed create req instance";
      return -1;
    }
    // add data
    // avoid out of boundary
-    int cur_index = data_index + i;
+    int cur_index = start_index + i;
    if (cur_index >= data_list.size()) {
      cur_index = cur_index % data_list.size();
    }
    std::vector<std::string> feature_list = split(data_list[cur_index], "\t");
    for (int fi = 0; fi < dense_num; fi++) {
      if (feature_list[fi] == "") {
@@ -122,10 +130,10 @@
  }
  return 0;
}
void print_res(const Request& req,
               const Response& res,
               std::string route_tag,
-               uint64_t mid_ms,
               uint64_t elapse_ms) {
  if (res.err_code() != 0) {
    LOG(ERROR) << "Get result fail :" << res.err_msg();
@@ -138,72 +146,90 @@ void print_res(const Request& req,
    LOG(INFO) << "Receive result " << oss.str();
  }
  LOG(INFO) << "Succ call predictor[ctr_prediction_service], the tag is: "
-            << route_tag << ", mid_ms: " << mid_ms
-            << ", elapse_ms: " << elapse_ms;
+            << route_tag << ", elapse_ms: " << elapse_ms;
}
void thread_worker(PredictorApi* api,
                   int thread_id,
-                   int batch_size,
-                   int server_concurrency,
                   const std::vector<std::string>& data_list) {
  // init
  Request req;
  Response res;
-  api->thrd_initialize();
  std::string line;
-  int turns = 0;
-  while (turns < 1000) {
-    timeval start;
-    gettimeofday(&start, NULL);
-    api->thrd_clear();
-    Predictor* predictor = api->fetch_predictor("ctr_prediction_service");
-    if (!predictor) {
-      LOG(ERROR) << "Failed fetch predictor: ctr_prediction_service";
-      return;
-    }
-    req.Clear();
-    res.Clear();
-    timeval mid;
-    gettimeofday(&mid, NULL);
-    uint64_t mid_ms = (mid.tv_sec * 1000 + mid.tv_usec / 1000) -
-                      (start.tv_sec * 1000 + start.tv_usec / 1000);
-    // wait for other thread
-    while (g_concurrency.load() >= server_concurrency) {
-    }
-    g_concurrency++;
-    LOG(INFO) << "Current concurrency " << g_concurrency.load();
-    int data_index = turns * batch_size;
-    if (create_req(&req, data_list, data_index, batch_size) != 0) {
-      return;
-    }
-    timeval start_run;
-    gettimeofday(&start_run, NULL);
-    if (predictor->inference(&req, &res) != 0) {
-      LOG(ERROR) << "failed call predictor with req:" << req.ShortDebugString();
-      return;
-    }
-    timeval end;
-    gettimeofday(&end, NULL);
-    uint64_t elapse_ms = (end.tv_sec * 1000 + end.tv_usec / 1000) -
-                         (start_run.tv_sec * 1000 + start_run.tv_usec / 1000);
-    response_time[thread_id].push_back(elapse_ms);
-    print_res(req, res, predictor->tag(), mid_ms, elapse_ms);
-    g_concurrency--;
-    LOG(INFO) << "Done. Current concurrency " << g_concurrency.load();
-    turns++;
-  }
-  //
+  api->thrd_initialize();
+
+  for (int i = 0; i < FLAGS_repeat; ++i) {
+    int start_index = 0;
+
+    while (true) {
+      if (start_index >= data_list.size()) {
+        break;
+      }
+
+      api->thrd_clear();
+
+      Predictor* predictor = api->fetch_predictor("ctr_prediction_service");
+      if (!predictor) {
+        LOG(ERROR) << "Failed fetch predictor: ctr_prediction_service";
+        return;
+      }
+
+      req.Clear();
+      res.Clear();
+
+      // wait for other thread
+      while (g_concurrency.load() >= FLAGS_concurrency) {
+      }
+      g_concurrency++;
+      LOG(INFO) << "Current concurrency " << g_concurrency.load();
+
+      if (create_req(&req, data_list, start_index, FLAGS_batch_size) != 0) {
+        return;
+      }
+      start_index += FLAGS_batch_size;
+      LOG(INFO) << "start_index = " << start_index;
+
+      timeval start;
+      gettimeofday(&start, NULL);
+
+      if (predictor->inference(&req, &res) != 0) {
+        LOG(ERROR) << "failed call predictor with req:"
+                   << req.ShortDebugString();
+        return;
+      }
+
+      g_concurrency--;
+
+      timeval end;
+      gettimeofday(&end, NULL);
+      uint64_t elapse_ms = (end.tv_sec * 1000 + end.tv_usec / 1000) -
+                           (start.tv_sec * 1000 + start.tv_usec / 1000);
+      response_time[thread_id].push_back(elapse_ms);
+
+      if (!FLAGS_enable_profiling) {
+        print_res(req, res, predictor->tag(), elapse_ms);
+      }
+
+      LOG(INFO) << "Done. Current concurrency " << g_concurrency.load();
+    }  // end while
+  }    // end for
+
  api->thrd_finalize();
}
-void calc_time(int server_concurrency, int batch_size) {
+void calc_time() {
  std::vector<int> time_list;
  for (auto a : response_time) {
    time_list.insert(time_list.end(), a.begin(), a.end());
  }
  LOG(INFO) << "Total request : " << (time_list.size());
-  LOG(INFO) << "Batch size : " << batch_size;
-  LOG(INFO) << "Max concurrency : " << server_concurrency;
+  LOG(INFO) << "Batch size : " << FLAGS_batch_size;
+  LOG(INFO) << "Max concurrency : " << FLAGS_concurrency;
+  LOG(INFO) << "enable_profiling: " << FLAGS_enable_profiling;
+  LOG(INFO) << "repeat count: " << FLAGS_repeat;
  float total_time = 0;
  float max_time = 0;
  float min_time = 1000000;
@@ -212,21 +238,28 @@ void calc_time(int server_concurrency, int batch_size) {
    if (time_list[i] > max_time) max_time = time_list[i];
    if (time_list[i] < min_time) min_time = time_list[i];
  }
  float mean_time = total_time / (time_list.size());
  float var_time;
  for (int i = 0; i < time_list.size(); ++i) {
    var_time += (time_list[i] - mean_time) * (time_list[i] - mean_time);
  }
  var_time = var_time / time_list.size();
-  LOG(INFO) << "Total time : " << total_time / server_concurrency
-            << " Variance : " << var_time << " Max time : " << max_time
-            << " Min time : " << min_time;
+  LOG(INFO) << "Total time : " << total_time / FLAGS_concurrency << "ms";
+  LOG(INFO) << "Variance : " << var_time << "ms";
+  LOG(INFO) << "Max time : " << max_time << "ms";
+  LOG(INFO) << "Min time : " << min_time << "ms";
  float qps = 0.0;
-  if (total_time > 0)
-    qps = (time_list.size() * 1000) / (total_time / server_concurrency);
+  if (total_time > 0) {
+    qps = (time_list.size() * 1000) / (total_time / FLAGS_concurrency);
+  }
  LOG(INFO) << "QPS: " << qps << "/s";
  LOG(INFO) << "Latency statistics: ";
  sort(time_list.begin(), time_list.end());
  int percent_pos_50 = time_list.size() * 0.5;
  int percent_pos_80 = time_list.size() * 0.8;
  int percent_pos_90 = time_list.size() * 0.9;
@@ -244,11 +277,12 @@
  }
}
int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
  // initialize
  PredictorApi api;
-  response_time.resize(thread_num);
-  int server_concurrency = thread_num;
+  response_time.resize(FLAGS_concurrency);
+  // log set
#ifdef BCLOUD
  logging::LoggingSettings settings;
  settings.logging_dest = logging::LOG_TO_FILE;
@@ -282,32 +316,40 @@ int main(int argc, char** argv) {
    LOG(ERROR) << "Failed create predictors api!";
    return -1;
  }
+  LOG(INFO) << "data sample file: " << data_filename;
+  if (FLAGS_enable_profiling) {
+    LOG(INFO) << "In profiling mode, lot of normal output will be supressed. "
+              << "Use --enable_profiling=false to turn off this mode";
+  }
  // read data
  std::ifstream data_file(data_filename);
  if (!data_file) {
    std::cout << "read file error \n" << std::endl;
    return -1;
  }
  std::vector<std::string> data_list;
  std::string line;
  while (getline(data_file, line)) {
    data_list.push_back(line);
  }
  // create threads
  std::vector<std::thread*> thread_pool;
-  for (int i = 0; i < server_concurrency; ++i) {
-    thread_pool.push_back(new std::thread(thread_worker,
-                                          &api,
-                                          i,
-                                          batch_size,
-                                          server_concurrency,
-                                          std::ref(data_list)));
+  for (int i = 0; i < FLAGS_concurrency; ++i) {
+    thread_pool.push_back(new std::thread(thread_worker, &api, i, data_list));
  }
-  for (int i = 0; i < server_concurrency; ++i) {
+  for (int i = 0; i < FLAGS_concurrency; ++i) {
    thread_pool[i]->join();
    delete thread_pool[i];
  }
-  calc_time(server_concurrency, batch_size);
+  calc_time();
  api.destroy();
  return 0;
}
@@ -59,7 +59,7 @@ target_link_libraries(serving kvdb rocksdb)
if(WITH_GPU)
    target_link_libraries(serving ${CUDA_LIBRARIES})
endif()
-target_link_libraries(serving -liomp5 -lmklml_intel -lpthread
+target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread
        -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
install(TARGETS serving
@@ -75,7 +75,10 @@ install(FILES ${inc}
        DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/serving)
if (${WITH_MKL})
-    install(FILES ${CMAKE_BINARY_DIR}/Paddle/third_party/install/mklml/lib/libmklml_intel.so
-        ${CMAKE_BINARY_DIR}/Paddle/third_party/install/mklml/lib/libiomp5.so DESTINATION
+    install(FILES
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mklml/lib/libmklml_intel.so
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mklml/lib/libiomp5.so
+        ${CMAKE_BINARY_DIR}/third_party/install/Paddle/third_party/install/mkldnn/lib/libmkldnn.so
+        DESTINATION
        ${PADDLE_SERVING_INSTALL_DIR}/demo/serving/bin)
endif()
@@ -21,7 +21,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "./paddle_inference_api.h"
#endif
#include "demo-serving/bert_service.pb.h"
......
@@ -21,7 +21,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
#include "demo-serving/image_class.pb.h"
......
@@ -23,6 +23,9 @@
#include "predictor/framework/kv_manager.h"
#include "predictor/framework/memory.h"
+// Flag where enable profiling mode
+DECLARE_bool(enable_ctr_profiling);
namespace baidu {
namespace paddle_serving {
namespace serving {
@@ -46,6 +49,11 @@ const int CTR_PREDICTION_DENSE_SLOT_ID = 26;
const int CTR_PREDICTION_DENSE_DIM = 13;
const int CTR_PREDICTION_EMBEDDING_SIZE = 10;
+bthread::Mutex CTRPredictionOp::mutex_;
+int64_t CTRPredictionOp::cube_time_us_ = 0;
+int32_t CTRPredictionOp::cube_req_num_ = 0;
+int32_t CTRPredictionOp::cube_req_key_num_ = 0;
void fill_response_with_message(Response *response,
                                int err_code,
                                std::string err_msg) {
@@ -135,7 +143,41 @@ int CTRPredictionOp::inference() {
      return 0;
    } else if (kvinfo->sparse_param_service_type ==
               configure::EngineDesc::REMOTE) {
-      int ret = cube->seek(table_name, keys, &values);
+      struct timeval start;
+      struct timeval end;
+      int ret;
+
+      gettimeofday(&start, NULL);
+      ret = cube->seek(table_name, keys, &values);
+      gettimeofday(&end, NULL);
+      uint64_t usec =
+          end.tv_sec * 1e6 + end.tv_usec - start.tv_sec * 1e6 - start.tv_usec;
+
+      // Statistics
+      mutex_.lock();
+      cube_time_us_ += usec;
+      ++cube_req_num_;
+      cube_req_key_num_ += keys.size();
+
+      if (cube_req_num_ >= 1000) {
+        LOG(INFO) << "Cube request count: " << cube_req_num_;
+        LOG(INFO) << "Cube request key count: " << cube_req_key_num_;
+        LOG(INFO) << "Cube request total time: " << cube_time_us_ << "us";
+        LOG(INFO) << "Average "
+                  << static_cast<float>(cube_time_us_) / cube_req_num_
+                  << "us/req";
+        LOG(INFO) << "Average "
+                  << static_cast<float>(cube_time_us_) / cube_req_key_num_
+                  << "us/key";
+
+        cube_time_us_ = 0;
+        cube_req_num_ = 0;
+        cube_req_key_num_ = 0;
+      }
+      mutex_.unlock();
+      // Statistics end
+
      if (ret != 0) {
        fill_response_with_message(res, -1, "Query cube for embeddings error");
        LOG(ERROR) << "Query cube for embeddings error";
......
@@ -21,7 +21,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
#include "demo-serving/ctr_prediction.pb.h"
@@ -55,6 +55,7 @@ static const char* CTR_PREDICTION_MODEL_NAME = "ctr_prediction";
 * and modifications we made
 *
 */
class CTRPredictionOp
    : public baidu::paddle_serving::predictor::OpWithChannel<
          baidu::paddle_serving::predictor::ctr_prediction::Response> {
@@ -64,6 +65,12 @@ class CTRPredictionOp
  DECLARE_OP(CTRPredictionOp);
  int inference();
+
+ private:
+  static bthread::Mutex mutex_;
+  static int64_t cube_time_us_;
+  static int32_t cube_req_num_;
+  static int32_t cube_req_key_num_;
};
}  // namespace serving
......
@@ -35,7 +35,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
namespace baidu {
......
@@ -21,7 +21,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
#include "demo-serving/text_classification.pb.h"
......
@@ -320,7 +320,7 @@ def prune_program():
### 2.5 Putting the pruning steps together
-We provide save_program.py, a complete script for pruning the CTR prediction model, released together with the [CTR distributed training task](doc/DISTRIBUTED_TRAINING_AND_SERVING.md); it can be found in the training-script directory of the trainer and pserver containers.
+We provide save_program.py, a complete script for pruning the CTR prediction model, released together with [CTR distributed training and Serving pipeline deployment](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md); it can be found in the training-script directory of the trainer and pserver containers, and can also be downloaded [here](https://github.com/PaddlePaddle/Serving/tree/master/doc/resource).
## 3. The overall inference computation flow
......
@@ -28,7 +28,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
#include "predictor/framework/infer.h"
@@ -134,8 +134,7 @@ class FluidCpuAnalysisCore : public FluidFamilyCore {
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    analysis_config.SwitchSpecifyInputNames(true);
@@ -200,8 +199,7 @@ class FluidCpuAnalysisDirCore : public FluidFamilyCore {
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
@@ -519,8 +517,7 @@ class FluidCpuAnalysisDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
......
@@ -28,7 +28,7 @@
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
-#include "paddle/fluid/inference/paddle_inference_api.h"
+#include "paddle_inference_api.h"  // NOLINT
#endif
#include "predictor/framework/infer.h"
@@ -136,8 +136,7 @@ class FluidGpuAnalysisCore : public FluidFamilyCore {
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    analysis_config.SwitchSpecifyInputNames(true);
@@ -203,8 +202,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
    analysis_config.SwitchIrOptim(true);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
@@ -522,8 +520,7 @@ class FluidGpuAnalysisDirWithSigmoidCore : public FluidGpuWithSigmoidCore {
    analysis_config.SetCpuMathLibraryNumThreads(1);
    if (params.enable_memory_optimization()) {
-      analysis_config.EnableMemoryOptim(params.static_optimization(),
-                                        params.force_update_static_cache());
+      analysis_config.EnableMemoryOptim();
    }
    AutoLock lock(GlobalPaddleCreateMutex::instance());
......
@@ -40,8 +40,6 @@ DECLARE_int32(reload_interval_s);
DECLARE_bool(enable_model_toolkit);
DECLARE_string(enable_protocol_list);
DECLARE_bool(enable_cube);
-DECLARE_string(cube_config_path);
-DECLARE_string(cube_config_file);
// STATIC Variables
extern const char* START_OP_NAME;
......
@@ -632,7 +632,6 @@ class VersionedInferEngine : public InferEngine {
        LOG(ERROR) << "Failed thrd clear version engine: " << iter->first;
        return -1;
      }
-      LOG(INFO) << "Succ thrd clear version engine: " << iter->first;
    }
    return 0;
  }
......
@@ -208,7 +208,6 @@ int Resource::thread_clear() {
    return -1;
  }
-  LOG(INFO) << bthread_self() << "Resource::thread_clear success";
  // ...
  return 0;
}
......
@@ -51,8 +51,6 @@ using baidu::paddle_serving::predictor::FLAGS_port;
using baidu::paddle_serving::configure::InferServiceConf;
using baidu::paddle_serving::configure::read_proto_conf;
-DECLARE_bool(logtostderr);
void print_revision(std::ostream& os, void*) {
#if defined(PDSERVING_VERSION)
  os << PDSERVING_VERSION;
@@ -217,7 +215,8 @@ int main(int argc, char** argv) {
  }
  LOG(INFO) << "Succ initialize cube";
-  FLAGS_logtostderr = false;
+  // FATAL messages are output to stderr
+  FLAGS_stderrthreshold = 3;
  if (ServerManager::instance().start_and_wait() != 0) {
    LOG(ERROR) << "Failed start server and wait!";
......
@@ -64,7 +64,6 @@ int Endpoint::thrd_clear() {
      return -1;
    }
  }
-  LOG(INFO) << "Succ thrd clear all vars: " << var_size;
  return 0;
}
......
@@ -94,8 +94,6 @@ int PredictorApi::thrd_clear() {
      LOG(ERROR) << "Failed thrd clear endpoint:" << it->first;
      return -1;
    }
-    LOG(INFO) << "Succ thrd clear endpoint:" << it->first;
  }
  return 0;
}
......