diff --git a/BCLOUD.gpu b/BCLOUD.gpu index 3d4fd0a7eb030cf98ae1329639bc179633abe90a..52338adbee72e01683e8d72b5ff72e26161650cc 100644 --- a/BCLOUD.gpu +++ b/BCLOUD.gpu @@ -87,6 +87,12 @@ HEADERS('predictor/op/*.h', '$INC/predictor/op') StaticLibrary('pdserving', Sources(GLOB(' '.join(predictor_sources)), 'predictor/src/pdserving.cpp')) # Sub directory + +# inferencer-fluid-cpu +INCPATHS('.') +inferencer_fluid_cpu_sources = 'inferencer-fluid-cpu/src/fluid_cpu_engine.cpp' +StaticLibrary('inferencer-fluid-cpu', Sources(inferencer_fluid_cpu_sources)) + Directory('inferencer-fluid-gpu') Directory('sdk-cpp') @@ -100,16 +106,17 @@ HEADERS(GLOB_GEN_SRCS('predictor/proto/builtin_format.pb.h'), '$INC/sdk-cpp') HEADERS(GLOB_GEN_SRCS('predictor/proto/pds_option.pb.h'), '$INC/sdk-cpp') HEADERS(GLOB_GEN_SRCS('demo-client/proto/*.pb.h'), '$INC/sdk-cpp') -#Application('ximage', Sources('demo-client/src/ximage.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('echo', Sources('demo-client/src/echo.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('dense_format', Sources('demo-client/src/dense_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('sparse_format', Sources('demo-client/src/sparse_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('int64tensor_format', Sources('demo-client/src/int64tensor_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('text_classification', Sources('demo-client/src/text_classification.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) -#Application('text_classification_press', Sources('demo-client/src/text_classification_press.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) - -#OUTPUT('demo-client/conf', '$OUT/demo/client') -#OUTPUT('demo-client/data', '$OUT/demo/client') +LIBS('$OUT/lib/libpdconfigure.a') +Application('ximage', Sources('demo-client/src/ximage.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('ximage_press', Sources('demo-client/src/ximage_press.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('echo', Sources('demo-client/src/echo.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('dense_format', Sources('demo-client/src/dense_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('sparse_format', Sources('demo-client/src/sparse_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('int64tensor_format', Sources('demo-client/src/int64tensor_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +Application('text_classification', Sources('demo-client/src/text_classification.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) +Application('text_classification_press', Sources('demo-client/src/text_classification_press.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a'), Libs('$OUT/lib/libpdconfigure.a')) +OUTPUT('demo-client/conf', '$OUT/demo/client') +OUTPUT('demo-client/data', '$OUT/demo/client') # demo-serving INCPATHS('$INC') @@ -130,7 +137,7 @@ serving_sources.append('demo-serving/op/*.cpp') serving_sources.append('demo-serving/proto/*.proto') HEADERS(GLOB_GEN_SRCS('demo-serving/proto/*.pb.h'), '$INC/demo-serving') -Application('serving', Sources(GLOB(' '.join(serving_sources))), WholeArchives('$OUT/lib/libinferencer-fluid-gpu.a $OUT/lib/libpdserving.a $OUT/lib/libpdconfigure.a')) +Application('serving', 
Sources(GLOB(' '.join(serving_sources))), WholeArchives('$OUT/lib/libinferencer-fluid-gpu.a $OUT/lib/libinferencer-fluid-cpu.a $OUT/lib/libpdserving.a $OUT/lib/libpdconfigure.a')) OUTPUT('demo-serving/conf', '$OUT/demo/serving/') OUTPUT('demo-serving/data', '$OUT/demo/serving') diff --git a/CMakeLists.txt b/CMakeLists.txt index 33465d55a70ca00ac08f251d316fa45be69ca202..d01be6d74f8c0b12b8b50e5ec3e63795a9ef4a09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " find_package(Git REQUIRED) find_package(Threads REQUIRED) +find_package(CUDA QUIET) include(simd) @@ -43,10 +44,10 @@ set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_BUILD_TYPE Release) -option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKL "Compile Paddle Serving with MKL support." ${AVX_FOUND}) -option(CLIENT_ONLY "Compile client libraries and demos only" - FALSE) +option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" ${AVX_FOUND}) +option(WITH_MKL "Compile Paddle Serving with MKL support." ${AVX_FOUND}) +option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" ${CUDA_FOUND}) +option(CLIENT_ONLY "Compile client libraries and demos only" FALSE) set(WITH_MKLML ${WITH_MKL}) if (NOT DEFINED WITH_MKLDNN) @@ -108,5 +109,8 @@ add_subdirectory(demo-client) if (NOT CLIENT_ONLY) add_subdirectory(predictor) add_subdirectory(inferencer-fluid-cpu) +if (WITH_GPU) +add_subdirectory(inferencer-fluid-gpu) +endif() add_subdirectory(demo-serving) endif() diff --git a/README.md b/README.md index aa06fa20dab45df342326669de758727e1404ca6..5e3093c05627d9ff2108cb6cadeec5d9599fc89f 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # 概述 -PaddlePaddle是公司开源的机器学习框架,广泛支持各种深度学习模型的定制化开发; Paddle serving是Paddle的在线预测部分,与Paddle模型训练环节无缝衔接,提供机器学习预测云服务。 +PaddlePaddle是百度开源的机器学习框架,广泛支持各种深度学习模型的定制化开发; Paddle serving是Paddle的在线预测部分,与Paddle模型训练环节无缝衔接,提供机器学习预测云服务。 # 框架简介 -![图片](https://paddle-serving.bj.bcebos.com/doc/framework.png) +![图片](doc/framework.png) - 基础框架:屏蔽一个RPC服务所需的所有元素,让用户只关注自己的业务算子的开发; - 业务框架:基于Protobuf定制请求接口,基于有限DAG定制业务逻辑,并行化调度; diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ef4192ecc98ea6de0c81c1f33320528d547b818a --- /dev/null +++ b/cmake/cuda.cmake @@ -0,0 +1,231 @@ +if(NOT WITH_GPU) + return() +endif() + +set(paddle_known_gpu_archs "30 35 50 52 60 61 70") +set(paddle_known_gpu_archs7 "30 35 50 52") +set(paddle_known_gpu_archs8 "30 35 50 52 60 61") +set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70") +set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75") + +###################################################################################### +# A function for automatic detection of GPUs installed (if autodetection is enabled) +# Usage: +# detect_installed_gpus(out_variable) +function(detect_installed_gpus out_variable) + if(NOT CUDA_gpu_detect_output) + set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) + + file(WRITE ${cufile} "" + "#include \n" + "int main() {\n" + " int count = 0;\n" + " if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n" + " if (count == 0) return -1;\n" + " for (int device = 0; device < count; ++device) {\n" + " cudaDeviceProp prop;\n" + " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" + " std::printf(\"%d.%d \", prop.major, prop.minor);\n" + " }\n" + " return 0;\n" + "}\n") + + execute_process(COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin=${CUDA_HOST_COMPILER}" + "--run" "${cufile}" 
+ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" + RESULT_VARIABLE nvcc_res OUTPUT_VARIABLE nvcc_out + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(nvcc_res EQUAL 0) + # only keep the last line of nvcc_out + STRING(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}") + STRING(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}") + list(GET nvcc_out -1 nvcc_out) + string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}") + set(CUDA_gpu_detect_output ${nvcc_out} CACHE INTERNAL "Returned GPU architetures from detect_installed_gpus tool" FORCE) + endif() + endif() + + if(NOT CUDA_gpu_detect_output) + message(STATUS "Automatic GPU detection failed. Building for all known architectures.") + set(${out_variable} ${paddle_known_gpu_archs} PARENT_SCOPE) + else() + set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) + endif() +endfunction() + + +######################################################################## +# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME +# Usage: +# select_nvcc_arch_flags(out_variable) +function(select_nvcc_arch_flags out_variable) + # List of arch names + set(archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "All" "Manual") + set(archs_name_default "All") + list(APPEND archs_names "Auto") + + # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) + set(CUDA_ARCH_NAME ${archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") + set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names} ) + mark_as_advanced(CUDA_ARCH_NAME) + + # verify CUDA_ARCH_NAME value + if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};") + string(REPLACE ";" ", " archs_names "${archs_names}") + message(FATAL_ERROR "Only ${archs_names} architeture names are supported.") + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Manual") + set(CUDA_ARCH_BIN ${paddle_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") + set(CUDA_ARCH_PTX "50" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") + mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) + else() + unset(CUDA_ARCH_BIN CACHE) + unset(CUDA_ARCH_PTX CACHE) + endif() + + if(${CUDA_ARCH_NAME} STREQUAL "Kepler") + set(cuda_arch_bin "30 35") + elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") + set(cuda_arch_bin "50") + elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + set(cuda_arch_bin "60 61") + elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") + set(cuda_arch_bin "70") + elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") + set(cuda_arch_bin "75") + elseif(${CUDA_ARCH_NAME} STREQUAL "All") + set(cuda_arch_bin ${paddle_known_gpu_archs}) + elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") + detect_installed_gpus(cuda_arch_bin) + else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") + set(cuda_arch_bin ${CUDA_ARCH_BIN}) + endif() + + # remove dots and convert to lists + string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX REPLACE "\\." 
"" cuda_arch_ptx "${CUDA_ARCH_PTX}") + string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") + string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + list(REMOVE_DUPLICATES cuda_arch_bin) + list(REMOVE_DUPLICATES cuda_arch_ptx) + + set(nvcc_flags "") + set(nvcc_archs_readable "") + + # Tell NVCC to add binaries for the specified GPUs + foreach(arch ${cuda_arch_bin}) + if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)") + # User explicitly specified PTX for the concrete BIN + list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1}) + else() + # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch}) + list(APPEND nvcc_archs_readable sm_${arch}) + endif() + endforeach() + + # Tell NVCC to add PTX intermediate code for the specified architectures + foreach(arch ${cuda_arch_ptx}) + list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch}) + list(APPEND nvcc_archs_readable compute_${arch}) + endforeach() + + string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}") + set(${out_variable} ${nvcc_flags} PARENT_SCOPE) + set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE) +endfunction() + +message(STATUS "CUDA detected: " ${CUDA_VERSION}) +if (${CUDA_VERSION} LESS 7.0) + set(paddle_known_gpu_archs ${paddle_known_gpu_archs}) + add_definitions("-DPADDLE_CUDA_BINVER=\"60\"") +elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x + set(paddle_known_gpu_archs ${paddle_known_gpu_archs7}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + add_definitions("-DPADDLE_CUDA_BINVER=\"70\"") +elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x + set(paddle_known_gpu_archs ${paddle_known_gpu_archs8}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + # CUDA 8 may complain that sm_20 is no longer supported. Suppress the + # warning for now. + list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") + add_definitions("-DPADDLE_CUDA_BINVER=\"80\"") +elseif (${CUDA_VERSION} LESS 10.0) # CUDA 9.x + set(paddle_known_gpu_archs ${paddle_known_gpu_archs9}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + add_definitions("-DPADDLE_CUDA_BINVER=\"90\"") +elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x + set(paddle_known_gpu_archs ${paddle_known_gpu_archs10}) + list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED") + list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__") + add_definitions("-DPADDLE_CUDA_BINVER=\"100\"") +endif() + +include_directories(${CUDA_INCLUDE_DIRS}) +list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) +if(NOT WITH_DSO) + # TODO(panyx0718): CUPTI only allows DSO? + list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY}) + if(WIN32) + set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY}) + endif(WIN32) +endif(NOT WITH_DSO) + +# setting nvcc arch flags +select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) +list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA}) +message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}") + +# Set C++11 support +set(CUDA_PROPAGATE_HOST_FLAGS OFF) + +# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. +# So, don't set these flags here. 
+if (NOT WIN32) # windows msvc2015 support c++11 natively. +# -std=c++11 -fPIC not recoginize by msvc, -Xcompiler will be added by cmake. +list(APPEND CUDA_NVCC_FLAGS "-std=c++11") +list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC") +endif(NOT WIN32) + +if(WITH_FAST_MATH) + # Make use of fast math library. https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html + list(APPEND CUDA_NVCC_FLAGS "--use_fast_math") +endif() +# in cuda9, suppress cuda warning on eigen +list(APPEND CUDA_NVCC_FLAGS "-w") +# Set :expt-relaxed-constexpr to suppress Eigen warnings +list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") + +if (NOT WIN32) + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) + elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) + elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") + # nvcc 9 does not support -Os. Use Release flags instead + list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE}) + endif() +else(NOT WIN32) + list(APPEND CUDA_NVCC_FLAGS "-Xcompiler \"/wd 4244 /wd 4267 /wd 4819\"") + list(APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj") + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND CUDA_NVCC_FLAGS "-g -G") + # match the cl's _ITERATOR_DEBUG_LEVEL + list(APPEND CUDA_NVCC_FLAGS "-D_DEBUG") + elseif(CMAKE_BUILD_TYPE STREQUAL "Release") + list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG") + else() + message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.") +endif() +endif(NOT WIN32) + +mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD) +mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake index 5a490c8eddb6b7fbb0dc1490769ea2db7f0f50b2..4caed8c2494338667d03c08d2a62ccaf3577cc7c 100644 --- a/cmake/paddlepaddle.cmake +++ b/cmake/paddlepaddle.cmake @@ -24,13 +24,15 @@ INCLUDE_DIRECTORIES(${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir) # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib|${THIRD_PARTY_PATH}/install/glog") +message( "WITH_GPU = ${WITH_GPU}") + # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF ExternalProject_Add( extern_paddle ${EXTERNAL_PROJECT_LOG_ARGS} # TODO(wangguibao): change to de newst repo when they changed. 
GIT_REPOSITORY "https://github.com/PaddlePaddle/Paddle" - GIT_TAG "v1.3.0" + GIT_TAG "v1.4.1" PREFIX ${PADDLE_SOURCES_DIR} UPDATE_COMMAND "" BINARY_DIR ${CMAKE_BINARY_DIR}/Paddle @@ -47,7 +49,7 @@ ExternalProject_Add( -DWITH_MKL=${WITH_MKL} -DWITH_AVX=${WITH_AVX} -DWITH_MKLDNN=OFF - -DWITH_GPU=OFF + -DWITH_GPU=${WITH_GPU} -DWITH_FLUID_ONLY=ON -DWITH_TESTING=OFF -DWITH_DISTRIBUTE=OFF diff --git a/demo-client/BCLOUD b/demo-client/BCLOUD index 308556585ed9a8811e95c7d47623e3279e0e0e9e..7f208dea6795c3cae11d4cdea84079c44cff758f 100644 --- a/demo-client/BCLOUD +++ b/demo-client/BCLOUD @@ -46,6 +46,7 @@ HEADERS('include/*.hpp', '$INC/sdk-cpp/include') # Application #bin Application('ximage', Sources('src/ximage.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) +Application('ximage_press', Sources('src/ximage_press.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) Application('echo', Sources('src/echo.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) Application('dense_format', Sources('src/dense_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) Application('sparse_format', Sources('src/sparse_format.cpp'), WholeArchives('$OUT/lib/libpdsdk-cpp.a')) diff --git a/demo-client/CMakeLists.txt b/demo-client/CMakeLists.txt index e30ca0ead42d93f3613f8702f36013316499047f..5e7208090ca4c47f724be38e92b8685684367501 100644 --- a/demo-client/CMakeLists.txt +++ b/demo-client/CMakeLists.txt @@ -20,6 +20,11 @@ target_link_libraries(ximage -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) +add_executable(ximage_press ${CMAKE_CURRENT_LIST_DIR}/src/ximage_press.cpp) +target_link_libraries(ximage_press -Wl,--whole-archive sdk-cpp + -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl + -lz) + add_executable(echo ${CMAKE_CURRENT_LIST_DIR}/src/echo.cpp) target_link_libraries(echo -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl @@ -51,6 +56,9 @@ target_link_libraries(text_classification_press -Wl,--whole-archive sdk-cpp -Wl, install(TARGETS ximage RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/demo/client/image_classification/bin) +install(TARGETS ximage_press + RUNTIME DESTINATION + ${PADDLE_SERVING_INSTALL_DIR}/demo/client/image_classification/bin) install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/conf DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/demo/client/image_classification/) install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/data/images DESTINATION diff --git a/demo-client/data/images/ILSVRC2012_val_00000001.jpeg b/demo-client/data/images/ILSVRC2012_val_00000001.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..fcd0de8866f404b87c2881435efe8aa2372f7b36 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000001.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000002.jpeg b/demo-client/data/images/ILSVRC2012_val_00000002.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..34476b6e5b2a1ef892f4957e8a5d7b54449aceb1 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000002.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000003.jpeg b/demo-client/data/images/ILSVRC2012_val_00000003.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..95438c5f18dace758c6a62cd0955c4212cfb1c1e Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000003.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000004.jpeg b/demo-client/data/images/ILSVRC2012_val_00000004.jpeg new file mode 100644 
index 0000000000000000000000000000000000000000..d6c3df86e0a15baba0833b4b7a9c996526e5ac79 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000004.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000005.jpeg b/demo-client/data/images/ILSVRC2012_val_00000005.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..166f2897e3965f86a0711bce1f929e63c6dd900e Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000005.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000006.jpeg b/demo-client/data/images/ILSVRC2012_val_00000006.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..2f589369058e002d823c8d9c783311a576f9ebdb Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000006.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000007.jpeg b/demo-client/data/images/ILSVRC2012_val_00000007.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..a043ebf28db967c6e1aefb0913fb1540762e3ad0 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000007.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000008.jpeg b/demo-client/data/images/ILSVRC2012_val_00000008.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..4844c969cc8f4c16adc0809e566f80af62048ed6 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000008.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000009.jpeg b/demo-client/data/images/ILSVRC2012_val_00000009.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..592eb3a5022ad5df1333dac2935a4cc4ff156aaa Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000009.jpeg differ diff --git a/demo-client/data/images/ILSVRC2012_val_00000010.jpeg b/demo-client/data/images/ILSVRC2012_val_00000010.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..6cad056bcf3cd66b586248455073b66af0e9afb5 Binary files /dev/null and b/demo-client/data/images/ILSVRC2012_val_00000010.jpeg differ diff --git a/demo-client/data/images/val.txt b/demo-client/data/images/val.txt new file mode 100644 index 0000000000000000000000000000000000000000..da362d6145f3ad9afb5ba2c02b6dd4f6b758ac55 --- /dev/null +++ b/demo-client/data/images/val.txt @@ -0,0 +1,10 @@ +ILSVRC2012_val_00000001.JPEG 65 +ILSVRC2012_val_00000002.JPEG 970 +ILSVRC2012_val_00000003.JPEG 230 +ILSVRC2012_val_00000004.JPEG 809 +ILSVRC2012_val_00000005.JPEG 516 +ILSVRC2012_val_00000006.JPEG 57 +ILSVRC2012_val_00000007.JPEG 334 +ILSVRC2012_val_00000008.JPEG 415 +ILSVRC2012_val_00000009.JPEG 674 +ILSVRC2012_val_00000010.JPEG 332 diff --git a/demo-client/src/ximage_press.cpp b/demo-client/src/ximage_press.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5a71508e79d8d32598b88be4ad26373ef743850a --- /dev/null +++ b/demo-client/src/ximage_press.cpp @@ -0,0 +1,300 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include +#include // NOLINT +#include "sdk-cpp/builtin_format.pb.h" +#include "sdk-cpp/image_class.pb.h" +#include "sdk-cpp/include/common.h" +#include "sdk-cpp/include/predictor_sdk.h" + +#ifndef BCLOUD +using json2pb::JsonToProtoMessage; +#endif + +using baidu::paddle_serving::sdk_cpp::Predictor; +using baidu::paddle_serving::sdk_cpp::PredictorApi; +using baidu::paddle_serving::predictor::format::XImageReqInstance; +using baidu::paddle_serving::predictor::format::DensePrediction; +using baidu::paddle_serving::predictor::image_classification::Request; +using baidu::paddle_serving::predictor::image_classification::Response; + +DEFINE_int32(concurrency, 1, "Set the max concurrent number of requests"); +DEFINE_int32(requests, 100, "Number of requests to send per thread"); +DEFINE_int32(batch_size, 1, "Batch size"); + +std::atomic g_concurrency(0); + +std::vector> g_round_time; +std::vector g_image_data; +std::vector g_image_lengths; + +const std::vector g_image_paths{ + "./data/images/ILSVRC2012_val_00000001.jpeg", + "./data/images/ILSVRC2012_val_00000002.jpeg", + "./data/images/ILSVRC2012_val_00000003.jpeg", + "./data/images/ILSVRC2012_val_00000004.jpeg", + "./data/images/ILSVRC2012_val_00000005.jpeg", + "./data/images/ILSVRC2012_val_00000006.jpeg", + "./data/images/ILSVRC2012_val_00000007.jpeg", + "./data/images/ILSVRC2012_val_00000008.jpeg", + "./data/images/ILSVRC2012_val_00000009.jpeg", + "./data/images/ILSVRC2012_val_00000010.jpeg"}; + +int prepare_data() { + for (auto x : g_image_paths) { + FILE* fp = fopen(x.c_str(), "rb"); + if (!fp) { + LOG(ERROR) << "Failed open image: " << x.c_str(); + continue; + } + + fseek(fp, 0L, SEEK_END); + size_t isize = ftell(fp); + char* ibuf = new (std::nothrow) char[isize]; + if (!ibuf) { + LOG(ERROR) << "Failed malloc image buffer"; + fclose(fp); + return -1; + } + + fseek(fp, 0, SEEK_SET); + fread(ibuf, sizeof(ibuf[0]), isize, fp); + g_image_data.push_back(ibuf); + g_image_lengths.push_back(isize); + + fclose(fp); + } + + return 0; +} + +int create_req(Request& req) { // NOLINT + for (int i = 0; i < FLAGS_batch_size; ++i) { + XImageReqInstance* ins = req.add_instances(); + if (!ins) { + LOG(ERROR) << "Failed create req instance"; + return -1; + } + + int id = i % g_image_data.size(); + ins->set_image_binary(g_image_data[id], g_image_lengths[id]); + ins->set_image_length(g_image_lengths[id]); + } + + return 0; +} + +void extract_res(const Request& req, const Response& res) { + uint32_t sample_size = res.predictions_size(); + std::string err_string; + for (uint32_t si = 0; si < sample_size; ++si) { + DensePrediction json_msg; + std::string json = res.predictions(si).response_json(); + butil::IOBuf buf; + buf.clear(); + buf.append(json); + butil::IOBufAsZeroCopyInputStream wrapper(buf); + if (!JsonToProtoMessage(&wrapper, &json_msg, &err_string)) { + LOG(ERROR) << "Failed parse json from str:" << json; + return; + } + + uint32_t csize = json_msg.categories_size(); + if (csize <= 0) { + LOG(ERROR) << "sample-" << si << "has no" + << "categories props"; + continue; + } + float max_prop = json_msg.categories(0); + uint32_t max_idx = 0; + for (uint32_t ci = 1; ci < csize; ++ci) { + if (json_msg.categories(ci) > max_prop) { + max_prop = json_msg.categories(ci); + max_idx = ci; + } + } + + LOG(INFO) << "instance " << si << "has class " << max_idx; + } // end for +} + +void thread_worker(PredictorApi* api, int 
thread_id) {
+  Request req;
+  Response res;
+
+  api->thrd_initialize();
+
+  for (int i = 0; i < FLAGS_requests; ++i) {
+    api->thrd_clear();
+
+    Predictor* predictor = api->fetch_predictor("ximage");
+    if (!predictor) {
+      LOG(ERROR) << "Failed fetch predictor: ximage";
+      return;
+    }
+
+    req.Clear();
+    res.Clear();
+
+    if (create_req(req) != 0) {
+      return;
+    }
+
+    while (g_concurrency.load() >= FLAGS_concurrency) {
+    }
+    g_concurrency++;
+#if 1
+    LOG(INFO) << "Current concurrency " << g_concurrency.load();
+#endif
+
+    timeval start;
+    timeval end;
+
+    gettimeofday(&start, NULL);
+    if (predictor->inference(&req, &res) != 0) {
+      LOG(ERROR) << "failed call predictor with req:" << req.ShortDebugString();
+      return;
+    }
+    gettimeofday(&end, NULL);
+
+    g_round_time[thread_id].push_back(end.tv_sec * 1000 + end.tv_usec / 1000 -
+                                      start.tv_sec * 1000 -
+                                      start.tv_usec / 1000);
+
+    extract_res(req, res);
+    res.Clear();
+
+    g_concurrency--;
+#if 1
+    LOG(INFO) << "Done. Current concurrency " << g_concurrency.load();
+#endif
+  }  // for (int i = 0; i < FLAGS_requests; ++i)
+
+  api->thrd_finalize();
+}
+
+int main(int argc, char** argv) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+
+  PredictorApi api;
+
+// initialize logger instance
+#ifdef BCLOUD
+  logging::LoggingSettings settings;
+  settings.logging_dest = logging::LOG_TO_FILE;
+
+  std::string filename(argv[0]);
+  filename = filename.substr(filename.find_last_of('/') + 1);
+  settings.log_file = (std::string("./log/") + filename + ".log").c_str();
+  settings.delete_old = logging::DELETE_OLD_LOG_FILE;
+  logging::InitLogging(settings);
+
+  logging::ComlogSinkOptions cso;
+  cso.process_name = filename;
+  cso.enable_wf_device = true;
+  logging::ComlogSink::GetInstance()->Setup(&cso);
+#else
+  struct stat st_buf;
+  int ret = 0;
+  if ((ret = stat("./log", &st_buf)) != 0) {
+    mkdir("./log", 0777);
+    ret = stat("./log", &st_buf);
+    if (ret != 0) {
+      LOG(WARNING) << "Log path ./log not exist, and create fail";
+      return -1;
+    }
+  }
+  FLAGS_log_dir = "./log";
+  google::InitGoogleLogging(strdup(argv[0]));
+#endif
+
+  g_round_time.resize(FLAGS_concurrency);
+
+  if (api.create("./conf", "predictors.prototxt") != 0) {
+    LOG(ERROR) << "Failed create predictors api!";
+    return -1;
+  }
+
+  if (prepare_data() != 0) {
+    LOG(ERROR) << "Prepare data fail";
+    return -1;
+  }
+
+  std::vector<std::thread*> worker_threads;
+  int i = 0;
+  for (; i < FLAGS_concurrency; ++i) {
+    worker_threads.push_back(new std::thread(thread_worker, &api, i));
+  }
+
+  for (i = 0; i < FLAGS_concurrency; ++i) {
+    worker_threads[i]->join();
+    delete worker_threads[i];
+  }
+
+  api.destroy();
+
+  std::vector<int> round_times;
+  for (auto x : g_round_time) {
+    round_times.insert(round_times.end(), x.begin(), x.end());
+  }
+
+  std::sort(round_times.begin(), round_times.end());
+
+  int percent_pos_50 = round_times.size() * 0.5;
+  int percent_pos_80 = round_times.size() * 0.8;
+  int percent_pos_90 = round_times.size() * 0.9;
+  int percent_pos_99 = round_times.size() * 0.99;
+  int percent_pos_999 = round_times.size() * 0.999;
+
+  uint64_t total_ms = 0;
+  for (auto x : round_times) {
+    total_ms += x;
+  }
+
+  LOG(INFO) << "Batch size: " << FLAGS_batch_size;
+  LOG(INFO) << "Total requests: " << round_times.size();
+  LOG(INFO) << "Max concurrency: " << FLAGS_concurrency;
+  LOG(INFO) << "Total ms (absolute time): " << total_ms / FLAGS_concurrency;
+
+  double qps = 0.0;
+  if (total_ms != 0) {
+    qps = (static_cast<double>(FLAGS_concurrency * FLAGS_requests) /
+           (total_ms / FLAGS_concurrency)) *
+          1000;
+  }
+
+  LOG(INFO) << "QPS: " 
<< qps << "/s"; + + LOG(INFO) << "Latency statistics: "; + if (round_times.size() != 0) { + LOG(INFO) << "Average ms: " + << static_cast(total_ms) / round_times.size(); + LOG(INFO) << "50 percent ms: " << round_times[percent_pos_50]; + LOG(INFO) << "80 percent ms: " << round_times[percent_pos_80]; + LOG(INFO) << "90 percent ms: " << round_times[percent_pos_90]; + LOG(INFO) << "99 percent ms: " << round_times[percent_pos_99]; + LOG(INFO) << "99.9 percent ms: " << round_times[percent_pos_999]; + } else { + LOG(INFO) << "N/A"; + } + + return 0; +} + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/demo-serving/CMakeLists.txt b/demo-serving/CMakeLists.txt index f9c6cab63a6c7e1fe04253e8aae6cbd8d8478e06..238fadf8934ca6bc8051e3f2d662c6fbe99170db 100644 --- a/demo-serving/CMakeLists.txt +++ b/demo-serving/CMakeLists.txt @@ -18,9 +18,17 @@ include(proto/CMakeLists.txt) add_executable(serving ${serving_srcs}) add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid opencv_imgcodecs) +if (WITH_GPU) + add_dependencies(serving fluid_gpu_engine) +endif() target_include_directories(serving PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/../predictor ) + +if(WITH_GPU) + target_link_libraries(serving ${CUDA_LIBRARIES} -Wl,--whole-archive fluid_gpu_engine + -Wl,--no-whole-archive) +endif() target_link_libraries(serving opencv_imgcodecs ${opencv_depend_libs} -Wl,--whole-archive fluid_cpu_engine -Wl,--no-whole-archive pdserving paddle_fluid ${paddle_depend_libs} diff --git a/demo-serving/op/classify_op.cpp b/demo-serving/op/classify_op.cpp index a714e7cd820113280f3deabbe0e95922cdee8d9a..22c3393866c002985d307168fb64b4cd88d4974e 100644 --- a/demo-serving/op/classify_op.cpp +++ b/demo-serving/op/classify_op.cpp @@ -35,7 +35,6 @@ int ClassifyOp::inference() { } const TensorVector* in = &reader_out->tensors; - uint32_t sample_size = in->size(); TensorVector* out = butil::get_object(); if (!out) { @@ -43,20 +42,21 @@ int ClassifyOp::inference() { return -1; } - if (sample_size <= 0) { - LOG(INFO) << "No samples need to to predicted"; - return 0; + if (in->size() != 1) { + LOG(ERROR) << "Samples should have been packed into a single tensor"; + return -1; } + int batch_size = in->at(0).shape[0]; // call paddle fluid model for inferencing if (InferManager::instance().infer( - IMAGE_CLASSIFICATION_MODEL_NAME, in, out, sample_size)) { + IMAGE_CLASSIFICATION_MODEL_NAME, in, out, batch_size)) { LOG(ERROR) << "Failed do infer in fluid model: " << IMAGE_CLASSIFICATION_MODEL_NAME; return -1; } - if (out->size() != sample_size) { + if (out->size() != in->size()) { LOG(ERROR) << "Output size not eq input size: " << in->size() << out->size(); return -1; @@ -64,24 +64,35 @@ int ClassifyOp::inference() { // copy output tensor into response ClassifyResponse* res = mutable_data(); + const paddle::PaddleTensor& out_tensor = (*out)[0]; + +#if 0 + int out_shape_size = out_tensor.shape.size(); + LOG(ERROR) << "out_tensor.shpae"; + for (int i = 0; i < out_shape_size; ++i) { + LOG(ERROR) << out_tensor.shape[i] << ":"; + } + + if (out_shape_size != 2) { + return -1; + } +#endif + + int sample_size = out_tensor.shape[0]; +#if 0 + LOG(ERROR) << "Output sample size " << sample_size; +#endif for (uint32_t si = 0; si < sample_size; si++) { - const paddle::PaddleTensor& out_tensor = (*out)[si]; DensePrediction* ins = res->add_predictions(); if (!ins) { LOG(ERROR) << "Failed append new out tensor"; return -1; } - uint32_t shape_size = out_tensor.shape.size(); - if (out_tensor.shape.size() != 2 || out_tensor.shape[0] != 1) { - 
LOG(ERROR) << "Not valid classification out shape"
-                 << ", shape size: " << out_tensor.shape.size();
-      return -1;
-    }
-    // assign output data
-    uint32_t data_size = out_tensor.data.length() / sizeof(float);
-    float* data = reinterpret_cast<float*>(out_tensor.data.data());
+    uint32_t data_size = out_tensor.shape[1];
+    float* data = reinterpret_cast<float*>(out_tensor.data.data() +
+                                           si * sizeof(float) * data_size);
     for (uint32_t di = 0; di < data_size; ++di) {
       ins->add_categories(data[di]);
     }
@@ -95,10 +106,6 @@ int ClassifyOp::inference() {
   out->clear();
   butil::return_object<TensorVector>(out);
 
-  LOG(INFO) << "Response in image classification:"
-            << "length:" << res->ByteSize() << ","
-            << "data:" << res->ShortDebugString();
-
   return 0;
 }
 
diff --git a/demo-serving/op/reader_op.cpp b/demo-serving/op/reader_op.cpp
index 8c97702de33215f9b33e201c0b02de6a2a8d7d08..d50c9ebd24be48c32d59aa641bace52cf556a337 100644
--- a/demo-serving/op/reader_op.cpp
+++ b/demo-serving/op/reader_op.cpp
@@ -51,6 +51,26 @@ int ReaderOp::inference() {
   resize.height = iresize[0];
   resize.width = iresize[1];
 
+  paddle::PaddleTensor in_tensor;
+  in_tensor.name = "tensor";
+  in_tensor.dtype = paddle::FLOAT32;
+  // shape assignment
+  in_tensor.shape.push_back(sample_size);  // batch_size
+  in_tensor.shape.push_back(3);
+  in_tensor.shape.push_back(resize.width);
+  in_tensor.shape.push_back(resize.height);
+
+  // tls resource assignment
+  size_t dense_capacity = 3 * resize.width * resize.height;
+  size_t len = dense_capacity * sizeof(float) * sample_size;
+  float* data =
+      reinterpret_cast<float*>(MempoolWrapper::instance().malloc(len));
+  if (data == NULL) {
+    LOG(ERROR) << "Failed create temp float array, "
+               << "size=" << dense_capacity * sample_size * sizeof(float);
+    return -1;
+  }
+
   for (uint32_t si = 0; si < sample_size; si++) {
     // parse image object from x-image
     const XImageReqInstance& ins = req->instances(si);
@@ -103,50 +123,31 @@ int ReaderOp::inference() {
     const int H = _image_8u_rgb.rows;
     const int W = _image_8u_rgb.cols;
     const int C = _image_8u_rgb.channels();
-    size_t dense_capacity = H * W * C;
-
-    paddle::PaddleTensor in_tensor;
-    in_tensor.name = "tensor";
-    in_tensor.dtype = paddle::FLOAT32;
-
-    // shape assignment
-    in_tensor.shape.push_back(1);  // batch_size
-
-    // accoreding to training stage, the instance shape should be
-    // in order of C-W-H. 
-    in_tensor.shape.push_back(C);
-    in_tensor.shape.push_back(W);
-    in_tensor.shape.push_back(H);
+    if (H != resize.height || W != resize.width || C != 3) {
+      LOG(ERROR) << "Image " << si << " has incompatible size";
+      return -1;
+    }
 
     LOG(INFO) << "Succ read one image, C: " << C << ", W: " << W << ", H: " << H;
 
-    // tls resource assignment
-    size_t len = dense_capacity * sizeof(float);
-    float* data =
-        reinterpret_cast<float*>(MempoolWrapper::instance().malloc(len));
-    if (data == NULL) {
-      LOG(ERROR) << "Failed create temp float array, "
-                 << "size=" << dense_capacity;
-      return -1;
-    }
-
+    float* data_ptr = data + dense_capacity * si;
     for (int h = 0; h < H; h++) {
       // p points to a new line
       unsigned char* p = _image_8u_rgb.ptr(h);
       for (int w = 0; w < W; w++) {
         for (int c = 0; c < C; c++) {
           // HWC(row,column,channel) -> CWH
-          data[W * H * c + W * h + w] = (p[C * w + c] - pmean[c]) * scale[c];
+          data_ptr[W * H * c + W * h + w] =
+              (p[C * w + c] - pmean[c]) * scale[c];
         }
       }
     }
-
-    paddle::PaddleBuf pbuf(data, len);
-    in_tensor.data = pbuf;
-
-    in->push_back(in_tensor);
   }
+  paddle::PaddleBuf pbuf(data, len);
+  in_tensor.data = pbuf;
+
+  in->push_back(in_tensor);
 
   return 0;
 }
 
diff --git a/demo-serving/op/write_json_op.cpp b/demo-serving/op/write_json_op.cpp
index 25aece0e4533ab5a535e86d2c4cbff259754436c..e0b372d7c8fce86ced556ea397c9dcbb05111f66 100644
--- a/demo-serving/op/write_json_op.cpp
+++ b/demo-serving/op/write_json_op.cpp
@@ -16,7 +16,7 @@ #include
 #ifdef BCLOUD
-#include "pb_to_json.h"
+#include "pb_to_json.h"  // NOLINT
 #else
 #include "json2pb/pb_to_json.h"
 #endif
@@ -70,7 +70,7 @@ int WriteJsonOp::inference() {
     }
   }
 
-  LOG(INFO) << "Succ write json:" << classify_out->ShortDebugString();
+  LOG(INFO) << "Succ write json";
 
   return 0;
 }
 
diff --git a/doc/DESIGN.md b/doc/DESIGN.md
index ce89854006ac389d954cb2bda89c54e6f1a2e6d2..12dce781a2ba19b678ea488a1b45841d1b611cc2 100644
--- a/doc/DESIGN.md
+++ b/doc/DESIGN.md
@@ -26,7 +26,7 @@ PaddlePaddle是公司开源的机器学习框架,广泛支持各种深度学
 ## 3. 
Paddle Serving总体框架 -![Paddle-Serging总体框图](https://paddle-serving.bj.bcebos.com/doc/framework.png) +![Paddle-Serging总体框图](framework.png) **模型管理框架**:对接多种机器学习平台的模型文件,向上提供统一的inference接口 **业务调度框架**:对各种不同预测模型的计算逻辑进行抽象,提供通用的DAG调度框架,通过DAG图串联不同的算子,共同完成一次预测服务。该抽象模型使用户可以方便的实现自己的计算逻辑,同时便于算子共用。(用户搭建自己的预测服务,很大一部分工作是搭建DAG和提供算子的实现) @@ -62,31 +62,31 @@ class FluidFamilyCore { 参考TF框架的模型计算的抽象思想,将业务逻辑抽象成DAG图,由配置驱动,生成workflow,跳过C++代码编译。业务的每个具体步骤,对应一个具体的OP,OP可配置自己依赖的上游OP。OP之间消息传递统一由线程级Bus和channel机制实现。例如,一个简单的预测服务的服务过程,可以抽象成读请求数据->调用预测接口->写回预测结果等3个步骤,相应的实现到3个OP: ReaderOp->ClassifyOp->WriteOp -![预测服务Service](https://paddle-serving.bj.bcebos.com/doc/predict-service.png) +![预测服务Service](predict-service.png) 关于OP之间的依赖关系,以及通过OP组建workflow,可以参考[从零开始写一个预测服务](CREATING.md)的相关章节 服务端实例透视图 -![服务端实例透视图](https://paddle-serving.bj.bcebos.com/doc/server-side.png) +![服务端实例透视图](server-side.png) #### 3.2.2 Paddle Serving的多服务机制 -![Paddle Serving的多服务机制](https://paddle-serving.bj.bcebos.com/doc/multi-service.png) +![Paddle Serving的多服务机制](multi-service.png) -Paddle Serving实例可以同时加载多个模型,每个模型用一个Service(以及其所配置的workflow)承接服务。可以参考[Demo例子中的service配置文件](../serving/conf/service.prototxt)了解如何为serving实例配置多个service +Paddle Serving实例可以同时加载多个模型,每个模型用一个Service(以及其所配置的workflow)承接服务。可以参考[Demo例子中的service配置文件](../demo-serving/conf/service.prototxt)了解如何为serving实例配置多个service #### 3.2.3 业务调度层级关系 从客户端看,一个Paddle Serving service从顶向下可分为Service, Endpoint, Variant等3个层级 -![调用层级关系](https://paddle-serving.bj.bcebos.com/doc/multi-variants.png) +![调用层级关系](multi-variants.png) 一个Service对应一个预测模型,模型下有1个endpoint。模型的不同版本,通过endpoint下多个variant概念实现: 同一个模型预测服务,可以配置多个variant,每个variant有自己的下游IP列表。客户端代码可以对各个variant配置相对权重,以达到调节流量比例的关系(参考[客户端配置](CLIENT_CONFIGURE.md)第3.2节中关于variant_weight_list的说明)。 -![Client端proxy功能](https://paddle-serving.bj.bcebos.com/doc/client-side-proxy.png) +![Client端proxy功能](client-side-proxy.png) ## 4. 用户接口 diff --git a/doc/FAQ.md b/doc/FAQ.md index a48e5c4dbc6562a8035d4ea0731b06183e2e8f3c..2ba9ec9d5e0a5d7c8f0ccc3ebfc480f21170751d 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -5,3 +5,22 @@ - 如果在inferservice_file里指定了port:xxx,那么就去申请该端口号; - 否则,如果在gflags.conf里指定了--port:xxx,那就去申请该端口号; - 否则,使用程序里指定的默认端口号:8010。 + +## 2. GPU预测中为何请求的响应时间波动会非常大? +PaddleServing依托PaddlePaddle预测库执行预测计算;在GPU设备上,由于同一个进程内目前共用1个GPU stream,进程内的多个请求的预测计算会被严格串行。所以如果有2个请求同时到达某个Serving实例,不管该实例启动时创建了多少个worker线程,都不能起到加速作用,后到的请求会被排队,直到前面请求计算完成。 + +## 3. 如何充分利用GPU卡的计算能力? +如问题2所说,由于预测库的限制,单个Serving进程只能绑定单张GPU卡,且进程内共用1个GPU stream,所有请求必须串行计算。 + +为提高GPU卡使用率,目前可以想到的方法是:在单张GPU卡上启动多个Serving进程,每个进程绑定一个GPU stream,多个stream并行计算。这种方法是否能起到加速作用,受限于多个因素,主要有: + +1. 单个stream占用GPU算力;假如单个stream已经将GPU算力占用超过50%,那么增加stream很可能会导致2个stream的job分别排队,拖慢各自的响应时间 +2. GPU显存:Serving进程需要将模型参数加载到显存中,并且计算时要在GPU显存池分配临时变量;假如单个Serving进程已经用掉超过50%的显存,则增加Serving进程会造成显存不足,导致进程报错退出 + +为此,可采用如下步骤,进行测试: + +1. 加载模型时,在model_toolkit.prototxt中,model type选择FLUID_GPU_ANALYSIS或FLUID_GPU_ANALYSIS_DIR;会对模型进行静态分析,进行一定程度显存优化 +2. 在步骤1完成后,启动单个Serving进程,启动参数:`--gpuid=N --bthread_concurrency=4 --bthread_min_concurrency=4`;启动一个client,进行并发度为1的压力测试,batch size从小到大,记下平响;由于算力的限制,当batch size增大到一定程度,应该会出现响应时间明显变大;或虽然没有明显变大,但已经不满足系统需求 +3. 再启动1个Serving进程,与步骤2启动时使用相同的参数略有不同: `--gpuid=N --bthread_concurrency=4 --bthread_min_concurrency=4 --port=8011` 其中--port=8011用来让新启动的进程使用一个新的服务端口;然后同时对这2个Serving进程进行压测,继续观察batch size从小到大时平均响应时间的变化,直到取得batch size和响应时间的折中 +4. 重复步骤2-3 +5. 
以2-4步的测试,来决定:单张GPU卡可以由多少个Serving进程共用; 实际部署时,就在一张GPU卡上启动这么多个Serving进程同时提供服务 diff --git a/doc/INDEX.md b/doc/INDEX.md index c4e78be00435e3b936b9fa08785098028e53de17..c9399b2f5cf50cda10062023f009b92bfc0c8158 100644 --- a/doc/INDEX.md +++ b/doc/INDEX.md @@ -1,16 +1,19 @@ +[Design](DESIGN.md) -[Client Configure](CLIENT_CONFIGURE.md) +[Installation](INSTALL.md) -[How to Configure a Clustered Service](CLUSTERING.md) +[Getting Started](GETTING_STARTED.md) [Creating a Prediction Service](CREATING.md) -[Design](DESIGN.md) +[Client Configure](CLIENT_CONFIGURE.md) -[FAQ](FAQ.md) +[Server Side Configuration](SERVING_CONFIGURE.md) -[Getting Started](GETTING_STARTED.md) +[How to Configure a Clustered Service](CLUSTERING.md) -[Installation](INSTALL.md) +[Multiple Serving Instances over Single GPU Card](MULTI_SERVING_OVER_SINGLE_GPU_CARD.md) -[Server Side Configuration](SERVING_CONFIGURE.md) +[Benchmarking](BENCHMARKING.md) + +[FAQ](FAQ.md) diff --git a/doc/INSTALL.md b/doc/INSTALL.md index 64e482d6438f2d6e5da8ea6ce98e0933b18a0f7b..d3114e86efbf2cd5985811d4d39ec7e8069e3534 100644 --- a/doc/INSTALL.md +++ b/doc/INSTALL.md @@ -58,10 +58,9 @@ $ make install # CMake编译选项说明 -因Paddle Serving依托于PaddlePaddle项目进行构建,以下编译选项其实是传递给PaddlePaddle的编译选项: - | 编译选项 | 说明 | |----------|------| -| WITH_AVX | Compile PaddlePaddle with AVX intrinsics | -| WITH_MKL | Compile PaddlePaddle with MKLML library | +| WITH_AVX | For configuring PaddlePaddle. Compile PaddlePaddle with AVX intrinsics | +| WITH_MKL | For configuring PaddlePaddle. Compile PaddlePaddle with MKLML library | +| WITH_GPU | For configuring PaddlePaddle. Compile PaddlePaddle with NVIDIA GPU | | CLINET_ONLY | Compile client libraries and demos only | diff --git a/doc/MULTI_SERVING_OVER_SINGLE_GPU_CARD.md b/doc/MULTI_SERVING_OVER_SINGLE_GPU_CARD.md new file mode 100644 index 0000000000000000000000000000000000000000..1bbcaf16a38d551998402c0f52df581dc9dc9866 --- /dev/null +++ b/doc/MULTI_SERVING_OVER_SINGLE_GPU_CARD.md @@ -0,0 +1,32 @@ +# Multiple Serving Instances over Single GPU Card + +Paddle Serving依托PaddlePaddle预测库执行实际的预测计算。由于当前GPU预测库的限制,单个Serving实例只可以绑定1张GPU卡,且进程内所有worker线程共用1个GPU stream。也就是说,不管Serving启动多少个worker线程,所有的请求在GPU是严格串行计算的,起不到加速作用。这会带来一个问题,就是如果模型计算量不大,那么Serving进程实际上不会用满GPU的算力。 + +为了充分利用GPU卡的算力,考虑在单张卡上启动多个Serving实例,通过多个GPU stream,力争用满GPU的算力。启动命令可以如下所示: + +``` +bin/serving --gpuid=0 --bthread_concurrency=4 --bthread_min_concurrency=4 --port=8010& +bin/serving --gpuid=0 --bthread_concurrency=4 --bthread_min_concurrency=4 --port=8011& +``` + +上述2条命令,启动2个Serving实例,分别监听8010端口和8011端口。但他们都绑定同一张卡 (gpuid = 0)。 + +命令行参数含义: +``` +-gpuid=N:用于指定所绑定的GPU卡ID +-bthread_concurrency和bthread_min_concurrency共同限制该进程启动的worker数:由于在GPU预测模式下,增加worker线程数并不能提高并发能力,为了节省部分资源,干脆将他们限制掉;均设为4,是因为这是bthread允许的最小值。 +-port xxx:Serving实例监听的端口 +``` + +但是,上述方式究竟是否能在不影响响应时间等其他指标的前提下,起到提高GPU使用率作用,受到多个限制因素的制约,具体的: + +1. 单个stream占用GPU算力;假如单个stream已经将GPU算力占用超过50%,那么增加stream很可能会导致2个stream的job分别排队,拖慢各自的响应时间 +2. GPU显存:Serving进程需要将模型参数加载到显存中,并且计算时要在GPU显存池分配临时变量;假如单个Serving进程已经用掉超过50%的显存,则增加Serving进程会造成显存不足,导致进程报错退出 + +为此,可采用如下步骤,进行测试: + +1. 加载模型时,在model_toolkit.prototxt中,model type选择FLUID_GPU_ANALYSIS或FLUID_GPU_ANALYSIS_DIR;会对模型进行静态分析,进行一定程度显存优化 +2. 在步骤1完成后,启动单个Serving进程,启动参数:`--gpuid=N --bthread_concurrency=4 --bthread_min_concurrency=4`;启动一个client,进行并发度为1的压力测试,batch size从小到大,记下平响;由于算力的限制,当batch size增大到一定程度,应该会出现响应时间明显变大;或虽然没有明显变大,但已经不满足系统需求 +3. 
再启动1个Serving进程,与步骤2启动时使用相同的参数略有不同: `--gpuid=N --bthread_concurrency=4 --bthread_min_concurrency=4 --port=8011` 其中--port=8011用来让新启动的进程使用一个新的服务端口;然后同时对这2个Serving进程进行压测,继续观察batch size从小到大时平均响应时间的变化,直到取得batch size和响应时间的折中 +4. 重复步骤2-3 +5. 以2-4步的测试,来决定:单张GPU卡可以由多少个Serving进程共用; 实际部署时,就在一张GPU卡上启动这么多个Serving进程同时提供服务 diff --git a/doc/SERVING_CONFIGURE.md b/doc/SERVING_CONFIGURE.md index 9cb4a149294b66e2ef977d8529f54cea74a81c23..f5887f5cd139b1e70b49f4eee2e2552658692103 100644 --- a/doc/SERVING_CONFIGURE.md +++ b/doc/SERVING_CONFIGURE.md @@ -142,6 +142,11 @@ type: 预测引擎的类型。可在inferencer-fluid-cpu/src/fluid_cpu_engine.cp |FLUID_CPU_ANALYSIS_DIR|使用fluid Analysis API;模型所有参数分开保存为独立的文件,整个模型放到一个目录中| |FLUID_CPU_NATIVE|使用fluid Native API;模型所有参数保存在一个文件| |FLUID_CPU_NATIVE_DIR|使用fluid Native API;模型所有参数分开保存为独立的文件,整个模型放到一个目录中| +|FLUID_GPU_ANALYSIS|GPU预测,使用fluid Analysis API;模型所有参数保存在一个文件| +|FLUID_GPU_ANALYSIS_DIR|GPU预测,使用fluid Analysis API;模型所有参数分开保存为独立的文件,整个模型放到一个目录中| +|FLUID_GPU_NATIVE|GPU预测,使用fluid Native API;模型所有参数保存在一个文件| +|FLUID_GPU_NATIVE_DIR|GPU预测,使用fluid Native API;模型所有参数分开保存为独立的文件,整个模型放到一个目录中| + **fluid Analysis API和fluid Native API的区别** @@ -183,9 +188,10 @@ enable_batch_align: |enable_protocol_list|baidu_std|brpc 通信协议列表| |log_dir|./log|log dir| |num_threads||brpc server使用的系统线程数,默认为CPU核数| -|max_concurrency||并发处理的请求数,设为<=0则为不予限制,若大于0则限定brpc server端同时处理的请求数上限| |port|8010|Serving进程接收请求监听端口| |gpuid|0|GPU预测时指定Serving进程使用的GPU device id。只允许绑定1张GPU卡| +|bthread_concurrency|9|BRPC底层bthread的concurrency。在使用GPU预测引擎时,为了限制并发worker数,可使用此参数| +|bthread_min_concurrency|4|BRPC底层bthread的min concurrency。在使用GPU预测引擎时,为限制并发worker数,可使用此参数。与bthread_concurrency结合使用| 可以通过在serving/conf/gflags.conf覆盖默认值,例如 ``` diff --git a/doc/client-side-proxy.png b/doc/client-side-proxy.png new file mode 100755 index 0000000000000000000000000000000000000000..1e7639ac401955d9b7c2761820f3c3cdc7fbf8fd Binary files /dev/null and b/doc/client-side-proxy.png differ diff --git a/doc/framework.png b/doc/framework.png new file mode 100755 index 0000000000000000000000000000000000000000..676d35bed06893d0f6247561756c4595f48f1698 Binary files /dev/null and b/doc/framework.png differ diff --git a/doc/multi-service.png b/doc/multi-service.png new file mode 100755 index 0000000000000000000000000000000000000000..629024e58f58299d16fb133601c09e673746d560 Binary files /dev/null and b/doc/multi-service.png differ diff --git a/doc/multi-variants.png b/doc/multi-variants.png new file mode 100755 index 0000000000000000000000000000000000000000..c3d141b14712b4853629f9119d60347a20779268 Binary files /dev/null and b/doc/multi-variants.png differ diff --git a/doc/predict-service.png b/doc/predict-service.png new file mode 100755 index 0000000000000000000000000000000000000000..ccd92e4bb1b5c58787118b564cc6a776d648be01 Binary files /dev/null and b/doc/predict-service.png differ diff --git a/doc/server-side.png b/doc/server-side.png new file mode 100755 index 0000000000000000000000000000000000000000..7a96996c2a4d14832c9c2177a09e78181b1a551c Binary files /dev/null and b/doc/server-side.png differ diff --git a/inferencer-fluid-cpu/include/fluid_cpu_engine.h b/inferencer-fluid-cpu/include/fluid_cpu_engine.h index f01f7fce418278ea45eee4cc3558a1da0b0dd094..24109ef0226a510d48e0cade4d9bc6039d7d5754 100644 --- a/inferencer-fluid-cpu/include/fluid_cpu_engine.h +++ b/inferencer-fluid-cpu/include/fluid_cpu_engine.h @@ -22,7 +22,11 @@ #include "configure/include/configure_parser.h" #include "configure/inferencer_configure.pb.h" #ifdef BCLOUD +#ifdef WITH_GPU +#include 
"paddle/paddle_inference_api.h" +#else #include "paddle/fluid/inference/api/paddle_inference_api.h" +#endif #else #include "paddle/fluid/inference/paddle_inference_api.h" #endif @@ -155,6 +159,8 @@ class FluidCpuNativeCore : public FluidFamilyCore { native_config.prog_file = data_path + "/__model__"; native_config.use_gpu = false; native_config.device = 0; + native_config.fraction_of_gpu_memory = 0; + AutoLock lock(GlobalPaddleCreateMutex::instance()); _core = paddle::CreatePaddlePredictor( @@ -209,6 +215,7 @@ class FluidCpuNativeDirCore : public FluidFamilyCore { native_config.model_dir = data_path; native_config.use_gpu = false; native_config.device = 0; + native_config.fraction_of_gpu_memory = 0; AutoLock lock(GlobalPaddleCreateMutex::instance()); _core = paddle::CreatePaddlePredictor( @@ -458,6 +465,7 @@ class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore { native_config.model_dir = data_path; native_config.use_gpu = false; native_config.device = 0; + native_config.fraction_of_gpu_memory = 0; AutoLock lock(GlobalPaddleCreateMutex::instance()); _core->_fluid_core = paddle::CreatePaddlePredictor(analysis_config); @@ -192,6 +193,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore { analysis_config.SwitchSpecifyInputNames(true); analysis_config.SetCpuMathLibraryNumThreads(1); analysis_config.EnableMemoryOptim(false, false); + AutoLock lock(GlobalPaddleCreateMutex::instance()); _core = paddle::CreatePaddlePredictor(analysis_config); @@ -214,7 +216,6 @@ class FluidGpuNativeDirCore : public FluidFamilyCore { return -1; } - paddle::NativeConfig native_config; native_config.model_dir = data_path; native_config.use_gpu = true; diff --git a/predictor/common/inner_common.h b/predictor/common/inner_common.h index a10ce38ea039b734b62dcef5981d46293057f94d..9f9b3b2f3607578348dcca9253d6451f8fa205d6 100644 --- a/predictor/common/inner_common.h +++ b/predictor/common/inner_common.h @@ -32,24 +32,26 @@ #include "gflags/gflags.h" #ifdef BCLOUD -#include "bthread.h" #include "baidu/rpc/channel.h" #include "baidu/rpc/policy/giano_authenticator.h" #include "baidu/rpc/server.h" -#include "base/logging.h" #include "base/comlog_sink.h" +#include "base/logging.h" #include "base/object_pool.h" #include "base/time.h" +#include "bthread.h" // NOLINT #else -#include "bthread/bthread.h" #include "brpc/channel.h" #include "brpc/policy/giano_authenticator.h" #include "brpc/server.h" +#include "bthread/bthread.h" #include "butil/logging.h" #include "butil/object_pool.h" #include "butil/time.h" #endif +#include "glog/raw_logging.h" + #include "configure/include/configure_parser.h" #include "configure/server_configure.pb.h" diff --git a/predictor/framework/op_repository.h b/predictor/framework/op_repository.h index dca8f129c55fac39e24bb2c03a400d49d727c809..045912945232f9a36f40a941bc77041fa5ca08e2 100644 --- a/predictor/framework/op_repository.h +++ b/predictor/framework/op_repository.h @@ -62,7 +62,7 @@ class OpRepository { template void regist_op(std::string op_type) { _repository[op_type] = &OpFactory::instance(); - LOG(INFO) << "Succ regist op: " << op_type << "!"; + RAW_LOG_INFO("Succ regist op: %s", op_type.c_str()); } Op* get_op(std::string op_type); diff --git a/predictor/framework/service_manager.h b/predictor/framework/service_manager.h index e456c5cdcd0eb93d91a33efa93db0f71cd92bcc9..1b339c3742ef7302d5ce82704dd70d0ad6f84e7b 100644 --- a/predictor/framework/service_manager.h +++ b/predictor/framework/service_manager.h @@ -27,13 +27,13 @@ namespace predictor { 
::baidu::paddle_serving::predictor::FormatServiceManager::instance() \ .regist_service(svr_name, svr); \ if (ret != 0) { \ - LOG(ERROR) << "Failed regist service[" << svr_name << "]" \ - << "[" << typeid(svr).name() << "]" \ - << "!"; \ + RAW_LOG_ERROR("Failed regist service[%s][%s]", \ + svr_name.c_str(), \ + typeid(svr).name()); \ } else { \ - LOG(INFO) << "Success regist service[" << svr_name << "][" \ - << typeid(svr).name() << "]" \ - << "!"; \ + RAW_LOG_INFO("Success regist service[%s][%s]", \ + svr_name.c_str(), \ + typeid(svr).name()); \ } \ } while (0) @@ -43,29 +43,30 @@ class FormatServiceManager { int regist_service(const std::string& svr_name, Service* svr) { if (_service_map.find(svr_name) != _service_map.end()) { - LOG(ERROR) << "Service[" << svr_name << "][" << typeid(svr).name() << "]" - << " already exist!"; + RAW_LOG_ERROR("Service[%s][%s] already exist!", + svr_name.c_str(), + typeid(svr).name()); return -1; } std::pair::iterator, bool> ret; ret = _service_map.insert(std::make_pair(svr_name, svr)); if (ret.second == false) { - LOG(ERROR) << "Service[" << svr_name << "][" << typeid(svr).name() << "]" - << " insert failed!"; + RAW_LOG_ERROR("Service[%s][%s] insert failed!", + svr_name.c_str(), + typeid(svr).name()); return -1; } - LOG(INFO) << "Service[" << svr_name << "] insert successfully!"; + RAW_LOG_INFO("Service[%s] insert successfully!", svr_name.c_str()); return 0; } Service* get_service(const std::string& svr_name) { boost::unordered_map::iterator res; if ((res = _service_map.find(svr_name)) == _service_map.end()) { - LOG(WARNING) << "Service[" << svr_name << "] " - << "not found in service manager" - << "!"; + RAW_LOG_WARNING("Service[%s] not found in service manager!", + svr_name.c_str()); return NULL; } return (*res).second; diff --git a/predictor/src/pdserving.cpp b/predictor/src/pdserving.cpp index 0897039c79e4c576fd39cdea4bc21934a1ceed9c..be7f988744b6ef0530c8b725cb3d6275725831ec 100644 --- a/predictor/src/pdserving.cpp +++ b/predictor/src/pdserving.cpp @@ -51,6 +51,8 @@ using baidu::paddle_serving::predictor::FLAGS_port; using baidu::paddle_serving::configure::InferServiceConf; using baidu::paddle_serving::configure::read_proto_conf; +DECLARE_bool(logtostderr); + void print_revision(std::ostream& os, void*) { #if defined(PDSERVING_VERSION) os << PDSERVING_VERSION; @@ -70,12 +72,13 @@ DEFINE_bool(g, false, "user defined gflag path"); DECLARE_string(flagfile); namespace bthread { - extern pthread_mutex_t g_task_control_mutex; +extern pthread_mutex_t g_task_control_mutex; } pthread_mutex_t g_worker_start_fn_mutex = PTHREAD_MUTEX_INITIALIZER; void pthread_worker_start_fn() { - while (pthread_mutex_lock(&g_worker_start_fn_mutex) != 0) {} + while (pthread_mutex_lock(&g_worker_start_fn_mutex) != 0) { + } // Try to avoid deadlock in bthread int lock_status = pthread_mutex_trylock(&bthread::g_task_control_mutex); @@ -86,7 +89,8 @@ void pthread_worker_start_fn() { // Try to avoid deadlock in bthread if (lock_status == EBUSY || lock_status == EAGAIN) { - while (pthread_mutex_lock(&bthread::g_task_control_mutex) != 0) {} + while (pthread_mutex_lock(&bthread::g_task_control_mutex) != 0) { + } } pthread_mutex_unlock(&g_worker_start_fn_mutex); @@ -132,7 +136,7 @@ int main(int argc, char** argv) { g_change_server_port(); - // initialize logger instance +// initialize logger instance #ifdef BCLOUD logging::LoggingSettings settings; settings.logging_dest = logging::LOG_TO_FILE; @@ -204,6 +208,8 @@ int main(int argc, char** argv) { } LOG(INFO) << "Succ call pthread worker 
start function"; + FLAGS_logtostderr = false; + if (ServerManager::instance().start_and_wait() != 0) { LOG(ERROR) << "Failed start server and wait!"; return -1; diff --git a/release.bcloud b/release.bcloud index 3f76ab210331d9118393eb555fad9d1c0d61f487..85454b5da574a5023a30048e7aa3bdb11d352f16 100644 --- a/release.bcloud +++ b/release.bcloud @@ -9,6 +9,7 @@ mv bin/sparse_format demo/client/bin mv bin/text_classification demo/client/bin mv bin/text_classification_press demo/client/bin mv bin/ximage demo/client/bin +mv bin/ximage_press demo/client/bin cp baidu_third-party_mklml/so/* demo/serving/bin/ rm -rf baidu_third-party_mklml
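
The reader_op.cpp change above packs every image of a request into a single FLOAT32 tensor of shape [batch, 3, W, H], writing sample si at offset dense_capacity * si, instead of emitting one [1, C, W, H] tensor per image. Below is a minimal standalone sketch of that packing scheme, with a plain std::vector<float> standing in for the MempoolWrapper buffer and a raw RGB pointer standing in for the decoded OpenCV image; the helper name is illustrative only.

```cpp
#include <cstddef>
#include <vector>

// Pack N images (8-bit RGB, HWC layout) into one float buffer laid out as
// [N][C][W][H], mirroring the index  W * H * c + W * h + w  used in reader_op.cpp.
std::vector<float> pack_batch(const std::vector<const unsigned char*>& images,
                              int W, int H, int C,
                              const float* pmean, const float* scale) {
  const std::size_t dense_capacity = static_cast<std::size_t>(C) * W * H;
  std::vector<float> data(dense_capacity * images.size());
  for (std::size_t si = 0; si < images.size(); ++si) {
    float* data_ptr = data.data() + dense_capacity * si;  // per-sample offset
    const unsigned char* img = images[si];
    for (int h = 0; h < H; ++h) {
      const unsigned char* p = img + static_cast<std::size_t>(h) * W * C;  // one row
      for (int w = 0; w < W; ++w) {
        for (int c = 0; c < C; ++c) {
          // HWC (row, column, channel) -> CWH, normalized as in the op
          data_ptr[W * H * c + W * h + w] = (p[C * w + c] - pmean[c]) * scale[c];
        }
      }
    }
  }
  return data;  // interpreted as shape [images.size(), C, W, H]
}
```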
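On the output side, classify_op.cpp now assumes the model returns a single tensor of shape [batch_size, num_classes] and slices it per sample at offset si * num_classes. The sketch below combines that slicing with the arg-max the demo client's extract_res() applies to each sample; the helper is illustrative and not part of the serving API.

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Slice a [batch_size, num_classes] float blob per sample and take the arg-max,
// mirroring classify_op.cpp (per-sample pointer offset) and the client's extract_res().
std::vector<std::pair<uint32_t, float>> top1_per_sample(const float* blob,
                                                        uint32_t batch_size,
                                                        uint32_t num_classes) {
  std::vector<std::pair<uint32_t, float>> result;
  if (num_classes == 0) return result;
  for (uint32_t si = 0; si < batch_size; ++si) {
    const float* sample = blob + si * num_classes;  // per-sample slice
    uint32_t max_idx = 0;
    float max_prop = sample[0];
    for (uint32_t ci = 1; ci < num_classes; ++ci) {
      if (sample[ci] > max_prop) {
        max_prop = sample[ci];
        max_idx = ci;
      }
    }
    result.emplace_back(max_idx, max_prop);
  }
  return result;
}
```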
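ximage_press.cpp reports QPS and latency percentiles by merging the per-thread latency vectors, sorting them, and approximating per-worker wall time as total_ms / concurrency. A condensed sketch of that bookkeeping, assuming latencies are collected in milliseconds as in the client:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct BenchStats {
  double avg_ms = 0, qps = 0;
  int p50 = 0, p90 = 0, p99 = 0;
};

// round_time[t] holds the per-request latencies (ms) recorded by worker thread t.
BenchStats summarize(const std::vector<std::vector<int>>& round_time, int concurrency) {
  std::vector<int> all;
  for (const auto& t : round_time) all.insert(all.end(), t.begin(), t.end());
  std::sort(all.begin(), all.end());

  uint64_t total_ms = 0;
  for (int x : all) total_ms += x;

  BenchStats s;
  if (all.empty() || concurrency <= 0) return s;

  s.avg_ms = static_cast<double>(total_ms) / all.size();
  s.p50 = all[static_cast<std::size_t>(all.size() * 0.5)];
  s.p90 = all[static_cast<std::size_t>(all.size() * 0.9)];
  s.p99 = all[static_cast<std::size_t>(all.size() * 0.99)];

  // total_ms / concurrency approximates the wall time spent by one worker.
  uint64_t per_worker_ms = total_ms / concurrency;
  if (per_worker_ms > 0) {
    s.qps = static_cast<double>(all.size()) / per_worker_ms * 1000;
  }
  return s;
}
```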