Unverified · Commit 7b77852b · Authored by: TeslaZhao · Committed by: GitHub

Merge pull request #32 from PaddlePaddle/develop

Sync codes
@@ -53,7 +53,7 @@ We consider deploying deep learning inference service online to be a user-facing
<h2 align="center">AIStudio Tutorial</h2>
-Here we provide tutorial on AIStudio(Chinese Version) [AIStudio教程-Paddle Serving服务化部署框架](https://aistudio.baidu.com/aistudio/projectdetail/1550674)
+Here we provide a tutorial on AIStudio (Chinese version): [AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
The tutorial provides
<ul>
@@ -85,14 +85,14 @@ We **highly recommend** you to **run Paddle Serving in Docker**, please visit [R
```
# Run CPU Docker
docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
+docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
```
# Run GPU Docker
nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
-nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
+nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
nvidia-docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
...
@@ -53,7 +53,7 @@ Paddle Serving aims to help deep learning developers easily deploy online inference services
<h2 align="center">Tutorial</h2>
-Paddle Serving developers provide the easy-to-use [AIStudio教程-Paddle Serving服务化部署框架](https://aistudio.baidu.com/aistudio/projectdetail/1550674)
+Paddle Serving developers provide the easy-to-use [AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
The tutorial provides the following content
@@ -86,14 +86,14 @@ Paddle Serving developers provide the easy-to-use [AIStudio教程-Paddle Serv
```
# Start CPU Docker
docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
+docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
```
# Start GPU Docker
nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
-nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
+nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
nvidia-docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
...
@@ -18,7 +18,7 @@ SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle)
SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle)
SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/)
SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE)
-SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE)
+SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a" CACHE FILEPATH "Paddle library." FORCE)
message("paddle install dir: " ${PADDLE_INSTALL_DIR})
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
-SET(PADDLE_VERSION "2.0.0")
+SET(PADDLE_VERSION "2.0.1")
if (WITH_GPU)
  if(CUDA_VERSION EQUAL 11.0)
    set(CUDA_SUFFIX "cuda11-cudnn8-avx-mkl")
@@ -55,9 +55,9 @@ if (WITH_GPU)
  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-${CUDA_SUFFIX}")
elseif (WITH_LITE)
  if (WITH_XPU)
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}-xpu")
  else()
-    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
  endif()
else()
  if (WITH_AVX)
@@ -139,8 +139,8 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
if (WITH_TRT)
  ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
...
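The suffix logic above assembles the download tag for the prebuilt inference library. When configuring on a new platform, a hedged one-liner (not part of the original file) can surface the resolved tag:

```cmake
# Hypothetical debugging aid, not in the original file: print the resolved
# library tag, e.g. "2.0.1-gpu-cuda11-cudnn8-avx-mkl" or "2.0.1-aarch64-xpu".
message(STATUS "PADDLE_LIB_VERSION = ${PADDLE_LIB_VERSION}")
```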
if (SERVER OR CLIENT)
  LIST(APPEND protofiles
      ${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
      ${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
      ${CMAKE_CURRENT_LIST_DIR}/proto/inferencer_configure.proto
      ${CMAKE_CURRENT_LIST_DIR}/proto/general_model_config.proto
  )
  PROTOBUF_GENERATE_CPP(configure_proto_srcs configure_proto_hdrs ${protofiles})
  list(APPEND configure_srcs ${configure_proto_srcs})
  list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
  add_library(configure ${configure_srcs})
  add_dependencies(configure brpc)
  install(TARGETS configure
      ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
  )
  install(FILES ${CMAKE_CURRENT_LIST_DIR}/include/configure_parser.h
      DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure/include)
  FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
  install(FILES ${inc}
      DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
endif()

if (WITH_PYTHON)
  py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
  add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
  add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
  py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
  add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
  add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)

  if (CLIENT)
    py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
    add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
    add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init)
    add_custom_command(TARGET sdk_configure_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMENT "Copy generated python proto into directory paddle_serving_client/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_custom_command(TARGET general_model_config_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
        COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
  endif()

  if (APP)
    add_custom_command(TARGET general_model_config_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
        COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
  endif()

  if (SERVER)
    py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
    add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
    add_dependencies(server_config_py_proto server_config_py_proto_init)
-    if (NOT WITH_GPU AND NOT WITH_LITE)
    add_custom_command(TARGET server_config_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMENT "Copy generated python proto into directory paddle_serving_server/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_custom_command(TARGET general_model_config_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
        COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-    else()
-    add_custom_command(TARGET server_config_py_proto POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMENT "Copy generated python proto into directory paddle_serving_server_gpu/proto."
-        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-    add_custom_command(TARGET general_model_config_py_proto POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server_gpu/proto."
-        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-    add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
-        COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server_gpu/proto."
-        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-    endif()
  endif()
endif()
@@ -20,7 +20,7 @@ message EngineDesc {
  required string type = 2;
  required string reloadable_meta = 3;
  required string reloadable_type = 4;
-  required string model_data_path = 5;
+  required string model_dir = 5;
  required int32 runtime_thread_num = 6;
  required int32 batch_infer_size = 7;
  required int32 enable_batch_align = 8;
@@ -41,12 +41,13 @@ message EngineDesc {
  optional SparseParamServiceType sparse_param_service_type = 11;
  optional string sparse_param_service_table_name = 12;
  optional bool enable_memory_optimization = 13;
-  optional bool static_optimization = 14;
-  optional bool force_update_static_cache = 15;
-  optional bool enable_ir_optimization = 16;
-  optional bool use_trt = 17;
-  optional bool use_lite = 18;
-  optional bool use_xpu = 19;
+  optional bool enable_ir_optimization = 14;
+  optional bool use_trt = 15;
+  optional bool use_lite = 16;
+  optional bool use_xpu = 17;
+  optional bool use_gpu = 18;
+  optional bool combined_model = 19;
+  optional bool encrypted_model = 20;
};

// model_toolkit conf
...
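For reference, a hedged sketch of what a model_toolkit engine entry could look like with the renamed and added fields. The `engines` wrapper, the `name` field, and the concrete values are assumptions for illustration, not taken from this diff:

```
engines {
  name: "general_infer_0"                                # assumed field and value
  type: "PADDLE_INFER"                                   # assumed value
  reloadable_meta: "uci_housing_model/fluid_time_file"   # example path
  reloadable_type: "timestamp_ne"                        # assumed reload strategy
  model_dir: "uci_housing_model"                         # renamed from model_data_path
  runtime_thread_num: 0
  batch_infer_size: 0
  enable_batch_align: 0
  enable_memory_optimization: true
  enable_ir_optimization: false
  use_trt: false
  use_lite: false
  use_xpu: false
  use_gpu: false
  combined_model: false
  encrypted_model: false
}
```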
@@ -69,8 +69,6 @@ int test_write_conf() {
  engine->set_sparse_param_service_type(EngineDesc::LOCAL);
  engine->set_sparse_param_service_table_name("local_kv");
  engine->set_enable_memory_optimization(true);
-  engine->set_static_optimization(false);
-  engine->set_force_update_static_cache(false);

  int ret = baidu::paddle_serving::configure::write_proto_conf(
      &model_toolkit_conf, output_dir, model_toolkit_conf_file);
...
if(CLIENT)
  add_subdirectory(pybind11)
  pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
-  add_dependencies(serving_client sdk_cpp)
  target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
@@ -2,33 +2,25 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs})
-add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api utils)
+add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api utils)
if (WITH_GPU)
-  add_dependencies(serving fluid_gpu_engine)
+  add_dependencies(serving paddle_inference_engine)
endif()
if (WITH_LITE)
-  add_dependencies(serving fluid_arm_engine)
+  add_dependencies(serving paddle_inference_engine)
endif()

target_include_directories(serving PUBLIC
    ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
include_directories(${CUDNN_ROOT}/include/)

-if(WITH_GPU)
-  target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-      -Wl,--no-whole-archive)
-endif()
-if(WITH_LITE)
-  target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
-      -Wl,--no-whole-archive)
-endif()
-target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
+target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
    -Wl,--no-whole-archive)
-target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
+target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving brpc)
target_link_libraries(serving protobuf)
target_link_libraries(serving pdserving)
...
@@ -12,12 +12,12 @@ set_source_files_properties(
    ${pdserving_srcs}
    PROPERTIES
    COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid)
+add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_inference)
if (WITH_TRT)
  add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
-    brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})
+    brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs})

# install
install(TARGETS pdserving
    RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
...
@@ -14,6 +14,7 @@
#pragma once
#include <string>
+#include <fstream>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/common/macros.h"
@@ -148,6 +149,16 @@ class IsDerivedFrom {
  }
};

+static void ReadBinaryFile(const std::string& filename, std::string* contents) {
+  std::ifstream fin(filename, std::ios::in | std::ios::binary);
+  fin.seekg(0, std::ios::end);
+  contents->clear();
+  contents->resize(fin.tellg());
+  fin.seekg(0, std::ios::beg);
+  fin.read(&(contents->at(0)), contents->size());
+  fin.close();
+}

}  // namespace predictor
}  // namespace paddle_serving
}  // namespace baidu
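A minimal usage sketch of the new helper (the model path is a placeholder). Note that ReadBinaryFile does not check whether the stream opened successfully, so callers should verify the file exists before calling it:

```cpp
// Hypothetical caller; "uci_housing_model/__model__" is a placeholder path.
std::string program_buf;
baidu::paddle_serving::predictor::ReadBinaryFile("uci_housing_model/__model__",
                                                 &program_buf);
// program_buf now holds the raw bytes of the file (empty if the open failed).
```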
@@ -16,6 +16,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
+#include <pthread.h>
#include <string>
#include <utility>
#include <vector>
@@ -29,83 +30,29 @@ namespace predictor {

using configure::ModelToolkitConf;
-class InferEngineCreationParams {
- public:
-  InferEngineCreationParams() {
-    _path = "";
-    _enable_memory_optimization = false;
-    _enable_ir_optimization = false;
-    _static_optimization = false;
-    _force_update_static_cache = false;
-    _use_trt = false;
-    _use_lite = false;
-    _use_xpu = false;
-  }
-
-  void set_path(const std::string& path) { _path = path; }
-
-  void set_enable_memory_optimization(bool enable_memory_optimization) {
-    _enable_memory_optimization = enable_memory_optimization;
-  }
-
-  void set_enable_ir_optimization(bool enable_ir_optimization) {
-    _enable_ir_optimization = enable_ir_optimization;
-  }
-
-  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
-  void set_use_lite(bool use_lite) { _use_lite = use_lite; }
-  void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
-
-  bool enable_memory_optimization() const { return _enable_memory_optimization; }
-  bool enable_ir_optimization() const { return _enable_ir_optimization; }
-  bool use_trt() const { return _use_trt; }
-  bool use_lite() const { return _use_lite; }
-  bool use_xpu() const { return _use_xpu; }
-
-  void set_static_optimization(bool static_optimization = false) {
-    _static_optimization = static_optimization;
-  }
-
-  void set_force_update_static_cache(bool force_update_static_cache = false) {
-    _force_update_static_cache = force_update_static_cache;
-  }
-
-  bool static_optimization() const { return _static_optimization; }
-  bool force_update_static_cache() const { return _force_update_static_cache; }
-
-  std::string get_path() const { return _path; }
-
-  void dump() const {
-    LOG(INFO) << "InferEngineCreationParams: "
-              << "model_path = " << _path << ", "
-              << "enable_memory_optimization = " << _enable_memory_optimization << ", "
-              << "enable_tensorrt = " << _use_trt << ", "
-              << "enable_lite = " << _use_lite << ", "
-              << "enable_xpu = " << _use_xpu << ", "
-              << "enable_ir_optimization = " << _enable_ir_optimization << ", "
-              << "static_optimization = " << _static_optimization << ", "
-              << "force_update_static_cache = " << _force_update_static_cache;
-  }
-
- private:
-  std::string _path;
-  bool _enable_memory_optimization;
-  bool _enable_ir_optimization;
-  bool _static_optimization;
-  bool _force_update_static_cache;
-  bool _use_trt;
-  bool _use_lite;
-  bool _use_xpu;
-};
+class AutoLock {
+ public:
+  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
+    pthread_mutex_lock(&mutex);
+  }
+  ~AutoLock() { pthread_mutex_unlock(&_mut); }
+
+ private:
+  pthread_mutex_t& _mut;
+};
+
+class GlobalCreateMutex {
+ public:
+  pthread_mutex_t& mutex() { return _mut; }
+  static pthread_mutex_t& instance() {
+    static GlobalCreateMutex gmutex;
+    return gmutex.mutex();
+  }
+
+ private:
+  GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
+  pthread_mutex_t _mut;
+};
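AutoLock is a small RAII guard: the constructor locks the pthread mutex and the destructor unlocks it, so the lock is released on every exit path. A minimal sketch of how the pair is meant to be used (mirroring the GlobalPaddleCreateMutex usage elsewhere in this diff):

```cpp
{
  // Serialize a non-thread-safe section, e.g. predictor creation.
  AutoLock lock(GlobalCreateMutex::instance());
  // ... critical section ...
}  // lock released here
```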
class InferEngine {
@@ -152,57 +99,19 @@ class ReloadableInferEngine : public InferEngine {
    uint64_t last_revision;
  };

-  virtual int load(const InferEngineCreationParams& params) = 0;
+  virtual int load(const configure::EngineDesc& conf) = 0;

  int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
    _reload_tag_file = conf.reloadable_meta();
    _reload_mode_tag = conf.reloadable_type();
-    _model_data_path = conf.model_data_path();
+    _model_data_path = conf.model_dir();
    _infer_thread_num = conf.runtime_thread_num();
    _infer_batch_size = conf.batch_infer_size();
    _infer_batch_align = conf.enable_batch_align();

-    bool enable_memory_optimization = false;
-    if (conf.has_enable_memory_optimization()) {
-      enable_memory_optimization = conf.enable_memory_optimization();
-    }
-
-    bool static_optimization = false;
-    if (conf.has_static_optimization()) {
-      static_optimization = conf.static_optimization();
-    }
-
-    bool force_update_static_cache = false;
-    if (conf.has_force_update_static_cache()) {
-      force_update_static_cache = conf.force_update_static_cache();
-    }
-
-    if (conf.has_enable_ir_optimization()) {
-      _infer_engine_params.set_enable_ir_optimization(
-          conf.enable_ir_optimization());
-    }
-
-    _infer_engine_params.set_path(_model_data_path);
-    if (enable_memory_optimization) {
-      _infer_engine_params.set_enable_memory_optimization(true);
-      _infer_engine_params.set_static_optimization(static_optimization);
-      _infer_engine_params.set_force_update_static_cache(
-          force_update_static_cache);
-    }
-
-    if (conf.has_use_trt()) {
-      _infer_engine_params.set_use_trt(conf.use_trt());
-    }
-
-    if (conf.has_use_lite()) {
-      _infer_engine_params.set_use_lite(conf.use_lite());
-    }
-
-    if (conf.has_use_xpu()) {
-      _infer_engine_params.set_use_xpu(conf.use_xpu());
-    }
-
-    if (!check_need_reload() || load(_infer_engine_params) != 0) {
+    _conf = conf;
+
+    if (!check_need_reload() || load(conf) != 0) {
      LOG(ERROR) << "Failed to load model_data_path: " << _model_data_path;
      return -1;
    }
@@ -230,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
    if (_infer_thread_num > 0) {
      return 0;
    }
    return thrd_initialize_impl();
  }
@@ -254,13 +162,13 @@ class ReloadableInferEngine : public InferEngine {
  int reload() {
    if (check_need_reload()) {
      LOG(WARNING) << "begin reload model[" << _model_data_path << "].";
-      return load(_infer_engine_params);
+      return load(_conf);
    }
    return 0;
  }

  uint64_t version() const { return _version; }
  uint32_t thread_num() const { return _infer_thread_num; }

 private:
@@ -322,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
 protected:
  std::string _model_data_path;
-  InferEngineCreationParams _infer_engine_params;
+  configure::EngineDesc _conf;

 private:
  std::string _reload_tag_file;
@@ -361,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    return ReloadableInferEngine::proc_initialize(conf, version);
  }

-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
    if (_reload_vec.empty()) {
      return 0;
    }

    for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
-      if (load_data(_reload_vec[ti], params) != 0) {
+      if (load_data(_reload_vec[ti], conf) != 0) {
        LOG(ERROR) << "Failed reload engine model: " << ti;
        return -1;
      }
    }

-    LOG(WARNING) << "Succ load engine, path: " << params.get_path();
+    LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
    return 0;
  }

  int load_data(ModelData<EngineCore>* md,
-                const InferEngineCreationParams& params) {
+                const configure::EngineDesc& conf) {
    uint32_t next_idx = (md->current_idx + 1) % 2;
    if (md->cores[next_idx]) {
      delete md->cores[next_idx];
@@ -387,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    md->cores[next_idx] = new (std::nothrow) EngineCore;

-    params.dump();
-    if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
-      LOG(ERROR) << "Failed create model, path: " << params.get_path();
+    // params.dump();
+    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
+      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
      return -1;
    }
    md->current_idx = next_idx;
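load_data implements a double-buffered hot reload: each model keeps two EngineCore slots, the reload builds the inactive slot, and only on success does current_idx flip, so in-flight requests keep using the old core. A self-contained sketch of the pattern with hypothetical names:

```cpp
#include <cstdint>
#include <new>

struct EngineCoreSketch {
  int create(const char* model_dir) { return 0; }  // placeholder engine init
};

struct ModelDataSketch {
  EngineCoreSketch* cores[2] = {nullptr, nullptr};
  uint32_t current_idx = 0;
};

int reload_sketch(ModelDataSketch* md, const char* model_dir) {
  uint32_t next_idx = (md->current_idx + 1) % 2;   // the inactive slot
  delete md->cores[next_idx];                      // drop the stale core
  md->cores[next_idx] = new (std::nothrow) EngineCoreSketch;
  if (!md->cores[next_idx] || md->cores[next_idx]->create(model_dir) != 0) {
    return -1;  // keep serving from cores[current_idx]
  }
  md->current_idx = next_idx;                      // publish the new core
  return 0;
}
```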
@@ -400,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    // memory pool to be inited in non-serving-threads
    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
-    if (!md || load_data(md, _infer_engine_params) != 0) {
+    if (!md || load_data(md, _conf) != 0) {
      LOG(ERROR) << "Failed create thread data from "
-                << _infer_engine_params.get_path();
+                << _conf.model_dir();
      return -1;
    }
@@ -458,16 +366,16 @@ class CloneDBReloadableInferEngine
    return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
  }

-  virtual int load(const InferEngineCreationParams& params) {
+  virtual int load(const configure::EngineDesc& conf) {
    // Load process-level model data
    if (!_pd ||
-        DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
-      LOG(ERROR) << "Failed to create common model from [" << params.get_path()
+        DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
+      LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
                 << "].";
      return -1;
    }
    LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
-                << "], path[" << params.get_path() << "].";
+                << "], path[" << conf.model_dir() << "].";

    if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
      return 0;
@@ -483,7 +391,7 @@ class CloneDBReloadableInferEngine
      }
    }

-    LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
+    LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
    return 0;
  }
@@ -527,18 +435,18 @@ class CloneDBReloadableInferEngine
      _pd;  // process-level EngineCore; thread-level EngineCores share this object's model data
};
-template <typename FluidFamilyCore>
+template <typename PaddleInferenceCore>
#ifdef WITH_TRT
-class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
#else
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
#endif
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}

  std::vector<std::string> GetInputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputNames()";
    }
@@ -546,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  }

  std::vector<std::string> GetOutputNames() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetOutputNames()";
    }
@@ -556,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
      const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
    }
@@ -566,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
      const std::string& name) {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
    }
@@ -575,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
  }

  int infer_impl() {
-    FluidFamilyCore* core =
-        DBReloadableInferEngine<FluidFamilyCore>::get_core();
+    PaddleInferenceCore* core =
+        DBReloadableInferEngine<PaddleInferenceCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in infer_impl()";
      return -1;
...
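The handles returned by GetInputHandle/GetOutputHandle are standard paddle_infer::Tensor objects. A hedged usage sketch of the wrapper (the engine variable, the input name "x", and the shape are assumptions for illustration):

```cpp
// Assumes an already-initialized FluidInferEngine<...>* named engine.
std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
auto input = engine->GetInputHandle("x");   // "x" is a placeholder feed name
input->Reshape({1, 3, 224, 224});
input->CopyFromCpu(data.data());
engine->infer_impl();
auto output = engine->GetOutputHandle(engine->GetOutputNames()[0]);
```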
@@ -77,7 +77,7 @@ export PYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.8
## Install Python dependencies
```shell
-pip install -r python/requirements.txt
+pip install -r python/requirements.txt -i https://mirror.baidu.com/pypi/simple
```
If you use another Python version, use the matching `pip`.
@@ -123,14 +123,13 @@ Compared with CPU environment, GPU environment needs to refer to the following t
**Note: the following table is a reference for non-Docker compilation environments. The Docker compilation environment has already been configured with the relevant parameters, so they do not need to be specified during the cmake process.**

| cmake environment variable | meaning | GPU environment considerations | whether Docker environment is needed |
|-----------------------|-------------------------------------|-------------------------------|--------------------|
| CUDA_TOOLKIT_ROOT_DIR | CUDA installation path, usually /usr/local/cuda | Required for all environments | No (/usr/local/cuda) |
| CUDNN_LIBRARY | The directory where libcudnn.so.* is located, usually /usr/local/cuda/lib64/ | Required for all environments | No (/usr/local/cuda/lib64/) |
| CUDA_CUDART_LIBRARY | The directory where libcudart.so.* is located, usually /usr/local/cuda/lib64/ | Required for all environments | No (/usr/local/cuda/lib64/) |
| TENSORRT_ROOT | The parent directory of the directory where libnvinfer.so.* is located, depending on the TensorRT installation directory | Not needed for CUDA 9.0/10.0, needed otherwise | No (/usr) |

If you are not in a Docker environment, you can refer to the following commands; the exact paths depend on your environment, and the code is for reference only. TENSORRT_LIBRARY_PATH depends on the TensorRT version and should be set according to the actual installation: for example, in a CUDA 10.1 environment the TensorRT version is 6.0 (/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/), while in a CUDA 10.2 environment the TensorRT version is 7.1 (/usr/local/TensorRT-7.1.3.4/targets/x86_64-linux-gnu/).

``` shell
export CUDA_PATH='/usr/local/cuda'
@@ -145,7 +144,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
    -DCUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY} \
-    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH}
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
    -DSERVER=ON \
    -DWITH_GPU=ON ..
make -j10
...
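Based on the TensorRT paths mentioned above, a hedged example of setting TENSORRT_LIBRARY_PATH before the cmake step (adjust to the actual installation):

``` shell
# Example for CUDA 10.1 + TensorRT 6; use TensorRT-7.1.3.4 for CUDA 10.2.
export TENSORRT_LIBRARY_PATH='/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/'
```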
@@ -76,7 +76,7 @@ export PYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.8
## Install Python dependencies
```shell
-pip install -r python/requirements.txt
+pip install -r python/requirements.txt -i https://mirror.baidu.com/pypi/simple
```
If you use another Python version, use the matching `pip`.
@@ -128,7 +128,7 @@ make -j10
| CUDA_CUDART_LIBRARY | The directory where libcudart.so.* is located, usually /usr/local/cuda/lib64/ | Required for all environments | No (/usr/local/cuda/lib64/) |
| TENSORRT_ROOT | The parent directory of the directory where libnvinfer.so.* is located, depending on the TensorRT installation directory | Not needed for CUDA 9.0/10.0, needed otherwise | No (/usr) |

If you are not in a Docker environment, you can refer to the following commands; the exact paths depend on your environment, and the code is for reference only. TENSORRT_LIBRARY_PATH depends on the TensorRT version and should be set according to the actual installation: for example, in a CUDA 10.1 environment the TensorRT version is 6.0 (/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/), while in a CUDA 10.2 environment the TensorRT version is 7.1 (/usr/local/TensorRT-7.1.3.4/targets/x86_64-linux-gnu/).

``` shell
export CUDA_PATH='/usr/local/cuda'
@@ -143,7 +143,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
    -DCUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY} \
-    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH}
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
    -DSERVER=ON \
    -DWITH_GPU=ON ..
make -j10
@@ -159,7 +159,7 @@ make -j10
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
    -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
    -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
    -DCLIENT=ON ..
make -j10
```
...
@@ -6,17 +6,17 @@
#### Q: What are the differences and relationships among Paddle Serving, Paddle Inference, and PaddleHub Serving?

**A:** Paddle Serving is a remote service: the device that issues the prediction (mobile phone, browser, client, etc.) is not the hardware that actually runs it. Paddle Inference is a library, suited to being embedded in a larger system to guarantee inference efficiency; Paddle Serving calls Paddle Inference to provide the remote service. PaddleHub Serving can be regarded as an example; all of them use Paddle Serving as the unified entry point for the inference service. For interaction on the web side, the remote-service form is generally used and can be built with Paddle Serving's web service.

#### Q: Does paddle-serving support Int32?

**A:** In the protobuf definition, the feed_type and fetch_type codes map to data types as follows:

0-int64
1-float32
2-int32
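As an illustration of the mapping, a hedged snippet of a general_model_config entry declaring an int32 feed variable; the field names follow general_model_config.proto, and the variable name is a placeholder:

```
feed_var {
  name: "x"       # placeholder variable name
  feed_type: 2    # 2 -> int32, per the mapping above
}
```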
#### Q: Does paddle-serving support multi-threaded invocation on Windows and Linux?
@@ -37,6 +37,7 @@
## Installation issues

#### Q: During pip install of the whl package, the error message is as follows:

```
Collecting opencv-python
  Using cached opencv-python-4.3.0.38.tar.gz (88.0 MB)
@@ -69,9 +70,11 @@ Collecting opencv-python
    s = list(pattern)
TypeError: 'NoneType' object is not iterable
```

**A:** Pin the opencv-python version, `pip install opencv-python==4.2.0.32`, then install the whl package.

#### Q: During pip3 install of the whl package, the error message is as follows:

```
Complete output from command python setup.py egg_info:
Found cython-generated files...
@@ -80,13 +83,16 @@ Collecting opencv-python
----------------------------------------
Command "python setup.py egg_info" failed with error code 1 in /tmp/pip-install-taoxz02y/grpcio/
```

**A:** Upgrade pip3, then re-run the install command.

```
pip3 install --upgrade pip
pip3 install --upgrade setuptools
```

#### Q: An error occurs at runtime, with the following message:

```
Traceback (most recent call last):
  File "../../deploy/serving/test_client.py", line 18, in <module>
@@ -97,7 +103,9 @@ Traceback (most recent call last):
from shapely.geometry import Polygon
ImportError: No module named shapely.geometry
```

**A:** There are two options: install shapely via pip/pip3, or install all dependencies via pip/pip3.

```
Option 1:
pip install shapely==1.7.0
@@ -116,7 +124,69 @@ pip install -r python/requirements.txt
**A:** The JDK is not installed, or JAVA_HOME is misconfigured (it must point to the JDK path; a common mistake is configuring it as the JRE path — a correct example is JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64/"). For Java JDK installation, see https://segmentfault.com/a/1190000015389941
## Environment issues

#### Q: A CXXABI error appears during use.

This problem occurs because the gcc version used by Python does not match the gcc version Serving requires. For Docker users, we recommend the [Docker container](./RUN_IN_DOCKER_CN.md): since the Python versions inside the Docker containers are adapted to Serving before release, this kind of error will not occur there. For any other development environment, first make sure GCC 8.2 is available; if it is not, it can be installed as follows:

```bash
wget -q https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz
tar -xvf gcc-8.2.0.tar.xz && \
cd gcc-8.2.0 && \
unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \
./contrib/download_prerequisites && \
cd .. && mkdir temp_gcc82 && cd temp_gcc82 && \
../gcc-8.2.0/configure --prefix=/usr/local/gcc-8.2 --enable-threads=posix --disable-checking --disable-multilib && \
make -j8 && make install
cd .. && rm -rf temp_gcc82
cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} &&
ln -s /usr/local/gcc-8.2/lib64/libgfortran.so.5 ${lib_so_5} && \
ln -s /usr/local/gcc-8.2/lib64/libstdc++.so.6 ${lib_so_6} && \
cp /usr/local/gcc-8.2/lib64/libstdc++.so.6.0.25 ${lib_path}
```

If GCC 8.2 is already available, you can install Python yourself; we also provide two Pythons built with GCC 8.2, [Python2.7](https://paddle-serving.bj.bcebos.com/others/Python2.7.17-gcc82.tar) and [Python3.6](https://paddle-serving.bj.bcebos.com/others/Python3.6.10-gcc82.tar). After downloading and extracting them, set the corresponding directory as `PYTHONROOT` and configure `PATH` and `LD_LIBRARY_PATH`.

```bash
export PYTHONROOT=/path/of/python  # the extracted Python directory
export PATH=$PYTHONROOT/bin:$PATH
export LD_LIBRARY_PATH=$PYTHONROOT/lib:$LD_LIBRARY_PATH
```
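A hedged way to confirm which CXXABI versions the libstdc++ in use actually provides (the path is an example; point it at the library your process loads):

```bash
strings /usr/local/gcc-8.2/lib64/libstdc++.so.6 | grep CXXABI
```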
#### Q: The libstdc++.so.6 version is too old.

This problem is triggered because the Paddle Serving executables and dynamic libraries are built with GCC 8.2 (the server executables for CUDA 9.0 and 10.0 are built with GCC 4.8 due to CUDA compatibility constraints). When Python makes the call, it may link against a `libstdc++.so` from a different GCC version. What you need to do is first make sure GCC 8.2 is available in the environment, then copy GCC 8.2's `libstdc++.so.*` into some directory such as `/home/libstdcpp`, and finally run `export LD_LIBRARY_PATH=/home/libstdcpp:$LD_LIBRARY_PATH`.
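A minimal sketch of the copy-and-export fix just described (the /home/libstdcpp directory is only an example):

```bash
mkdir -p /home/libstdcpp
cp /usr/local/gcc-8.2/lib64/libstdc++.so.6* /home/libstdcpp/
export LD_LIBRARY_PATH=/home/libstdcpp:$LD_LIBRARY_PATH
```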
#### Q: The symbol OPENSSL_1.0.1EC cannot be found.

Currently, Serving's executables and the client dynamic library need to link against version 1.0.2k of the openssl dynamic libraries. If they are not present in the environment, you can run:

```bash
wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && \
ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && \
ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && \
ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && \
ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so
```

Here `/usr/lib` can be replaced with another directory, as long as that directory is on `LD_LIBRARY_PATH`.
### GPU-related environment issues

#### Q: What checks are needed to make sure Serving can run in a GPU environment?

**Note: if you use the images provided by Serving, the checks below are not needed; for other development environments they can serve as guidance.**

First make sure `nvidia-smi` works, and then make sure the required dynamic libraries (.so files) are in directories covered by `LD_LIBRARY_PATH` (including the system lib directories).

(1) The CUDA driver library: usually named `libcuda.so.$DRIVER_VERSION`; for example, for driver version 440.10.15 the file name is `libcuda.so.440.10.15`.

(2) The CUDA and cuDNN dynamic libraries: usually named `libcudart.so.$CUDA_VERSION` and `libcudnn.so.$CUDNN_VERSION`, e.g. `libcudart.so.9.0` for CUDA 9 and `libcudnn.so.7` for cuDNN 7. For the CUDA/cuDNN versions matching each Serving release, see [the full list of Serving images](DOCKER_IMAGES_CN.md#%E9%99%84%E5%BD%95%E6%89%80%E6%9C%89%E9%95%9C%E5%83%8F%E5%88%97%E8%A1%A8).

(3) CUDA 10.1 and later require TensorRT. For a script that installs the TensorRT files, see [install_trt.sh](../tools/dockerfile/build_scripts/install_trt.sh).
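Hedged sanity checks corresponding to the three items above (library names follow the text; exact paths vary by system):

```bash
nvidia-smi                                          # driver visible
ldconfig -p | grep -E 'libcuda|libcudart|libcudnn'  # CUDA/cuDNN libraries found
ldconfig -p | grep libnvinfer                       # TensorRT (CUDA 10.1+)
echo $LD_LIBRARY_PATH
```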
## Deployment issues
@@ -154,7 +224,7 @@ InvalidArgumentError: Device id must be less than GPU count, but received id is:
**A:** 1) Use [GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker) to resolve the environment problem.

2) Change the gcc version of the Python installed in the anaconda virtual environment ([reference](https://www.jianshu.com/p/c498b3d86f77)).

#### Q: Does paddle-serving support local offline installation?
@@ -221,9 +291,10 @@ The client logs are printed directly to standard output.
**A:** 1) The warning is printed by the glog component; it indicates that logs are printed to STDERR before glog is initialized.

2) Generally the service is started with GLOG_v, which sets the log level at the same time.

For example:
```
GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
```
...
@@ -13,13 +13,5 @@
# limitations under the License

if (NOT CLIENT_ONLY)
-  add_subdirectory(inferencer-fluid-cpu)
-  if (WITH_GPU)
-    add_subdirectory(inferencer-fluid-gpu)
-  endif()
-  if (WITH_LITE)
-    add_subdirectory(inferencer-fluid-arm)
-  endif()
+  add_subdirectory(paddle)
endif()
-FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
-add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
-target_include_directories(fluid_arm_engine PUBLIC
-    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
-target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
-install(TARGETS fluid_arm_engine
-    ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
-)
...
-FILE(GLOB fluid_cpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
-add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})
-target_include_directories(fluid_cpu_engine PUBLIC
-    ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_cpu_engine pdserving extern_paddle configure)
-target_link_libraries(fluid_cpu_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
-install(TARGETS fluid_cpu_engine
-    ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
-)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
class FluidCpuAnalysisCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidCpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
public:
void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path note exits: "
<< data_path;
return -1;
}
std::string model_buffer, params_buffer, key_buffer;
ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
ReadBinaryFile(data_path + "key", &key_buffer);
VLOG(2) << "prepare for encryption model";
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
Config analysis_config;
// paddle::AnalysisConfig analysis_config;
analysis_config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
analysis_config.DisableGpu();
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
VLOG(2) << "decrypt model file sucess";
_core = CreatePredictor(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_DIR");
#if 1
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_ENCRYPT");
#endif
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
target_include_directories(fluid_gpu_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS fluid_gpu_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
DECLARE_int32(gpuid);
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
using configure::SigmoidConf;
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
class FluidGpuAnalysisCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.EnableUseGpu(100, FLAGS_gpuid);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidGpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetModel(data_path);
config.EnableUseGpu(1500, FLAGS_gpuid);
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
int max_batch = 32;
int min_subgraph_size = 3;
if (params.use_trt()) {
config.EnableTensorRtEngine(1 << 20,
max_batch,
min_subgraph_size,
Config::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
} else {
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
LOG(INFO) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
LOG(INFO) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
public:
void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path note exits: "
<< data_path;
return -1;
}
std::string model_buffer, params_buffer, key_buffer;
ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
ReadBinaryFile(data_path + "key", &key_buffer);
VLOG(2) << "prepare for encryption model";
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
Config analysis_config;
analysis_config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
VLOG(2) << "decrypt model file sucess";
_core = CreatePredictor(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h"
#include "core/predictor/framework/factory.h"
DEFINE_int32(gpuid, 0, "GPU device id to use");
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_ENCRPT")
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
FILE(GLOB paddle_inference_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(paddle_inference_engine ${paddle_inference_engine_srcs})
target_include_directories(paddle_inference_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(paddle_inference_engine pdserving extern_paddle configure)
target_link_libraries(paddle_inference_engine pdserving paddle_inference -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS paddle_inference_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -17,275 +17,174 @@ ...@@ -17,275 +17,174 @@
#include <pthread.h> #include <pthread.h>
#include <fstream> #include <fstream>
#include <map> #include <map>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
#include "core/configure/include/configure_parser.h" #include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h" #include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT #include "paddle_inference_api.h" // NOLINT
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace fluid_arm { namespace inference {
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config; using paddle_infer::Config;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor; using paddle_infer::Predictor;
using paddle_infer::Tensor; using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor; using paddle_infer::CreatePredictor;
// data interface DECLARE_int32(gpuid);
class FluidFamilyCore {
static const int max_batch = 32;
static const int min_subgraph_size = 3;
// Engine Base
class PaddleEngineBase {
public: public:
virtual ~FluidFamilyCore() {} virtual ~PaddleEngineBase() {}
virtual std::vector<std::string> GetInputNames() { virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames(); return _predictor->GetInputNames();
} }
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) { virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name); return _predictor->GetInputHandle(name);
} }
virtual std::vector<std::string> GetOutputNames() { virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames(); return _predictor->GetOutputNames();
} }
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) { virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name); return _predictor->GetOutputHandle(name);
} }
virtual bool Run() { virtual bool Run() {
if (!_core->Run()) { if (!_predictor->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor"; LOG(ERROR) << "Failed call Run with paddle predictor";
return false; return false;
} }
return true; return true;
} }
virtual int create(const predictor::InferEngineCreationParams& params) = 0; virtual int create(const configure::EngineDesc& conf) = 0;
virtual int clone(void* origin_core) { virtual int clone(void* predictor) {
if (origin_core == NULL) { if (predictor == NULL) {
LOG(ERROR) << "origin paddle Predictor is null."; LOG(ERROR) << "origin paddle Predictor is null.";
return -1; return -1;
} }
Predictor* p_predictor = (Predictor*)origin_core; Predictor* prep = static_cast<Predictor*>(predictor);
_core = p_predictor->Clone(); _predictor = prep->Clone();
if (_core.get() == NULL) { if (_predictor.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core; LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
return -1; return -1;
} }
return 0; return 0;
} }
virtual void* get() { return _core.get(); } virtual void* get() { return _predictor.get(); }
protected: protected:
std::shared_ptr<Predictor> _core; std::shared_ptr<Predictor> _predictor;
}; };
// infer interface // Paddle Inference Engine
class FluidArmAnalysisCore : public FluidFamilyCore { class PaddleInferenceEngine : public PaddleEngineBase {
public: public:
int create(const predictor::InferEngineCreationParams& params) { int create(const configure::EngineDesc& engine_conf) {
std::string data_path = params.get_path(); std::string model_path = engine_conf.model_dir();
if (access(data_path.c_str(), F_OK) == -1) { if (access(model_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: " LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path; << model_path;
return -1; return -1;
} }
Config config; Config config;
config.SetParamsFile(data_path + "/__params__"); // todo, auto config(zhangjun)
config.SetProgFile(data_path + "/__model__"); if (engine_conf.has_combined_model()) {
config.DisableGpu(); if (!engine_conf.combined_model()) {
config.SetCpuMathLibraryNumThreads(1); config.SetModel(model_path);
} else {
if (params.use_lite()) { config.SetParamsFile(model_path + "/__params__");
config.EnableLiteEngine(PrecisionType::kFloat32, true); config.SetProgFile(model_path + "/__model__");
} }
if (params.use_xpu()) {
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else { } else {
config.SwitchIrOptim(false); config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
} }
config.SwitchSpecifyInputNames(true); config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance()); config.SetCpuMathLibraryNumThreads(1);
_core = CreatePredictor(config); if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
if (NULL == _core.get()) { // 2000MB GPU memory
LOG(ERROR) << "create paddle predictor failed, path: " << data_path; config.EnableUseGpu(2000, FLAGS_gpuid);
return -1;
} }
VLOG(2) << "create paddle predictor sucess, path: " << data_path; if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
return 0; if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
} config.EnableUseGpu(2000, FLAGS_gpuid);
}; }
config.EnableTensorRtEngine(1 << 20,
class FluidArmAnalysisDirCore : public FluidFamilyCore { max_batch,
public: min_subgraph_size,
int create(const predictor::InferEngineCreationParams& params) { Config::Precision::kFloat32,
std::string data_path = params.get_path(); false,
if (access(data_path.c_str(), F_OK) == -1) { false);
LOG(ERROR) << "create paddle predictor failed, path not exits: " LOG(INFO) << "create TensorRT predictor";
<< data_path;
return -1;
} }
Config config; if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true); config.EnableLiteEngine(PrecisionType::kFloat32, true);
} }
if (params.use_xpu()) { if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
// 2 MB l3 cache
config.EnableXpu(2 * 1024 * 1024); config.EnableXpu(2 * 1024 * 1024);
} }
if (engine_conf.has_enable_ir_optimization() &&
if (params.enable_memory_optimization()) { !engine_conf.enable_ir_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false); config.SwitchIrOptim(false);
} else {
config.SwitchIrOptim(true);
} }
AutoLock lock(GlobalPaddleCreateMutex::instance()); if (engine_conf.has_enable_memory_optimization() &&
_core = CreatePredictor(config); engine_conf.enable_memory_optimization()) {
if (NULL == _core.get()) { config.EnableMemoryOptim();
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
} }
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() { if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
_row = 0; // decrypt model
_col = 0; std::string model_buffer, params_buffer, key_buffer;
if (_params != NULL) { predictor::ReadBinaryFile(model_path + "encrypt_model", &model_buffer);
free(_params); predictor::ReadBinaryFile(model_path + "encrypt_params", &params_buffer);
_params = NULL; predictor::ReadBinaryFile(model_path + "key", &key_buffer);
}
}
int load() { auto cipher = paddle::MakeCipher("");
if (_params == NULL || _row <= 0 || _col <= 0) { std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
LOG(ERROR) << "load parameter error [not inited]."; std::string real_params_buffer =
return -1; cipher->Decrypt(params_buffer, key_buffer);
config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
} }
FILE* fs = fopen(_file_name.c_str(), "rb"); predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
if (fs == NULL) { _predictor = CreatePredictor(config);
LOG(ERROR) << "load " << _file_name << " fopen error."; if (NULL == _predictor.get()) {
return -1; LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1; return -1;
} }
uint32_t matrix_size = _row * _col; VLOG(2) << "create paddle predictor success, path: " << model_path;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0; return 0;
} }
public:
std::string _file_name;
int _row;
int _col;
float* _params;
}; };
} // namespace fluid_arm } // namespace inference
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
...@@ -12,24 +12,20 @@ ...@@ -12,24 +12,20 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h" #include "paddle_inference/paddle/include/paddle_engine.h"
#include "core/predictor/framework/factory.h" #include "core/predictor/framework/factory.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace fluid_arm { namespace inference {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( DEFINE_int32(gpuid, 0, "GPU device id to use");
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME( REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine< ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
FluidArmAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine, ::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR"); "PADDLE_INFER");
} // namespace fluid_arm } // namespace inference
} // namespace paddle_serving } // namespace paddle_serving
} // namespace baidu } // namespace baidu
if (CLIENT) if (CLIENT)
file(INSTALL pipeline DESTINATION paddle_serving_client) file(INSTALL pipeline DESTINATION paddle_serving_client)
file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
set(PY_FILES ${SERVING_CLIENT_PY_FILES}) set(PY_FILES ${SERVING_CLIENT_PY_FILES})
SET(PACKAGE_NAME "serving_client") SET(PACKAGE_NAME "serving_client")
set(SETUP_LOG_FILE "setup.py.client.log") set(SETUP_LOG_FILE "setup.py.client.log")
endif() endif()
if (SERVER) if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE) SET(SERVER_PACKAGE_NAME "paddle-serving-server")
file(INSTALL pipeline DESTINATION paddle_serving_server) if (WITH_GPU)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) set(SERVER_PACKAGE_NAME "paddle-serving-server-gpu")
else() elseif(WITH_XPU)
file(INSTALL pipeline DESTINATION paddle_serving_server_gpu) set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu")
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py) endif()
endif() file(INSTALL pipeline DESTINATION paddle_serving_server)
set(PY_FILES ${SERVING_SERVER_PY_FILES}) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
SET(PACKAGE_NAME "serving_server") set(PY_FILES ${SERVING_SERVER_PY_FILES})
set(SETUP_LOG_FILE "setup.py.server.log") set(SETUP_LOG_FILE "setup.py.server.log")
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/util.py configure_file(${CMAKE_CURRENT_SOURCE_DIR}/util.py
${CMAKE_CURRENT_BINARY_DIR}/util.py) ${CMAKE_CURRENT_BINARY_DIR}/util.py)
if (CLIENT) if (CLIENT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
${CMAKE_CURRENT_BINARY_DIR}/python_tag.py) ${CMAKE_CURRENT_BINARY_DIR}/python_tag.py)
endif() endif()
if (APP) if (APP)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
endif() endif()
if (SERVER) if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
else()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
endif()
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py
...@@ -50,108 +45,73 @@ set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so) ...@@ -50,108 +45,73 @@ set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so)
message("python env: " ${py_env}) message("python env: " ${py_env})
if (APP) if (APP)
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app" COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES}) DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif() endif()
if (CLIENT) if (CLIENT)
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client" COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES}) DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif() endif()
if (SERVER) if (SERVER)
if(NOT WITH_GPU AND NOT WITH_LITE) # todo, generate suffix for cpu、gpu、arm
add_custom_command( if(WITH_TRT)
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp if(CUDA_VERSION EQUAL 10.1)
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ set(VERSION_SUFFIX 101)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "server" elseif(CUDA_VERSION EQUAL 10.2)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel set(VERSION_SUFFIX 102)
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) elseif(CUDA_VERSION EQUAL 11.0)
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) set(VERSION_SUFFIX 11)
elseif(WITH_TRT)
if(CUDA_VERSION EQUAL 10.1)
set(SUFFIX 101)
elseif(CUDA_VERSION EQUAL 10.2)
set(SUFFIX 102)
elseif(CUDA_VERSION EQUAL 11.0)
set(SUFFIX 11)
endif()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" ${SUFFIX}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_LITE)
if(WITH_XPU)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm-xpu
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" ${CUDA_VERSION_MAJOR}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif() endif()
endif()
if(WITH_LITE)
set(VERSION_SUFFIX 2)
endif()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server" ${VERSION_SUFFIX}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif() endif()
set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
if (CLIENT) if (CLIENT)
install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR} install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR}
DESTINATION opt/serving_client/share/wheels DESTINATION opt/serving_client/share/wheels
) )
endif() endif()
if (SERVER) if (SERVER)
install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR} install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR}
DESTINATION opt/serving_server/share/wheels DESTINATION opt/serving_server/share/wheels
) )
endif() endif()
if (CLIENT OR SERVER) if (CLIENT OR SERVER)
find_program(PATCHELF_EXECUTABLE patchelf) find_program(PATCHELF_EXECUTABLE patchelf)
if (NOT PATCHELF_EXECUTABLE) if (NOT PATCHELF_EXECUTABLE)
message(FATAL_ERROR "patchelf not found, please install it.\n" message(FATAL_ERROR "patchelf not found, please install it.\n"
"For Ubuntu, the command is: apt-get install -y patchelf.") "For Ubuntu, the command is: apt-get install -y patchelf.")
endif() endif()
endif() endif()
...@@ -49,7 +49,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #c ...@@ -49,7 +49,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #c
``` ```
Or, to start the GPU inference service, run Or, to start the GPU inference service, run
``` ```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0 python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
``` ```
### RPC Inference ### RPC Inference
......
...@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 # ...@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #
``` ```
或者,启动gpu预测服务,执行 或者,启动gpu预测服务,执行
``` ```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务 python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
``` ```
......
...@@ -12,7 +12,7 @@ else ...@@ -12,7 +12,7 @@ else
mkdir utilization mkdir utilization
fi fi
#start server #start server
$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 & $PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
sleep 5 sleep 5
#warm up #warm up
......
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export FLAGS_profile_client=1 export FLAGS_profile_client=1
export FLAGS_profile_server=1 export FLAGS_profile_server=1
sleep 5 sleep 5
......
...@@ -14,9 +14,9 @@ ...@@ -14,9 +14,9 @@
import os import os
import sys import sys
from paddle_serving_server_gpu import OpMaker from paddle_serving_server import OpMaker
from paddle_serving_server_gpu import OpSeqMaker from paddle_serving_server import OpSeqMaker
from paddle_serving_server_gpu import Server from paddle_serving_server import Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
......
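For context, the imports above are typically assembled into a complete low-level server script along the lines of the sketch below; the general_reader/general_infer/general_response op chain, model directory, port, and device follow the public Paddle Serving examples and are assumptions here, not code from this diff.
```
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
read_op = op_maker.create('general_reader')
infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')

# Chain the ops into a sequential execution graph.
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.load_model_config('bert_seq128_model')  # placeholder model directory
server.prepare_server(workdir='workdir', port=9292, device='gpu')
server.run_server()
```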
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader from paddle_serving_app.reader import ChineseBertReader
import sys import sys
import os import os
......
...@@ -10,7 +10,7 @@ If you want to have more detection models, please refer to [Paddle Detection Mod ...@@ -10,7 +10,7 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service ### Start the service
``` ```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0 python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
``` ```
### Perform prediction ### Perform prediction
......
...@@ -10,7 +10,7 @@ sh get_data.sh ...@@ -10,7 +10,7 @@ sh get_data.sh
### 启动服务 ### 启动服务
``` ```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0 python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
``` ```
### 执行预测 ### 执行预测
......
...@@ -20,7 +20,7 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear. ...@@ -20,7 +20,7 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear.
``` ```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service
python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0 python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
``` ```
### RPC Infer ### RPC Infer
......
...@@ -20,7 +20,7 @@ mv models/ctr_serving_model . ...@@ -20,7 +20,7 @@ mv models/ctr_serving_model .
``` ```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务 python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务
python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务 python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务
``` ```
### 执行预测 ### 执行预测
......
...@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494 python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### 启动服务端 ### 启动服务端
``` ```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494 python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
``` ```
### 客户端预测 ### 客户端预测
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf faster_rcnn_r50_fpn_1x_coco.tar tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT. If you want faster inference, please use `--use_trt`. This model supports TensorRT. If you want faster inference, please use `--use_trt`.
......
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf faster_rcnn_r50_fpn_1x_coco.tar tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ppyolo_r50vd_dcn_1x_coco.tar tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT. If you want faster inference, please use `--use_trt`. This model supports TensorRT. If you want faster inference, please use `--use_trt`.
......
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf ppyolo_r50vd_dcn_1x_coco.tar tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf ttfnet_darknet53_1x_coco.tar tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT. If you want faster inference, please use `--use_trt`. This model supports TensorRT. If you want faster inference, please use `--use_trt`.
......
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf ttfnet_darknet53_1x_coco.tar tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service ### Start the service
``` ```
tar xf yolov3_darknet53_270e_coco.tar tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
This model supports TensorRT. If you want faster inference, please use `--use_trt`. This model supports TensorRT. If you want faster inference, please use `--use_trt`.
......
...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ ...@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务 ### 启动服务
``` ```
tar xf yolov3_darknet53_270e_coco.tar tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0 python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
``` ```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。 该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
...@@ -26,7 +26,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ...@@ -26,7 +26,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
``` ```
GPU Service GPU Service
``` ```
python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
``` ```
## Prediction ## Prediction
......
...@@ -24,7 +24,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_ ...@@ -24,7 +24,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
``` ```
GPU预测服务 GPU预测服务
``` ```
python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0 python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
``` ```
## 预测 ## 预测
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
import os import os
import sys import sys
from paddle_serving_server_gpu import OpMaker from paddle_serving_server import OpMaker
from paddle_serving_server_gpu import OpSeqMaker from paddle_serving_server import OpSeqMaker
from paddle_serving_server_gpu import MultiLangServer as Server from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
......
...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service ## Start RPC Service
``` ```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
``` ```
## Prediction ## Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## 启动RPC服务 ## 启动RPC服务
``` ```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
``` ```
## 预测 ## 预测
......
...@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu ...@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
``` ```
``` ```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
``` ```
The client sends an inference request The client sends an inference request
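The client-side call paired with this service is sketched below; the client config path, image file, and fetch name are assumptions modeled on the public image-classification example rather than content taken from this diff.
```
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize

# Standard ImageNet-style preprocessing pipeline.
seq = Sequential([
    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
    Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])

client = Client()
client.load_client_config("ResNet50_vd_client_config/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9696"])

img = seq("daisy.jpg")  # placeholder image file
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"])
```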
......
...@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu ...@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
``` ```
``` ```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务 python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
``` ```
client端进行预测 client端进行预测
......
...@@ -2,7 +2,7 @@ rm profile_log* ...@@ -2,7 +2,7 @@ rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1 export FLAGS_profile_server=1
export FLAGS_profile_client=1 export FLAGS_profile_client=1
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog & python -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
sleep 5 sleep 5
gpu_id=0 gpu_id=0
......
...@@ -25,7 +25,7 @@ device = sys.argv[2] ...@@ -25,7 +25,7 @@ device = sys.argv[2]
if device == "cpu": if device == "cpu":
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
else: else:
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
class ImageService(WebService): class ImageService(WebService):
......
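For reference, a WebService subclass of this kind is usually wired up roughly as follows; the service name, model directory, port, device, and the preprocess body are placeholders, not code from this diff.
```
from paddle_serving_server.web_service import WebService


class ImageService(WebService):
    def preprocess(self, feed=[], fetch=[]):
        # Placeholder hook: turn the incoming request payload into feed tensors here.
        return feed, fetch


image_service = ImageService(name="image")
image_service.load_model_config("serving_server")  # placeholder model directory
image_service.prepare_server(workdir="workdir", port=9393, device="gpu")
image_service.run_rpc_service()
image_service.run_web_service()
```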
...@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393 python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
......
...@@ -26,7 +26,7 @@ tar xf test_imgs.tar ...@@ -26,7 +26,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu python ocr_web_server.py cpu
#for gpu user #for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu python ocr_web_server.py gpu
``` ```
......
...@@ -25,7 +25,7 @@ tar xf test_imgs.tar ...@@ -25,7 +25,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu python ocr_web_server.py cpu
#for gpu user #for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu python ocr_web_server.py gpu
``` ```
......
...@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor ...@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
import time import time
......
...@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor ...@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
import time import time
......
...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor ...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import LocalPredictor from paddle_serving_app.local_predict import LocalPredictor
......
...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor ...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
import time import time
......
...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor ...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
import time import time
......
...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor ...@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu': if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu': elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
import time import time
......
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
'''
2021-03-16 10:26:01,832 ==================== TRACER ======================
2021-03-16 10:26:01,838 Op(bert):
2021-03-16 10:26:01,838 in[5.7833 ms]
2021-03-16 10:26:01,838 prep[8.2001 ms]
2021-03-16 10:26:01,838 midp[198.79853333333332 ms]
2021-03-16 10:26:01,839 postp[0.8411 ms]
2021-03-16 10:26:01,839 out[0.9440666666666667 ms]
2021-03-16 10:26:01,839 idle[0.03135320683677345]
2021-03-16 10:26:01,839 DAGExecutor:
2021-03-16 10:26:01,839 Query count[30]
2021-03-16 10:26:01,839 QPS[3.0 q/s]
2021-03-16 10:26:01,839 Succ[1.0]
2021-03-16 10:26:01,839 Error req[]
2021-03-16 10:26:01,839 Latency:
2021-03-16 10:26:01,839 ave[237.85519999999997 ms]
2021-03-16 10:26:01,839 .50[179.937 ms]
2021-03-16 10:26:01,839 .60[179.994 ms]
2021-03-16 10:26:01,839 .70[180.515 ms]
2021-03-16 10:26:01,840 .80[180.735 ms]
2021-03-16 10:26:01,840 .90[182.275 ms]
2021-03-16 10:26:01,840 .95[182.789 ms]
2021-03-16 10:26:01,840 .99[1921.33 ms]
2021-03-16 10:26:01,840 Channel (server worker num[1]):
2021-03-16 10:26:01,840 chl0(In: ['@DAGExecutor'], Out: ['bert']) size[0/0]
2021-03-16 10:26:01,841 chl1(In: ['bert'], Out: ['@DAGExecutor']) size[0/0]
'''
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin, Loader=yaml.SafeLoader)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device):
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu":
config["op"]["bert"]["local_service_conf"]["device_type"] = 1
config["op"]["bert"]["local_service_conf"]["devices"] = "2"
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/bert/prediction"
start = time.time()
with open("data-c.txt", 'r') as fin:
start = time.time()
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
keys = list(feed.keys())
values = [feed[x] for x in keys]
data = {"key": keys, "value": values}
r = requests.post(url=url, data=json.dumps(data))
start_idx += batch_size
if start_idx > 2000:
break
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
with open("data-c.txt", 'r') as fin:
start = time.time()
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
ret = client.predict(feed_dict=feed, fetch=["res"])
start_idx += batch_size
if start_idx > 1000:
break
end = time.time()
return [[end - start]]
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gen_yml(device)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="bert"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
rm -rf profile_log_$modelname
for thread_num in 1 8 16
do
for batch_size in 1 10 100
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
for thread_num in 1 8 16
do
for batch_size in 1 10 100
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:rpc ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
cat benchmark.log >> profile_log_$modelname
done
done
dag:
is_thread_op: false
tracer:
interval_s: 10
http_port: 18082
op:
bert:
local_service_conf:
client_type: local_predictor
concurrency: 2
device_type: 1
devices: '2'
fetch_list:
- pooled_output
model_config: bert_seq128_model/
rpc_port: 9998
worker_num: 20
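As a quick sanity check of this configuration, a single HTTP request can be sent to the port and op name defined above (18082, `bert`); the key/value payload below mirrors `run_http()` in the benchmark script, and the sample sentence is only a placeholder.

```python
import json
import requests

# One sentence per numbered key, matching the feed convention in run_http()
url = "http://127.0.0.1:18082/bert/prediction"
data = {"key": ["0"], "value": ["A short test sentence for the BERT pipeline."]}
resp = requests.post(url=url, data=json.dumps(data))
print(resp.json())
```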
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
mv bert_chinese_L-12_H-768_A-12_model bert_seq128_model
mv bert_chinese_L-12_H-768_A-12_client bert_seq128_client
wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
batch_size = 101
with open("data-c.txt", 'r') as fin:
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
ret = client.predict(feed_dict=feed, fetch=["res"])
print(ret)
start_idx += batch_size
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import sys
from paddle_serving_app.reader import ChineseBertReader
_LOGGER = logging.getLogger()
class BertOp(Op):
def init_op(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
print("input dict", input_dict)
batch_size = len(input_dict.keys())
feed_res = []
for i in range(batch_size):
feed_dict = self.reader.process(input_dict[str(i)].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((1, len(feed_dict[key]), 1))
feed_res.append(feed_dict)
feed_dict = {}
for key in feed_res[0].keys():
feed_dict[key] = np.concatenate([x[key] for x in feed_res], axis=0)
print(key, feed_dict[key].shape)
return feed_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
fetch_dict["pooled_output"] = str(fetch_dict["pooled_output"])
return fetch_dict, None, ""
class BertService(WebService):
def get_pipeline_response(self, read_op):
bert_op = BertOp(name="bert", input_ops=[read_op])
return bert_op
bert_service = BertService(name="bert")
bert_service.prepare_pipeline_config("config2.yml")
bert_service.run_service()
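The batching done in `BertOp.preprocess` above can be illustrated in isolation; the feature name `input_ids` below is only a placeholder for whatever keys `ChineseBertReader.process` actually returns.

```python
import numpy as np

# Each sample's feature of length 128 is reshaped to (1, 128, 1), then the
# per-sample dicts are concatenated along axis 0 into one batched feed.
samples = [np.arange(128) for _ in range(3)]  # three dummy token-id sequences
feed_res = [{"input_ids": s.reshape((1, len(s), 1))} for s in samples]
feed_dict = {k: np.concatenate([d[k] for d in feed_res], axis=0) for k in feed_res[0]}
print(feed_dict["input_ids"].shape)  # (3, 128, 1)
```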
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError: except ImportError:
from paddle_serving_server.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import sys import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try: try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError: except ImportError:
from paddle_serving_server.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
import logging import logging
......
...@@ -22,7 +22,7 @@ import logging ...@@ -22,7 +22,7 @@ import logging
try: try:
from paddle_serving_server.web_service import WebService from paddle_serving_server.web_service import WebService
except ImportError: except ImportError:
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger()
user_handler = logging.StreamHandler() user_handler = logging.StreamHandler()
......
import sys
import os
import base64
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin, Loader=yaml.SafeLoader)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device):
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu":
config["op"]["det"]["local_service_conf"]["device_type"] = 1
config["op"]["det"]["local_service_conf"]["devices"] = "2"
config["op"]["rec"]["local_service_conf"]["device_type"] = 1
config["op"]["rec"]["local_service_conf"]["devices"] = "2"
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:9999/ocr/prediction"
start = time.time()
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"key": ["image"], "value": [image]}
for i in range(100):
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:18090'])
start = time.time()
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(100):
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
end = time.time()
return [[end - start]]
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gen_yml(device)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
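For a one-off request outside the benchmark loop, the RPC path used by `run_rpc()` above can be exercised directly; the image path is an assumed sample under the `imgs/` directory.

```python
import base64
from paddle_serving_server.pipeline import PipelineClient

client = PipelineClient()
client.connect(['127.0.0.1:18090'])  # rpc port used by run_rpc() above
with open("imgs/1.jpg", "rb") as f:  # assumed sample image
    image = base64.b64encode(f.read()).decode('utf8')
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
print(ret)
```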
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="ocr"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
rm -rf profile_log_$modelname
for thread_num in 1 8 16
do
for batch_size in 1
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
for thread_num in 1 8 16
do
for batch_size in 1
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:rpc ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
cat benchmark.log >> profile_log_$modelname
done
done
...@@ -6,7 +6,7 @@ http_port: 9999 ...@@ -6,7 +6,7 @@ http_port: 9999
#worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG #worker_num: maximum concurrency. When build_dag_each_worker=True, the framework creates worker_num processes, each building its own gRPC server and DAG
##When build_dag_each_worker=False, the framework sets max_workers=worker_num for the main thread's gRPC thread pool ##When build_dag_each_worker=False, the framework sets max_workers=worker_num for the main thread's gRPC thread pool
worker_num: 1 worker_num: 5
#build_dag_each_worker: if False, the framework builds one DAG inside the process; if True, each worker process builds its own independent DAG #build_dag_each_worker: if False, the framework builds one DAG inside the process; if True, each worker process builds its own independent DAG
build_dag_each_worker: false build_dag_each_worker: false
...@@ -20,6 +20,9 @@ dag: ...@@ -20,6 +20,9 @@ dag:
#Profiling switch: True generates Timeline profiling data (with some performance overhead); False disables it #Profiling switch: True generates Timeline profiling data (with some performance overhead); False disables it
use_profile: false use_profile: false
tracer:
interval_s: 10
op: op:
det: det:
#Concurrency: thread-level concurrency when is_thread_op=True, otherwise process-level concurrency #Concurrency: thread-level concurrency when is_thread_op=True, otherwise process-level concurrency
...@@ -37,7 +40,7 @@ op: ...@@ -37,7 +40,7 @@ op:
fetch_list: ["concat_1.tmp_0"] fetch_list: ["concat_1.tmp_0"]
#Device IDs: inference runs on CPU when devices is "" or unset; values like "0" or "0,1,2" select the GPU cards to use #Device IDs: inference runs on CPU when devices is "" or unset; values like "0" or "0,1,2" select the GPU cards to use
devices: "0" devices: "2"
rec: rec:
#Concurrency: thread-level concurrency when is_thread_op=True, otherwise process-level concurrency #Concurrency: thread-level concurrency when is_thread_op=True, otherwise process-level concurrency
concurrency: 2 concurrency: 2
...@@ -61,4 +64,4 @@ op: ...@@ -61,4 +64,4 @@ op:
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
#Device IDs: inference runs on CPU when devices is "" or unset; values like "0" or "0,1,2" select the GPU cards to use #Device IDs: inference runs on CPU when devices is "" or unset; values like "0" or "0,1,2" select the GPU cards to use
devices: "0" devices: "2"
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: try:
from paddle_serving_server_gpu.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
except ImportError: except ImportError:
from paddle_serving_server.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
......
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: try:
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging import logging
import numpy as np import numpy as np
import cv2 import cv2
...@@ -45,16 +45,19 @@ class DetOp(Op): ...@@ -45,16 +45,19 @@ class DetOp(Op):
def preprocess(self, input_dicts, data_id, log_id): def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8')) imgs = []
data = np.fromstring(data, np.uint8) for key in input_dict.keys():
# Note: class variables(self.var) can only be used in process op mode data = base64.b64decode(input_dict[key].encode('utf8'))
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR) data = np.fromstring(data, np.uint8)
self.ori_h, self.ori_w, _ = self.im.shape self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
det_img = self.det_preprocess(self.im) self.ori_h, self.ori_w, _ = self.im.shape
_, self.new_h, self.new_w = det_img.shape det_img = self.det_preprocess(self.im)
return {"image": det_img[np.newaxis, :].copy()}, False, None, "" _, self.new_h, self.new_w = det_img.shape
imgs.append(det_img[np.newaxis, :].copy())
return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id): def postprocess(self, input_dicts, fetch_dict, log_id):
# print(fetch_dict)
det_out = fetch_dict["concat_1.tmp_0"] det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [ ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
...@@ -62,7 +65,6 @@ class DetOp(Op): ...@@ -62,7 +65,6 @@ class DetOp(Op):
dt_boxes_list = self.post_func(det_out, [ratio_list]) dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w]) dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im} out_dict = {"dt_boxes": dt_boxes, "image": self.im}
print("out dict", out_dict)
return out_dict, None, "" return out_dict, None, ""
...@@ -112,5 +114,5 @@ class OcrService(WebService): ...@@ -112,5 +114,5 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr") uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml") uci_service.prepare_pipeline_config("config2.yml")
uci_service.run_service() uci_service.run_service()
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def gen_yml():
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 5}
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/uci/prediction"
start = time.time()
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
all_value = ";".join([value for i in range(batch_size)])
data = {"key": ["x"], "value": [all_value]}
for i in range(1000):
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
start = time.time()
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
all_value = ";".join([value for i in range(batch_size)])
data = {"key": "x", "value": all_value}
for i in range(1000):
ret = client.predict(feed_dict={data["key"]: data["value"]}, fetch=["res"])
print(ret)
end = time.time()
return [[end - start]]
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
gen_yml()
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
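A single request in the format expected by `run_http()` above, and by `UciOp.preprocess` (which splits batches on `;` and features on `,`), can be issued as follows.

```python
import json
import requests

# Two 13-feature samples joined with ";", features separated by ","
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
data = {"key": ["x"], "value": [";".join([value, value])]}
resp = requests.post("http://127.0.0.1:18082/uci/prediction", data=json.dumps(data))
print(resp.json())
```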
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1
for thread_num in 1
do
for batch_size in 1
do
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py
echo "------------Fit a line pipeline benchmark (Thread: $thread_num) (BatchSize: $batch_size)"
tail -n 25 PipelineServingLogs/pipeline.tracer
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1
for thread_num in 1
do
for batch_size in 1
do
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py
echo "------------Fit a line pipeline benchmark (Thread: $thread_num) (BatchSize: $batch_size)"
tail -n 25 PipelineServingLogs/pipeline.tracer
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
done
done
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
try: try:
from paddle_serving_server_gpu.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
except ImportError: except ImportError:
from paddle_serving_server.web_service import WebService, Op from paddle_serving_server.web_service import WebService, Op
import logging import logging
...@@ -25,32 +25,24 @@ _LOGGER = logging.getLogger() ...@@ -25,32 +25,24 @@ _LOGGER = logging.getLogger()
class UciOp(Op): class UciOp(Op):
def init_op(self): def init_op(self):
self.separator = "," self.separator = ","
self.batch_separator = ";"
def preprocess(self, input_dicts, data_id, log_id): def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
_LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format( _LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format(
log_id, input_dict)) log_id, input_dict))
x_value = input_dict["x"] x_value = input_dict["x"].split(self.batch_separator)
x_lst = []
for x_val in x_value:
x_lst.append(np.array([float(x.strip()) for x in x_val.split(self.separator)]).reshape(1, 13))
input_dict["x"] = np.concatenate(x_lst, axis=0)
proc_dict = {} proc_dict = {}
if sys.version_info.major == 2:
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
_LOGGER.error("input_dict:{}".format(input_dict))
else:
if isinstance(x_value, str):
input_dict["x"] = np.array(
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
_LOGGER.error("input_dict:{}".format(input_dict))
return input_dict, False, None, "" return input_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id): def postprocess(self, input_dicts, fetch_dict, log_id):
_LOGGER.info("UciOp::postprocess >>> log_id:{}, fetch_dict:{}".format( _LOGGER.info("UciOp::postprocess >>> log_id:{}, fetch_dict:{}".format(
log_id, fetch_dict)) log_id, fetch_dict))
fetch_dict["price"] = str(fetch_dict["price"][0][0]) fetch_dict["price"] = str(fetch_dict["price"])
return fetch_dict, None, "" return fetch_dict, None, ""
...@@ -61,5 +53,5 @@ class UciService(WebService): ...@@ -61,5 +53,5 @@ class UciService(WebService):
uci_service = UciService(name="uci") uci_service = UciService(name="uci")
uci_service.prepare_pipeline_config("config.yml") uci_service.prepare_pipeline_config("config2.yml")
uci_service.run_service() uci_service.run_service()
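What the rewritten `UciOp.preprocess` produces for a `;`-batched `x` string can be checked in isolation:

```python
import numpy as np

# Split batches on ";", features on ",", reshape each sample to (1, 13) and stack
x_value = "1,2,3,4,5,6,7,8,9,10,11,12,13;13,12,11,10,9,8,7,6,5,4,3,2,1".split(";")
x_lst = [np.array([float(v.strip()) for v in s.split(",")]).reshape(1, 13) for s in x_value]
x = np.concatenate(x_lst, axis=0)
print(x.shape)  # (2, 13) -- one row per ";"-separated sample
```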
...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393 python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494 python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494 python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
``` ```
### Client Prediction ### Client Prediction
......
...@@ -15,7 +15,7 @@ sh get_data.sh ...@@ -15,7 +15,7 @@ sh get_data.sh
### Start server ### Start server
```shell ```shell
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### Client prediction ### Client prediction
......
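A hedged sketch of the client prediction step for the Lite/XPU service above (feed name `x`, fetch name `price`, as used elsewhere in this diff; the client-config path is an assumption):

```python
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")  # assumed path
client.connect(["127.0.0.1:9393"])

# One 13-feature sample with an explicit batch dimension
x = np.array([[0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
               -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]], dtype="float32")
fetch_map = client.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map)
```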
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService from paddle_serving_server.web_service import WebService
import numpy as np import numpy as np
......
...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service ### Start Service
``` ```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
``` ```
### Client Prediction ### Client Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service ## Start RPC Service
``` ```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
``` ```
## Prediction ## Prediction
......
...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz ...@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service ## Start RPC Service
``` ```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
``` ```
## Prediction ## Prediction
......
...@@ -34,10 +34,16 @@ def update_info(file_name, feature, info): ...@@ -34,10 +34,16 @@ def update_info(file_name, feature, info):
f.write(new_str) f.write(new_str)
if len(sys.argv) > 2: if len(sys.argv) > 2 and len(sys.argv[2]) > 0:
update_info("paddle_serving_server_gpu/version.py", "cuda_version", update_info("paddle_serving_server/version.py", "version_suffix",
sys.argv[2]) sys.argv[2])
package_name = '${SERVER_PACKAGE_NAME}'
if package_name.endswith('gpu'):
update_info("paddle_serving_server/version.py", "device_type", "1")
elif package_name.endswith('xpu'):
update_info("paddle_serving_server/version.py", "device_type", "2")
path = "paddle_serving_" + sys.argv[1] path = "paddle_serving_" + sys.argv[1]
commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD']) commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
update_info(path + "/version.py", "commit_id", commit_id) update_info(path + "/version.py", "commit_id", commit_id)
...@@ -82,7 +82,10 @@ class LocalPredictor(object): ...@@ -82,7 +82,10 @@ class LocalPredictor(object):
f = open(client_config, 'r') f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge( model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf) str(f.read()), model_conf)
config = AnalysisConfig(model_path) if os.path.exists(os.path.join(model_path, "__params__")):
config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, "__params__"))
else:
config = AnalysisConfig(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\ logger.info("load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\ gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format( use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format(
......
...@@ -13,703 +13,9 @@ ...@@ -13,703 +13,9 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import paddle_serving_client from . import version
import os
from .proto import sdk_configure_pb2 as sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import numpy as np
import requests
import json
import base64
import time
import sys
import grpc from . import client
from .proto import multi_lang_general_model_service_pb2 from .client import *
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
int64_type = 0 __version__ = version.serving_client_version
float32_type = 1
int32_type = 2
int_type = set([int64_type, int32_type])
float_type = set([float32_type])
class _NOPProfiler(object):
def record(self, name):
pass
def print_profile(self):
pass
class _TimeProfiler(object):
def __init__(self):
self.pid = os.getpid()
self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid)
self.time_record = [self.print_head]
def record(self, name):
self.time_record.append('{}:{} '.format(
name, int(round(time.time() * 1000000))))
def print_profile(self):
self.time_record.append('\n')
sys.stderr.write(''.join(self.time_record))
self.time_record = [self.print_head]
_is_profile = int(os.environ.get('FLAGS_profile_client', 0))
_Profiler = _TimeProfiler if _is_profile else _NOPProfiler
class SDKConfig(object):
def __init__(self):
self.sdk_desc = sdk.SDKConf()
self.tag_list = []
self.cluster_list = []
self.variant_weight_list = []
self.rpc_timeout_ms = 20000
self.load_balance_strategy = "la"
def add_server_variant(self, tag, cluster, variant_weight):
self.tag_list.append(tag)
self.cluster_list.append(cluster)
self.variant_weight_list.append(variant_weight)
def set_load_banlance_strategy(self, strategy):
self.load_balance_strategy = strategy
def gen_desc(self, rpc_timeout_ms):
predictor_desc = sdk.Predictor()
predictor_desc.name = "general_model"
predictor_desc.service_name = \
"baidu.paddle_serving.predictor.general_model.GeneralModelService"
predictor_desc.endpoint_router = "WeightedRandomRender"
predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join(
self.variant_weight_list)
for idx, tag in enumerate(self.tag_list):
variant_desc = sdk.VariantConf()
variant_desc.tag = tag
variant_desc.naming_conf.cluster = "list://{}".format(",".join(
self.cluster_list[idx]))
predictor_desc.variants.extend([variant_desc])
self.sdk_desc.predictors.extend([predictor_desc])
self.sdk_desc.default_variant_conf.tag = "default"
self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms
self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled"
self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default"
self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la"
self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0
self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20
self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std"
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3
return self.sdk_desc
class Client(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
self.client_handle_ = None
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.pid = os.getpid()
self.predictor_sdk_ = None
self.producers = []
self.consumer = None
self.profile_ = _Profiler()
self.all_numpy_input = True
self.has_numpy_input = False
self.rpc_timeout_ms = 20000
from .serving_client import PredictorRes
self.predictorres_constructor = PredictorRes
def load_client_config(self, path):
from .serving_client import PredictorClient
model_conf = m_config.GeneralModelConfig()
f = open(path, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
# load configuraion here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
if "FLAGS_max_body_size" not in os.environ:
os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
read_env_flags = ["profile_client", "profile_server", "max_body_size"]
self.client_handle_.init_gflags([sys.argv[
0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
self.lod_tensor_set = set()
self.feed_tensor_len = {}
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def add_variant(self, tag, cluster, variant_weight):
if self.predictor_sdk_ is None:
self.predictor_sdk_ = SDKConfig()
self.predictor_sdk_.add_server_variant(tag, cluster,
str(variant_weight))
def set_rpc_timeout_ms(self, rpc_timeout):
if not isinstance(rpc_timeout, int):
raise ValueError("rpc_timeout must be int type.")
else:
self.rpc_timeout_ms = rpc_timeout
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
def get_serving_port(self, endpoints):
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post("http://" + endpoints[0], req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
endpoints = [
endpoints[0].split(":")[0] + ":" +
str(result["endpoint_list"][0])
]
return endpoints
def connect(self, endpoints=None, encryption=False):
# check whether current endpoint is available
# init from client config
# create predictor here
if endpoints is None:
if self.predictor_sdk_ is None:
raise ValueError(
"You must set the endpoints parameter or use add_variant function to create a variant."
)
else:
if encryption:
endpoints = self.get_serving_port(endpoints)
if self.predictor_sdk_ is None:
self.add_variant('default_tag_{}'.format(id(self)), endpoints,
100)
else:
print(
"parameter endpoints({}) will not take effect, because you use the add_variant function.".
format(endpoints))
sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms)
self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
))
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise ValueError("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
#raise SystemExit("The shape of feed tensor {} not match.".format(
# key))
pass
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, list):
feed_batch = feed
else:
raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
if len(fetch_names) == 0:
raise ValueError(
"Fetch names should not be empty or out of saved fetch list.")
return {}
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
float_slot.append(feed_i[key])
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
elif self.has_numpy_input == False:
raise ValueError(
"Please make sure all of your inputs are numpy array")
else:
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
if res == -1:
return None
multi_result_map = []
model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names):
result_map = {}
# result map needs to be a numpy array
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int64_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int64_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int32_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
# If only one model result is returned, the format of ret is result_map
ret = multi_result_map[0]
else:
# If multiple model results are returned, the format of ret is {name: result_map}
ret = {
engine_name: multi_result_map[mi]
for mi, engine_name in enumerate(model_engine_names)
}
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
# When using the A/B test, the tag of variant needs to be returned
return ret if not need_variant_tag else [
ret, result_batch_handle.variant_tag()
]
def release(self):
self.client_handle_.destroy_predictor()
self.client_handle_ = None
class MultiLangClient(object):
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
# TODO
raise Exception("cannot support ABtest yet")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weight round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
)
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
                if is_python:
                    if v_type == 0:  # int64
                        result_map[name] = np.frombuffer(
                            var.data, dtype="int64")
                    elif v_type == 1:  # float32
                        result_map[name] = np.frombuffer(
                            var.data, dtype="float32")
                    elif v_type == 2:  # int32
                        result_map[name] = np.frombuffer(
                            var.data, dtype="int32")
                    else:
                        raise Exception("error type.")
                else:
                    if v_type == 0:  # int64
                        result_map[name] = np.array(
                            list(var.int64_data), dtype="int64")
                    elif v_type == 1:  # float32
                        result_map[name] = np.array(
                            list(var.float_data), dtype="float32")
                    elif v_type == 2:  # int32
                        result_map[name] = np.array(
                            list(var.int_data), dtype="int32")
                    else:
                        raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
if isinstance(feed, dict) is False:
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = feed[key][np.newaxis, :]
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddle_serving_client
import os
from .proto import sdk_configure_pb2 as sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import numpy as np
import requests
import json
import base64
import time
import sys
import grpc
from .proto import multi_lang_general_model_service_pb2
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
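# Tensor type codes used throughout this module. They mirror the integer
# feed_type / fetch_type values stored in the general model config:
# 0 -> int64, 1 -> float32, 2 -> int32.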
int64_type = 0
float32_type = 1
int32_type = 2
int_type = set([int64_type, int32_type])
float_type = set([float32_type])
class _NOPProfiler(object):
def record(self, name):
pass
def print_profile(self):
pass
class _TimeProfiler(object):
def __init__(self):
self.pid = os.getpid()
self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid)
self.time_record = [self.print_head]
def record(self, name):
self.time_record.append('{}:{} '.format(
name, int(round(time.time() * 1000000))))
def print_profile(self):
self.time_record.append('\n')
sys.stderr.write(''.join(self.time_record))
self.time_record = [self.print_head]
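# Client-side profiling is opt-in: set FLAGS_profile_client=1 in the
# environment to record timestamps; otherwise the no-op profiler is used.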
_is_profile = int(os.environ.get('FLAGS_profile_client', 0))
_Profiler = _TimeProfiler if _is_profile else _NOPProfiler
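# SDKConfig builds the brpc SDK configuration (sdk_configure proto):
# add_server_variant() registers weighted variants (tag, cluster, weight),
# and gen_desc() serializes them, together with connection and RPC defaults,
# into the predictor description consumed by the C++ client.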
class SDKConfig(object):
def __init__(self):
self.sdk_desc = sdk.SDKConf()
self.tag_list = []
self.cluster_list = []
self.variant_weight_list = []
self.rpc_timeout_ms = 20000
self.load_balance_strategy = "la"
def add_server_variant(self, tag, cluster, variant_weight):
self.tag_list.append(tag)
self.cluster_list.append(cluster)
self.variant_weight_list.append(variant_weight)
def set_load_banlance_strategy(self, strategy):
self.load_balance_strategy = strategy
def gen_desc(self, rpc_timeout_ms):
predictor_desc = sdk.Predictor()
predictor_desc.name = "general_model"
predictor_desc.service_name = \
"baidu.paddle_serving.predictor.general_model.GeneralModelService"
predictor_desc.endpoint_router = "WeightedRandomRender"
predictor_desc.weighted_random_render_conf.variant_weight_list = "|".join(
self.variant_weight_list)
for idx, tag in enumerate(self.tag_list):
variant_desc = sdk.VariantConf()
variant_desc.tag = tag
variant_desc.naming_conf.cluster = "list://{}".format(",".join(
self.cluster_list[idx]))
predictor_desc.variants.extend([variant_desc])
self.sdk_desc.predictors.extend([predictor_desc])
self.sdk_desc.default_variant_conf.tag = "default"
self.sdk_desc.default_variant_conf.connection_conf.connect_timeout_ms = 2000
self.sdk_desc.default_variant_conf.connection_conf.rpc_timeout_ms = rpc_timeout_ms
self.sdk_desc.default_variant_conf.connection_conf.connect_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.max_connection_per_host = 100
self.sdk_desc.default_variant_conf.connection_conf.hedge_request_timeout_ms = -1
self.sdk_desc.default_variant_conf.connection_conf.hedge_fetch_retry_count = 2
self.sdk_desc.default_variant_conf.connection_conf.connection_type = "pooled"
self.sdk_desc.default_variant_conf.naming_conf.cluster_filter_strategy = "Default"
self.sdk_desc.default_variant_conf.naming_conf.load_balance_strategy = "la"
self.sdk_desc.default_variant_conf.rpc_parameter.compress_type = 0
self.sdk_desc.default_variant_conf.rpc_parameter.package_size = 20
self.sdk_desc.default_variant_conf.rpc_parameter.protocol = "baidu_std"
self.sdk_desc.default_variant_conf.rpc_parameter.max_channel_per_request = 3
return self.sdk_desc
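# Client is the brpc client wrapper: it drives the C++ PredictorClient
# binding (serving_client), loads the client-side model config, and feeds
# numpy inputs to the serving endpoint through numpy_predict().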
class Client(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
self.client_handle_ = None
self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.pid = os.getpid()
self.predictor_sdk_ = None
self.producers = []
self.consumer = None
self.profile_ = _Profiler()
self.all_numpy_input = True
self.has_numpy_input = False
self.rpc_timeout_ms = 20000
from .serving_client import PredictorRes
self.predictorres_constructor = PredictorRes
def load_client_config(self, path):
from .serving_client import PredictorClient
model_conf = m_config.GeneralModelConfig()
        with open(path, 'r') as f:
            model_conf = google.protobuf.text_format.Merge(
                str(f.read()), model_conf)
        # load configuration here
# get feed vars, fetch vars
# get feed shapes, feed types
# map feed names to index
self.client_handle_ = PredictorClient()
self.client_handle_.init(path)
if "FLAGS_max_body_size" not in os.environ:
os.environ["FLAGS_max_body_size"] = str(512 * 1024 * 1024)
read_env_flags = ["profile_client", "profile_server", "max_body_size"]
self.client_handle_.init_gflags([sys.argv[
0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
self.lod_tensor_set = set()
self.feed_tensor_len = {}
self.key = None
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
self.feed_tensor_len[var.alias_name] = counter
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set.add(var.alias_name)
return
def add_variant(self, tag, cluster, variant_weight):
if self.predictor_sdk_ is None:
self.predictor_sdk_ = SDKConfig()
self.predictor_sdk_.add_server_variant(tag, cluster,
str(variant_weight))
def set_rpc_timeout_ms(self, rpc_timeout):
if not isinstance(rpc_timeout, int):
raise ValueError("rpc_timeout must be int type.")
else:
self.rpc_timeout_ms = rpc_timeout
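    # Encrypted-model flow: use_key() reads the local key file and
    # get_serving_port() posts it (base64-encoded) to the management endpoint
    # to look up the real serving port before connect() is called.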
def use_key(self, key_filename):
with open(key_filename, "rb") as f:
self.key = f.read()
def get_serving_port(self, endpoints):
if self.key is not None:
req = json.dumps({"key": base64.b64encode(self.key).decode()})
else:
req = json.dumps({})
r = requests.post("http://" + endpoints[0], req)
result = r.json()
print(result)
if "endpoint_list" not in result:
raise ValueError("server not ready")
else:
endpoints = [
endpoints[0].split(":")[0] + ":" +
str(result["endpoint_list"][0])
]
return endpoints
def connect(self, endpoints=None, encryption=False):
# check whether current endpoint is available
# init from client config
# create predictor here
if endpoints is None:
if self.predictor_sdk_ is None:
raise ValueError(
"You must set the endpoints parameter or use add_variant function to create a variant."
)
else:
if encryption:
endpoints = self.get_serving_port(endpoints)
if self.predictor_sdk_ is None:
self.add_variant('default_tag_{}'.format(id(self)), endpoints,
100)
else:
print(
"parameter endpoints({}) will not take effect, because you use the add_variant function.".
format(endpoints))
sdk_desc = self.predictor_sdk_.gen_desc(self.rpc_timeout_ms)
self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
))
def get_feed_names(self):
return self.feed_names_
def get_fetch_names(self):
return self.fetch_names_
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise ValueError("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
#raise SystemExit("The shape of feed tensor {} not match.".format(
# key))
pass
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
if isinstance(fetch, str):
fetch_list = [fetch]
elif isinstance(fetch, list):
fetch_list = fetch
else:
raise ValueError("Fetch only accepts string and list of string")
feed_batch = []
if isinstance(feed, dict):
feed_batch.append(feed)
elif isinstance(feed, list):
feed_batch = feed
else:
raise ValueError("Feed only accepts dict and list of dict")
int_slot_batch = []
float_slot_batch = []
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for key in fetch_list:
if key in self.fetch_names_:
fetch_names.append(key)
        if len(fetch_names) == 0:
            raise ValueError(
                "Fetch names should not be empty or out of the saved fetch list.")
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
float_slot.append(feed_i[key])
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
        elif self.has_numpy_input == False:
            raise ValueError(
                "Please make sure all of your inputs are numpy arrays.")
        else:
            raise ValueError(
                "Please make sure the inputs are either all lists or all numpy arrays."
            )
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
if res == -1:
return None
multi_result_map = []
model_engine_names = result_batch_handle.get_engine_names()
for mi, engine_name in enumerate(model_engine_names):
result_map = {}
# result map needs to be a numpy array
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int64_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int64_by_name(
mi, name)
shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int32_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name(
mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
# If only one model result is returned, the format of ret is result_map
ret = multi_result_map[0]
else:
# If multiple model results are returned, the format of ret is {name: result_map}
ret = {
engine_name: multi_result_map[mi]
for mi, engine_name in enumerate(model_engine_names)
}
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
# When using the A/B test, the tag of variant needs to be returned
return ret if not need_variant_tag else [
ret, result_batch_handle.variant_tag()
]
def release(self):
self.client_handle_.destroy_predictor()
self.client_handle_ = None
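# Minimal usage sketch for the brpc Client above (illustrative only: the
# config path, endpoint and feed/fetch names are placeholders):
#
#   client = Client()
#   client.load_client_config("serving_client_conf.prototxt")
#   client.connect(["127.0.0.1:9292"])
#   fetch_map = client.predict(
#       feed={"x": np.ones((1, 13), dtype="float32")}, fetch=["price"],
#       batch=True)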
class MultiLangClient(object):
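    # gRPC-based client: connect() retrieves the client-side model config
    # from a MultiLangServer, and predict() supports synchronous calls as
    # well as asynchronous ones (asyn=True) that return a MultiLangPredictFuture.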
def __init__(self):
self.channel_ = None
self.stub_ = None
self.rpc_timeout_s_ = 2
self.profile_ = _Profiler()
def add_variant(self, tag, cluster, variant_weight):
# TODO
raise Exception("cannot support ABtest yet")
def set_rpc_timeout_ms(self, rpc_timeout):
if self.stub_ is None:
raise Exception("set timeout must be set after connect.")
if not isinstance(rpc_timeout, int):
# for bclient
raise ValueError("rpc_timeout must be int type.")
self.rpc_timeout_s_ = rpc_timeout / 1000.0
timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest()
timeout_req.timeout_ms = rpc_timeout
resp = self.stub_.SetTimeout(timeout_req)
return resp.err_code == 0
def connect(self, endpoints):
# https://github.com/tensorflow/serving/issues/1382
options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
('grpc.max_send_message_length', 512 * 1024 * 1024),
('grpc.lb_policy_name', 'round_robin')]
# TODO: weight round robin
g_endpoint = 'ipv4:{}'.format(','.join(endpoints))
self.channel_ = grpc.insecure_channel(g_endpoint, options=options)
self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
self.channel_)
# get client model config
get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
)
resp = self.stub_.GetClientConfig(get_client_config_req)
model_config_str = resp.client_config_str
self._parse_model_config(model_config_str)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
else:
counter = 1
for dim in self.feed_shapes_[var.alias_name]:
counter *= dim
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch)
req.is_python = is_python
req.log_id = log_id
feed_var_names = []
for key in feed.keys():
if '.lod' not in key:
feed_var_names.append(key)
req.feed_var_names.extend(feed_var_names)
inst = multi_lang_general_model_service_pb2.FeedInst()
for name in req.feed_var_names:
tensor = multi_lang_general_model_service_pb2.Tensor()
var = feed[name]
v_type = self.feed_types_[name]
if is_python:
data = None
if isinstance(var, list):
if v_type == 0: # int64
data = np.array(var, dtype="int64")
elif v_type == 1: # float32
data = np.array(var, dtype="float32")
elif v_type == 2: # int32
data = np.array(var, dtype="int32")
else:
raise Exception("error tensor value type.")
elif isinstance(var, np.ndarray):
data = var
if v_type == 0:
if data.dtype != 'int64':
data = data.astype("int64")
elif v_type == 1:
if data.dtype != 'float32':
data = data.astype("float32")
elif v_type == 2:
if data.dtype != 'int32':
data = data.astype("int32")
else:
raise Exception("error tensor value type.")
else:
raise Exception("var must be list or ndarray.")
tensor.data = data.tobytes()
tensor.shape.extend(list(var.shape))
if "{}.lod".format(name) in feed.keys():
tensor.lod.extend(feed["{}.lod".format(name)])
inst.tensor_array.append(tensor)
req.insts.append(inst)
return req
def _unpack_inference_response(self, resp, fetch, is_python,
need_variant_tag):
if resp.err_code != 0:
return None
tag = resp.tag
multi_result_map = {}
for model_result in resp.outputs:
inst = model_result.insts[0]
result_map = {}
for i, name in enumerate(fetch):
var = inst.tensor_array[i]
v_type = self.fetch_types_[name]
                if is_python:
                    if v_type == 0:  # int64
                        result_map[name] = np.frombuffer(
                            var.data, dtype="int64")
                    elif v_type == 1:  # float32
                        result_map[name] = np.frombuffer(
                            var.data, dtype="float32")
                    elif v_type == 2:  # int32
                        result_map[name] = np.frombuffer(
                            var.data, dtype="int32")
                    else:
                        raise Exception("error type.")
                else:
                    if v_type == 0:  # int64
                        result_map[name] = np.array(
                            list(var.int64_data), dtype="int64")
                    elif v_type == 1:  # float32
                        result_map[name] = np.array(
                            list(var.float_data), dtype="float32")
                    elif v_type == 2:  # int32
                        result_map[name] = np.array(
                            list(var.int_data), dtype="int32")
                    else:
                        raise Exception("error type.")
result_map[name].shape = list(var.shape)
if name in self.lod_tensor_set_:
result_map["{}.lod".format(name)] = np.array(list(var.lod))
multi_result_map[model_result.engine_name] = result_map
ret = None
if len(resp.outputs) == 1:
ret = list(multi_result_map.values())[0]
else:
ret = multi_result_map
ret["serving_status_code"] = 0
return ret if not need_variant_tag else [ret, tag]
def _done_callback_func(self, fetch, is_python, need_variant_tag):
def unpack_resp(resp):
return self._unpack_inference_response(resp, fetch, is_python,
need_variant_tag)
return unpack_resp
def get_feed_names(self):
return self.feed_names_
def predict(self,
feed,
fetch,
batch=True,
need_variant_tag=False,
asyn=False,
is_python=True,
log_id=0):
if isinstance(feed, dict) is False:
raise ValueError("Type Error. grpc feed must be dict.")
if batch is False:
for key in feed:
if ".lod" not in key:
feed[key] = feed[key][np.newaxis, :]
if not asyn:
try:
self.profile_.record('py_prepro_0')
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_)
self.profile_.record('py_client_infer_1')
self.profile_.record('py_postpro_0')
ret = self._unpack_inference_response(
resp,
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag)
self.profile_.record('py_postpro_1')
self.profile_.print_profile()
return ret
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
else:
req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture(
call_future,
self._done_callback_func(
fetch,
is_python=is_python,
need_variant_tag=need_variant_tag))
class MultiLangPredictFuture(object):
def __init__(self, call_future, callback_func):
self.call_future_ = call_future
self.callback_func_ = callback_func
def result(self):
try:
resp = self.call_future_.result()
except grpc.RpcError as e:
return {"serving_status_code": e.code()}
return self.callback_func_(resp)
def add_done_callback(self, fn):
def __fn__(call_future):
assert call_future == self.call_future_
fn(self)
self.call_future_.add_done_callback(__fn__)
...@@ -13,737 +13,22 @@ ...@@ -13,737 +13,22 @@
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
import os from . import monitor
from .proto import server_configure_pb2 as server_sdk from . import rpc_service
from .proto import general_model_config_pb2 as m_config from . import serve
import google.protobuf.text_format from . import version
import tarfile
import socket
import paddle_serving_server as paddle_serving_server
from .version import serving_server_version
from contextlib import closing
import collections
import shutil
import numpy as np
import grpc
from .proto import multi_lang_general_model_service_pb2
import sys
if sys.platform.startswith('win') is False:
import fcntl
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
from multiprocessing import Pool, Process
from concurrent import futures
__all__ = ["version", "server", "serve", "monitor", "rpc_service", "dag"]
class OpMaker(object): from paddle_serving_server import (
def __init__(self): version,
self.op_dict = { server,
"general_infer": "GeneralInferOp", serve,
"general_reader": "GeneralReaderOp", monitor,
"general_response": "GeneralResponseOp", rpc_service,
"general_text_reader": "GeneralTextReaderOp", dag, )
"general_text_response": "GeneralTextResponseOp",
"general_single_kv": "GeneralSingleKVOp",
"general_dist_kv_infer": "GeneralDistKVInferOp",
"general_dist_kv_quant_infer": "GeneralDistKVQuantInferOp",
"general_copy": "GeneralCopyOp"
}
self.node_name_suffix_ = collections.defaultdict(int)
def create(self, node_type, engine_name=None, inputs=[], outputs=[]): from .dag import *
if node_type not in self.op_dict: from .server import *
raise Exception("Op type {} is not supported right now".format(
node_type))
node = server_sdk.DAGNode()
# node.name will be used as the infer engine name
if engine_name:
node.name = engine_name
else:
node.name = '{}_{}'.format(node_type,
self.node_name_suffix_[node_type])
self.node_name_suffix_[node_type] += 1
node.type = self.op_dict[node_type] __version__ = version.serving_server_version
if inputs:
for dep_node_str in inputs:
dep_node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(dep_node_str, dep_node)
dep = server_sdk.DAGNodeDependency()
dep.name = dep_node.name
dep.mode = "RO"
node.dependencies.extend([dep])
# Because the return value will be used as the key value of the
# dict, and the proto object is variable which cannot be hashed,
# so it is processed into a string. This has little effect on
# overall efficiency.
return google.protobuf.text_format.MessageToString(node)
class OpSeqMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
if len(node.dependencies) > 1:
raise Exception(
'Set more than one predecessor for op in OpSeqMaker is not allowed.'
)
if len(self.workflow.nodes) >= 1:
if len(node.dependencies) == 0:
dep = server_sdk.DAGNodeDependency()
dep.name = self.workflow.nodes[-1].name
dep.mode = "RO"
node.dependencies.extend([dep])
elif len(node.dependencies) == 1:
if node.dependencies[0].name != self.workflow.nodes[-1].name:
raise Exception(
'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
.format(node.dependencies[0].name, self.workflow.nodes[
-1].name))
self.workflow.nodes.extend([node])
def get_op_sequence(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
class OpGraphMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
# Currently, SDK only supports "Sequence"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
self.workflow.nodes.extend([node])
def get_op_graph(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
class Server(object):
def __init__(self):
self.server_handle_ = None
self.infer_service_conf = None
self.model_toolkit_conf = None
self.resource_conf = None
self.memory_optimization = False
self.ir_optimization = False
self.model_conf = None
self.workflow_fn = "workflow.prototxt"
self.resource_fn = "resource.prototxt"
self.infer_service_fn = "infer_service.prototxt"
self.model_toolkit_fn = "model_toolkit.prototxt"
self.general_model_config_fn = "general_model.prototxt"
self.cube_config_fn = "cube.conf"
self.workdir = ""
self.max_concurrency = 0
self.num_threads = 4
self.port = 8080
self.reload_interval_s = 10
self.max_body_size = 64 * 1024 * 1024
self.module_path = os.path.dirname(paddle_serving_server.__file__)
self.cur_path = os.getcwd()
self.use_local_bin = False
self.mkl_flag = False
self.encryption_model = False
self.product_name = None
self.container_id = None
self.model_config_paths = None # for multi-model in a workflow
def get_fetch_list(self):
fetch_names = [var.alias_name for var in self.model_conf.fetch_var]
return fetch_names
def set_max_concurrency(self, concurrency):
self.max_concurrency = concurrency
def set_num_threads(self, threads):
self.num_threads = threads
def set_max_body_size(self, body_size):
if body_size >= self.max_body_size:
self.max_body_size = body_size
else:
print(
"max_body_size is less than default value, will use default value in service."
)
def set_port(self, port):
self.port = port
def set_reload_interval(self, interval):
self.reload_interval_s = interval
def set_op_sequence(self, op_seq):
self.workflow_conf = op_seq
def set_op_graph(self, op_graph):
self.workflow_conf = op_graph
def set_memory_optimize(self, flag=False):
self.memory_optimization = flag
def set_ir_optimize(self, flag=False):
self.ir_optimization = flag
def use_encryption_model(self, flag=False):
self.encryption_model = flag
def set_product_name(self, product_name=None):
if product_name == None:
raise ValueError("product_name can't be None.")
self.product_name = product_name
def set_container_id(self, container_id):
if container_id == None:
raise ValueError("container_id can't be None.")
self.container_id = container_id
def check_local_bin(self):
if "SERVING_BIN" in os.environ:
self.use_local_bin = True
self.bin_path = os.environ["SERVING_BIN"]
def _prepare_engine(self, model_config_paths, device):
if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf()
for engine_name, model_config_path in model_config_paths.items():
engine = server_sdk.EngineDesc()
engine.name = engine_name
engine.reloadable_meta = model_config_path + "/fluid_time_file"
os.system("touch {}".format(engine.reloadable_meta))
engine.reloadable_type = "timestamp_ne"
engine.runtime_thread_num = 0
engine.batch_infer_size = 0
engine.enable_batch_align = 0
engine.model_data_path = model_config_path
engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
if os.path.exists('{}/__params__'.format(model_config_path)):
suffix = ""
else:
suffix = "_DIR"
if device == "cpu":
if self.encryption_model:
engine.type = "FLUID_CPU_ANALYSIS_ENCRYPT"
else:
engine.type = "FLUID_CPU_ANALYSIS" + suffix
elif device == "gpu":
if self.encryption_model:
engine.type = "FLUID_GPU_ANALYSIS_ENCRYPT"
else:
engine.type = "FLUID_GPU_ANALYSIS" + suffix
self.model_toolkit_conf.engines.extend([engine])
def _prepare_infer_service(self, port):
if self.infer_service_conf == None:
self.infer_service_conf = server_sdk.InferServiceConf()
self.infer_service_conf.port = port
infer_service = server_sdk.InferService()
infer_service.name = "GeneralModelService"
infer_service.workflows.extend(["workflow1"])
self.infer_service_conf.services.extend([infer_service])
def _prepare_resource(self, workdir, cube_conf):
self.workdir = workdir
if self.resource_conf == None:
with open("{}/{}".format(workdir, self.general_model_config_fn),
"w") as fout:
fout.write(str(self.model_conf))
self.resource_conf = server_sdk.ResourceConf()
for workflow in self.workflow_conf.workflows:
for node in workflow.nodes:
if "dist_kv" in node.name:
self.resource_conf.cube_config_path = workdir
self.resource_conf.cube_config_file = self.cube_config_fn
if cube_conf == None:
raise ValueError(
"Please set the path of cube.conf while use dist_kv op."
)
shutil.copy(cube_conf, workdir)
if "quant" in node.name:
self.resource_conf.cube_quant_bits = 8
self.resource_conf.model_toolkit_path = workdir
self.resource_conf.model_toolkit_file = self.model_toolkit_fn
self.resource_conf.general_model_path = workdir
self.resource_conf.general_model_file = self.general_model_config_fn
if self.product_name != None:
self.resource_conf.auth_product_name = self.product_name
if self.container_id != None:
self.resource_conf.auth_container_id = self.container_id
def _write_pb_str(self, filepath, pb_obj):
with open(filepath, "w") as fout:
fout.write(str(pb_obj))
def load_model_config(self, model_config_paths):
        # At present, Serving needs to configure the model path in the
        # resource.prototxt file to determine the input and output format
        # of the workflow, and to ensure that the input and output of
        # multiple models are the same.
workflow_oi_config_path = None
if isinstance(model_config_paths, str):
# If there is only one model path, use the default infer_op.
# Because there are several infer_op type, we need to find
# it from workflow_conf.
default_engine_names = [
'general_infer_0', 'general_dist_kv_infer_0',
'general_dist_kv_quant_infer_0'
]
engine_name = None
for node in self.workflow_conf.workflows[0].nodes:
if node.name in default_engine_names:
engine_name = node.name
break
if engine_name is None:
raise Exception(
"You have set the engine_name of Op. Please use the form {op: model_path} to configure model path"
)
self.model_config_paths = {engine_name: model_config_paths}
workflow_oi_config_path = self.model_config_paths[engine_name]
elif isinstance(model_config_paths, dict):
self.model_config_paths = {}
for node_str, path in model_config_paths.items():
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
workflow_oi_config_path = list(self.model_config_paths.items())[0][
1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
type(model_config_paths)))
self.model_conf = m_config.GeneralModelConfig()
f = open(
"{}/serving_server_conf.prototxt".format(workflow_oi_config_path),
'r')
self.model_conf = google.protobuf.text_format.Merge(
str(f.read()), self.model_conf)
# check config here
# print config here
def use_mkl(self, flag):
self.mkl_flag = flag
def get_device_version(self):
avx_flag = False
mkl_flag = self.mkl_flag
openblas_flag = False
r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1")
if r == 0:
avx_flag = True
if avx_flag:
if mkl_flag:
device_version = "serving-cpu-avx-mkl-"
else:
device_version = "serving-cpu-avx-openblas-"
else:
if mkl_flag:
print(
"Your CPU does not support AVX, server will running with noavx-openblas mode."
)
device_version = "serving-cpu-noavx-openblas-"
return device_version
def download_bin(self):
os.chdir(self.module_path)
need_download = False
device_version = self.get_device_version()
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
self.server_path = os.path.join(self.module_path, folder_name)
#acquire lock
version_file = open("{}/version.py".format(self.module_path), "r")
fcntl.flock(version_file, fcntl.LOCK_EX)
if not os.path.exists(self.server_path):
            print('First time run, downloading PaddleServing components ...')
r = os.system('wget ' + bin_url + ' --no-check-certificate')
if r != 0:
if os.path.exists(tar_name):
os.remove(tar_name)
raise SystemExit(
'Download failed, please check your network or permission of {}.'
.format(self.module_path))
else:
try:
print('Decompressing files ..')
tar = tarfile.open(tar_name)
tar.extractall()
tar.close()
except:
                    # remove any partially extracted files before aborting
                    if os.path.exists(self.server_path):
                        shutil.rmtree(self.server_path)
raise SystemExit(
'Decompressing failed, please check your permission of {} or disk space left.'
.format(self.module_path))
finally:
os.remove(tar_name)
#release lock
version_file.close()
os.chdir(self.cur_path)
self.bin_path = self.server_path + "/serving"
def prepare_server(self,
workdir=None,
port=9292,
device="cpu",
cube_conf=None):
if workdir == None:
workdir = "./tmp"
os.system("mkdir {}".format(workdir))
else:
os.system("mkdir {}".format(workdir))
os.system("touch {}/fluid_time_file".format(workdir))
if not self.port_is_available(port):
raise SystemExit("Port {} is already used".format(port))
self.set_port(port)
self._prepare_resource(workdir, cube_conf)
self._prepare_engine(self.model_config_paths, device)
self._prepare_infer_service(port)
self.workdir = workdir
infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
resource_fn = "{}/{}".format(workdir, self.resource_fn)
model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)
self._write_pb_str(infer_service_fn, self.infer_service_conf)
self._write_pb_str(workflow_fn, self.workflow_conf)
self._write_pb_str(resource_fn, self.resource_conf)
self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)
def port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
def run_server(self):
# just run server with system command
# currently we do not load cube
self.check_local_bin()
if not self.use_local_bin:
self.download_bin()
else:
print("Use local bin : {}".format(self.bin_path))
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
print("Going to Run Command")
print(command)
os.system(command)
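# Minimal launch sketch using the classes above (model path, workdir and
# port are placeholders):
#
#   op_maker = OpMaker()
#   op_seq_maker = OpSeqMaker()
#   op_seq_maker.add_op(op_maker.create('general_reader'))
#   op_seq_maker.add_op(op_maker.create('general_infer'))
#   op_seq_maker.add_op(op_maker.create('general_response'))
#   server = Server()
#   server.set_op_sequence(op_seq_maker.get_op_sequence())
#   server.load_model_config("serving_server_model")
#   server.prepare_server(workdir="workdir", port=9292, device="cpu")
#   server.run_server()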
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer):
def __init__(self, model_config_path, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model
self.model_config_path_ = model_config_path
self.endpoints_ = endpoints
with open(self.model_config_path_) as f:
self.model_config_str_ = str(f.read())
self._parse_model_config(self.model_config_str_)
self._init_bclient(self.model_config_path_, self.endpoints_)
def _init_bclient(self, model_config_path, endpoints, timeout_ms=None):
from paddle_serving_client import Client
self.bclient_ = Client()
if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(model_config_path)
self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _unpack_inference_request(self, request):
feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names)
is_python = request.is_python
log_id = request.log_id
feed_batch = []
for feed_inst in request.insts:
feed_dict = {}
for idx, name in enumerate(feed_names):
var = feed_inst.tensor_array[idx]
v_type = self.feed_types_[name]
data = None
if is_python:
if v_type == 0: # int64
data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1: # float32
data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2: # int32
data = np.frombuffer(var.data, dtype="int32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2: # int32
data = np.array(list(var.int_data), dtype="int32")
else:
raise Exception("error type.")
data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data
if len(var.lod) > 0:
feed_dict["{}.lod".format(name)] = var.lod
feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse()
if ret is None:
resp.err_code = 1
return resp
results, tag = ret
resp.tag = tag
resp.err_code = 0
if not self.is_multi_model_:
results = {'general_infer_0': results}
for model_name, model_result in results.items():
model_output = multi_lang_general_model_service_pb2.ModelOutput()
inst = multi_lang_general_model_service_pb2.FetchInst()
for idx, name in enumerate(fetch_names):
tensor = multi_lang_general_model_service_pb2.Tensor()
v_type = self.fetch_types_[name]
if is_python:
tensor.data = model_result[name].tobytes()
else:
if v_type == 0: # int64
tensor.int64_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 1: # float32
tensor.float_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 2: # int32
tensor.int_data.extend(model_result[name].reshape(-1)
.tolist())
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
model_output.insts.append(inst)
model_output.engine_name = model_name
resp.outputs.append(model_output)
return resp
def SetTimeout(self, request, context):
        # This process and the Inference process cannot operate at the same time.
        # For performance reasons, no thread lock is added for now.
timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0
return resp
def Inference(self, request, context):
feed_batch, fetch_names, is_python, log_id = \
self._unpack_inference_request(request)
ret = self.bclient_.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context):
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
resp.client_config_str = self.model_config_str_
return resp
class MultiLangServer(object):
def __init__(self):
self.bserver_ = Server()
self.worker_num_ = 4
self.body_size_ = 64 * 1024 * 1024
self.concurrency_ = 100000
self.is_multi_model_ = False # for model ensemble
def set_max_concurrency(self, concurrency):
self.concurrency_ = concurrency
self.bserver_.set_max_concurrency(concurrency)
def set_num_threads(self, threads):
self.worker_num_ = threads
self.bserver_.set_num_threads(threads)
def set_max_body_size(self, body_size):
self.bserver_.set_max_body_size(body_size)
if body_size >= self.body_size_:
self.body_size_ = body_size
else:
print(
"max_body_size is less than default value, will use default value in service."
)
def use_encryption_model(self, flag=False):
self.encryption_model = flag
def set_port(self, port):
self.gport_ = port
def set_reload_interval(self, interval):
self.bserver_.set_reload_interval(interval)
def set_op_sequence(self, op_seq):
self.bserver_.set_op_sequence(op_seq)
def set_op_graph(self, op_graph):
self.bserver_.set_op_graph(op_graph)
def set_memory_optimize(self, flag=False):
self.bserver_.set_memory_optimize(flag)
def set_ir_optimize(self, flag=False):
self.bserver_.set_ir_optimize(flag)
def set_op_sequence(self, op_seq):
self.bserver_.set_op_sequence(op_seq)
def use_mkl(self, flag):
self.bserver_.use_mkl(flag)
def load_model_config(self, server_config_paths, client_config_path=None):
self.bserver_.load_model_config(server_config_paths)
if client_config_path is None:
if isinstance(server_config_paths, dict):
self.is_multi_model_ = True
client_config_path = '{}/serving_server_conf.prototxt'.format(
list(server_config_paths.items())[0][1])
else:
client_config_path = '{}/serving_server_conf.prototxt'.format(
server_config_paths)
self.bclient_config_path_ = client_config_path
def prepare_server(self,
workdir=None,
port=9292,
device="cpu",
cube_conf=None):
if not self._port_is_available(port):
raise SystemExit("Prot {} is already used".format(port))
default_port = 12000
self.port_list_ = []
for i in range(1000):
if default_port + i != port and self._port_is_available(default_port
+ i):
self.port_list_.append(default_port + i)
break
self.bserver_.prepare_server(
workdir=workdir,
port=self.port_list_[0],
device=device,
cube_conf=cube_conf)
self.set_port(port)
def _launch_brpc_service(self, bserver):
bserver.run_server()
def _port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
return result != 0
def run_server(self):
p_bserver = Process(
target=self._launch_brpc_service, args=(self.bserver_, ))
p_bserver.start()
options = [('grpc.max_send_message_length', self.body_size_),
('grpc.max_receive_message_length', self.body_size_)]
server = grpc.server(
futures.ThreadPoolExecutor(max_workers=self.worker_num_),
options=options,
maximum_concurrent_rpcs=self.concurrency_)
multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server(
MultiLangServerServiceServicer(
self.bclient_config_path_, self.is_multi_model_,
["0.0.0.0:{}".format(self.port_list_[0])]), server)
server.add_insecure_port('[::]:{}'.format(self.gport_))
server.start()
p_bserver.join()
server.wait_for_termination()
from .proto import server_configure_pb2 as server_sdk
import google.protobuf.text_format
import collections
class OpMaker(object):
def __init__(self):
self.op_dict = {
"general_infer": "GeneralInferOp",
"general_reader": "GeneralReaderOp",
"general_response": "GeneralResponseOp",
"general_text_reader": "GeneralTextReaderOp",
"general_text_response": "GeneralTextResponseOp",
"general_single_kv": "GeneralSingleKVOp",
"general_dist_kv_infer": "GeneralDistKVInferOp",
"general_dist_kv": "GeneralDistKVOp"
}
self.node_name_suffix_ = collections.defaultdict(int)
def create(self, node_type, engine_name=None, inputs=[], outputs=[]):
if node_type not in self.op_dict:
raise Exception("Op type {} is not supported right now".format(
node_type))
node = server_sdk.DAGNode()
# node.name will be used as the infer engine name
if engine_name:
node.name = engine_name
else:
node.name = '{}_{}'.format(node_type,
self.node_name_suffix_[node_type])
self.node_name_suffix_[node_type] += 1
node.type = self.op_dict[node_type]
if inputs:
for dep_node_str in inputs:
dep_node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(dep_node_str, dep_node)
dep = server_sdk.DAGNodeDependency()
dep.name = dep_node.name
dep.mode = "RO"
node.dependencies.extend([dep])
        # The return value will be used as a dict key, and the proto object
        # is mutable and cannot be hashed, so it is serialized to a string.
        # This has little effect on overall efficiency.
return google.protobuf.text_format.MessageToString(node)
class OpSeqMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
if len(node.dependencies) > 1:
raise Exception(
'Set more than one predecessor for op in OpSeqMaker is not allowed.'
)
if len(self.workflow.nodes) >= 1:
if len(node.dependencies) == 0:
dep = server_sdk.DAGNodeDependency()
dep.name = self.workflow.nodes[-1].name
dep.mode = "RO"
node.dependencies.extend([dep])
elif len(node.dependencies) == 1:
if node.dependencies[0].name != self.workflow.nodes[-1].name:
raise Exception(
'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
.format(node.dependencies[0].name, self.workflow.nodes[
-1].name))
self.workflow.nodes.extend([node])
def get_op_sequence(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
class OpGraphMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
# Currently, SDK only supports "Sequence"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
self.workflow.nodes.extend([node])
def get_op_graph(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
...@@ -28,7 +28,6 @@ import logging ...@@ -28,7 +28,6 @@ import logging
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
class Monitor(object): class Monitor(object):
''' '''
Monitor base class. It is used to monitor the remote model, pull and update the local model. Monitor base class. It is used to monitor the remote model, pull and update the local model.
......
import sys
import os
import numpy as np
import google.protobuf.text_format
from .proto import general_model_config_pb2 as m_config
from .proto import multi_lang_general_model_service_pb2
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer):
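    # Bridges the gRPC multi-language service to the local brpc endpoint:
    # each request is unpacked into numpy feed dicts, forwarded through an
    # internal paddle_serving_client.Client, and the results are packed back
    # into an InferenceResponse.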
def __init__(self, model_config_path, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model
self.model_config_path_ = model_config_path
self.endpoints_ = endpoints
with open(self.model_config_path_) as f:
self.model_config_str_ = str(f.read())
self._parse_model_config(self.model_config_str_)
self._init_bclient(self.model_config_path_, self.endpoints_)
def _init_bclient(self, model_config_path, endpoints, timeout_ms=None):
from paddle_serving_client import Client
self.bclient_ = Client()
if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(model_config_path)
self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _unpack_inference_request(self, request):
feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names)
is_python = request.is_python
log_id = request.log_id
feed_batch = []
for feed_inst in request.insts:
feed_dict = {}
for idx, name in enumerate(feed_names):
var = feed_inst.tensor_array[idx]
v_type = self.feed_types_[name]
data = None
if is_python:
if v_type == 0:
data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1:
data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2:
data = np.frombuffer(var.data, dtype="int32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2:
data = np.array(list(var.int_data), dtype="int32")
else:
raise Exception("error type.")
data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data
if len(var.lod) > 0:
feed_dict["{}.lod".format(name)] = var.lod
feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse()
if ret is None:
resp.err_code = 1
return resp
results, tag = ret
resp.tag = tag
resp.err_code = 0
if not self.is_multi_model_:
results = {'general_infer_0': results}
for model_name, model_result in results.items():
model_output = multi_lang_general_model_service_pb2.ModelOutput()
inst = multi_lang_general_model_service_pb2.FetchInst()
for idx, name in enumerate(fetch_names):
tensor = multi_lang_general_model_service_pb2.Tensor()
v_type = self.fetch_types_[name]
if is_python:
tensor.data = model_result[name].tobytes()
else:
if v_type == 0: # int64
tensor.int64_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 1: # float32
tensor.float_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 2: # int32
tensor.int_data.extend(model_result[name].reshape(-1)
.tolist())
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
model_output.insts.append(inst)
model_output.engine_name = model_name
resp.outputs.append(model_output)
return resp
def SetTimeout(self, request, context):
# This process and the Inference process cannot run at the same time.
# For performance reasons, no thread lock is added for now.
timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0
return resp
def Inference(self, request, context):
feed_batch, fetch_names, is_python, log_id \
= self._unpack_inference_request(request)
ret = self.bclient_.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context):
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
resp.client_config_str = self.model_config_str_
return resp
\ No newline at end of file
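# A minimal client-side sketch (not from the original sources) of calling the
# MultiLangGeneralModelService defined above over gRPC. The stub and request
# message names are assumed from standard grpc codegen and the servicer code
# (which builds InferenceResponse); the endpoint and the feed/fetch names "x"
# and "price" are hypothetical:
#
#   import grpc
#   import numpy as np
#   from paddle_serving_server.proto import (
#       multi_lang_general_model_service_pb2 as pb2,
#       multi_lang_general_model_service_pb2_grpc as pb2_grpc)
#
#   channel = grpc.insecure_channel("127.0.0.1:9393")
#   stub = pb2_grpc.MultiLangGeneralModelServiceStub(channel)
#   req = pb2.InferenceRequest()
#   req.feed_var_names.append("x")
#   req.fetch_var_names.append("price")
#   req.is_python = True           # tensors are passed as raw bytes
#   tensor = req.insts.add().tensor_array.add()
#   tensor.data = np.array([0.1] * 13, dtype="float32").tobytes()
#   tensor.shape.extend([1, 13])
#   resp = stub.Inference(req)     # resp.outputs holds one ModelOutput per engine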
...@@ -18,12 +18,11 @@ Usage: ...@@ -18,12 +18,11 @@ Usage:
python -m paddle_serving_server.serve --model ./serving_server_model --port 9292 python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
""" """
import argparse import argparse
import sys import os
import json import json
import base64 import base64
import time import time
from multiprocessing import Process from multiprocessing import Pool, Process
from .web_service import WebService, port_is_available
from flask import Flask, request from flask import Flask, request
import sys import sys
if sys.version_info.major == 2: if sys.version_info.major == 2:
...@@ -32,23 +31,26 @@ elif sys.version_info.major == 3: ...@@ -32,23 +31,26 @@ elif sys.version_info.major == 3:
from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import BaseHTTPRequestHandler, HTTPServer
def parse_args(): # pylint: disable=doc-string-missing def serve_args():
parser = argparse.ArgumentParser("serve") parser = argparse.ArgumentParser("serve")
parser.add_argument( parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server") "--thread", type=int, default=2, help="Concurrency of server")
parser.add_argument( parser.add_argument(
"--model", type=str, default="", help="Model for serving") "--port", type=int, default=9292, help="Port of the starting gpu")
parser.add_argument( parser.add_argument(
"--port", type=int, default=9292, help="Port the server") "--device", type=str, default="gpu", help="Type of device")
parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
parser.add_argument( parser.add_argument(
"--name", type=str, default="None", help="Web service name") "--model", type=str, default="", help="Model for serving")
parser.add_argument( parser.add_argument(
"--workdir", "--workdir",
type=str, type=str,
default="workdir", default="workdir",
help="Working dir of current service") help="Working dir of current service")
parser.add_argument( parser.add_argument(
"--device", type=str, default="cpu", help="Type of device") "--name", type=str, default="None", help="Default service name")
parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument( parser.add_argument(
"--mem_optim_off", "--mem_optim_off",
default=False, default=False,
...@@ -56,8 +58,6 @@ def parse_args(): # pylint: disable=doc-string-missing ...@@ -56,8 +58,6 @@ def parse_args(): # pylint: disable=doc-string-missing
help="Memory optimize") help="Memory optimize")
parser.add_argument( parser.add_argument(
"--ir_optim", default=False, action="store_true", help="Graph optimize") "--ir_optim", default=False, action="store_true", help="Graph optimize")
parser.add_argument(
"--use_mkl", default=False, action="store_true", help="Use MKL")
parser.add_argument( parser.add_argument(
"--max_body_size", "--max_body_size",
type=int, type=int,
...@@ -73,6 +73,12 @@ def parse_args(): # pylint: disable=doc-string-missing ...@@ -73,6 +73,12 @@ def parse_args(): # pylint: disable=doc-string-missing
default=False, default=False,
action="store_true", action="store_true",
help="Use Multi-language-service") help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument(
"--use_lite", default=False, action="store_true", help="Use PaddleLite")
parser.add_argument(
"--use_xpu", default=False, action="store_true", help="Use XPU")
parser.add_argument( parser.add_argument(
"--product_name", "--product_name",
type=str, type=str,
...@@ -138,6 +144,116 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing ...@@ -138,6 +144,116 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
server.run_server() server.run_server()
def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing
workdir = args.workdir
gpuid = int(gpuid)
device = "gpu"
if gpuid == -1:
device = "cpu"
elif gpuid >= 0:
port = port + index
thread_num = args.thread
model = args.model
mem_optim = args.mem_optim_off is False
ir_optim = args.ir_optim
use_mkl = args.use_mkl
max_body_size = args.max_body_size
use_multilang = args.use_multilang
if gpuid >= 0:
workdir = "{}_{}".format(args.workdir, gpuid)
if model == "":
print("You must specify your serving model")
exit(-1)
import paddle_serving_server as serving
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
if use_multilang:
server = serving.MultiLangServer()
else:
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.use_mkl(use_mkl)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
if args.use_trt:
server.set_trt()
if args.use_lite:
server.set_lite()
server.set_device(device)
if args.use_xpu:
server.set_xpu()
if args.product_name is not None:
server.set_product_name(args.product_name)
if args.container_id is not None:
server.set_container_id(args.container_id)
server.load_model_config(model)
server.prepare_server(
workdir=workdir,
port=port,
device=device,
use_encryption_model=args.use_encryption_model)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.run_server()
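# Example invocations (illustrative; the model path is hypothetical, the flags
# are the ones defined in serve_args above):
#   CPU:  python -m paddle_serving_server.serve --model uci_housing_model --port 9292
#   GPU:  python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --gpu_ids 0,1
# With --gpu_ids 0,1, start_multi_card below forks one start_gpu_card_model
# worker per card, listening on ports 9292 and 9293 (port + index).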
def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing
gpus = ""
if serving_port is None:
serving_port = args.port
if args.gpu_ids == "":
gpus = []
else:
gpus = args.gpu_ids.split(",")
if "CUDA_VISIBLE_DEVICES" in os.environ:
env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
for ids in gpus:
if int(ids) >= len(env_gpus):
print(
" Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}."
.format(len(env_gpus)))
exit(-1)
else:
env_gpus = []
if args.use_lite:
print("run using paddle-lite.")
start_gpu_card_model(-1, -1, serving_port, args)
elif len(gpus) <= 0:
print("gpu_ids not set, going to run cpu service.")
start_gpu_card_model(-1, -1, serving_port, args)
else:
gpu_processes = []
for i, gpu_id in enumerate(gpus):
p = Process(
target=start_gpu_card_model,
args=(
i,
gpu_id,
serving_port,
args, ))
gpu_processes.append(p)
for p in gpu_processes:
p.start()
for p in gpu_processes:
p.join()
class MainService(BaseHTTPRequestHandler): class MainService(BaseHTTPRequestHandler):
def get_available_port(self): def get_available_port(self):
default_port = 12000 default_port = 12000
...@@ -146,7 +262,7 @@ class MainService(BaseHTTPRequestHandler): ...@@ -146,7 +262,7 @@ class MainService(BaseHTTPRequestHandler):
return default_port + i return default_port + i
def start_serving(self): def start_serving(self):
start_standard_model(serving_port) start_multi_card(args, serving_port)
def get_key(self, post_data): def get_key(self, post_data):
if "key" not in post_data: if "key" not in post_data:
...@@ -207,9 +323,9 @@ class MainService(BaseHTTPRequestHandler): ...@@ -207,9 +323,9 @@ class MainService(BaseHTTPRequestHandler):
if __name__ == "__main__": if __name__ == "__main__":
args = serve_args()
args = parse_args()
if args.name == "None": if args.name == "None":
from .web_service import port_is_available
if args.use_encryption_model: if args.use_encryption_model:
p_flag = False p_flag = False
p = None p = None
...@@ -220,27 +336,39 @@ if __name__ == "__main__": ...@@ -220,27 +336,39 @@ if __name__ == "__main__":
) )
server.serve_forever() server.serve_forever()
else: else:
start_standard_model(args.port) start_multi_card(args)
else: else:
service = WebService(name=args.name) from .web_service import WebService
service.load_model_config(args.model) web_service = WebService(name=args.name)
service.prepare_server( web_service.load_model_config(args.model)
workdir=args.workdir, port=args.port, device=args.device) gpu_ids = args.gpu_ids
service.run_rpc_service() if gpu_ids == "":
if "CUDA_VISIBLE_DEVICES" in os.environ:
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
if len(gpu_ids) > 0:
web_service.set_gpus(gpu_ids)
web_service.prepare_server(
workdir=args.workdir,
port=args.port,
device=args.device,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim)
web_service.run_rpc_service()
app_instance = Flask(__name__) app_instance = Flask(__name__)
@app_instance.before_first_request @app_instance.before_first_request
def init(): def init():
service._launch_web_service() web_service._launch_web_service()
service_name = "/" + service.name + "/prediction" service_name = "/" + web_service.name + "/prediction"
@app_instance.route(service_name, methods=["POST"]) @app_instance.route(service_name, methods=["POST"])
def run(): def run():
return service.get_prediction(request) return web_service.get_prediction(request)
app_instance.run(host="0.0.0.0", app_instance.run(host="0.0.0.0",
port=service.port, port=web_service.port,
threaded=False, threaded=False,
processes=4) processes=4)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -11,188 +11,32 @@ ...@@ -11,188 +11,32 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing
import os import os
import tarfile
import socket
import paddle_serving_server as paddle_serving_server
from .proto import server_configure_pb2 as server_sdk from .proto import server_configure_pb2 as server_sdk
from .proto import general_model_config_pb2 as m_config from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format import google.protobuf.text_format
import tarfile
import socket
import paddle_serving_server_gpu as paddle_serving_server
import time import time
from .version import serving_server_version from .version import serving_server_version, version_suffix, device_type
from contextlib import closing from contextlib import closing
import argparse import argparse
import collections
import sys import sys
if sys.platform.startswith('win') is False: if sys.platform.startswith('win') is False:
import fcntl import fcntl
import shutil import shutil
import platform
import numpy as np import numpy as np
import grpc import grpc
from .proto import multi_lang_general_model_service_pb2
import sys import sys
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
from multiprocessing import Pool, Process from multiprocessing import Pool, Process
from concurrent import futures from concurrent import futures
def serve_args():
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--thread", type=int, default=2, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port of the starting gpu")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="gpu", help="Type of device")
parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
parser.add_argument(
"--name", type=str, default="None", help="Default service name")
parser.add_argument(
"--mem_optim_off",
default=False,
action="store_true",
help="Memory optimize")
parser.add_argument(
"--ir_optim", default=False, action="store_true", help="Graph optimize")
parser.add_argument(
"--max_body_size",
type=int,
default=512 * 1024 * 1024,
help="Limit sizes of messages")
parser.add_argument(
"--use_encryption_model",
default=False,
action="store_true",
help="Use encryption model")
parser.add_argument(
"--use_multilang",
default=False,
action="store_true",
help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument(
"--use_lite", default=False, action="store_true", help="Use PaddleLite")
parser.add_argument(
"--use_xpu", default=False, action="store_true", help="Use XPU")
parser.add_argument(
"--product_name",
type=str,
default=None,
help="product_name for authentication")
parser.add_argument(
"--container_id",
type=str,
default=None,
help="container_id for authentication")
return parser.parse_args()
class OpMaker(object):
def __init__(self):
self.op_dict = {
"general_infer": "GeneralInferOp",
"general_reader": "GeneralReaderOp",
"general_response": "GeneralResponseOp",
"general_text_reader": "GeneralTextReaderOp",
"general_text_response": "GeneralTextResponseOp",
"general_single_kv": "GeneralSingleKVOp",
"general_dist_kv_infer": "GeneralDistKVInferOp",
"general_dist_kv": "GeneralDistKVOp"
}
self.node_name_suffix_ = collections.defaultdict(int)
def create(self, node_type, engine_name=None, inputs=[], outputs=[]):
if node_type not in self.op_dict:
raise Exception("Op type {} is not supported right now".format(
node_type))
node = server_sdk.DAGNode()
# node.name will be used as the infer engine name
if engine_name:
node.name = engine_name
else:
node.name = '{}_{}'.format(node_type,
self.node_name_suffix_[node_type])
self.node_name_suffix_[node_type] += 1
node.type = self.op_dict[node_type]
if inputs:
for dep_node_str in inputs:
dep_node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(dep_node_str, dep_node)
dep = server_sdk.DAGNodeDependency()
dep.name = dep_node.name
dep.mode = "RO"
node.dependencies.extend([dep])
# Because the return value will be used as the key value of the
# dict, and the proto object is variable which cannot be hashed,
# so it is processed into a string. This has little effect on
# overall efficiency.
return google.protobuf.text_format.MessageToString(node)
class OpSeqMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
if len(node.dependencies) > 1:
raise Exception(
'Set more than one predecessor for op in OpSeqMaker is not allowed.'
)
if len(self.workflow.nodes) >= 1:
if len(node.dependencies) == 0:
dep = server_sdk.DAGNodeDependency()
dep.name = self.workflow.nodes[-1].name
dep.mode = "RO"
node.dependencies.extend([dep])
elif len(node.dependencies) == 1:
if node.dependencies[0].name != self.workflow.nodes[-1].name:
raise Exception(
'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
.format(node.dependencies[0].name, self.workflow.nodes[
-1].name))
self.workflow.nodes.extend([node])
def get_op_sequence(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
class OpGraphMaker(object):
def __init__(self):
self.workflow = server_sdk.Workflow()
self.workflow.name = "workflow1"
# Currently, SDK only supports "Sequence"
self.workflow.workflow_type = "Sequence"
def add_op(self, node_str):
node = server_sdk.DAGNode()
google.protobuf.text_format.Parse(node_str, node)
self.workflow.nodes.extend([node])
def get_op_graph(self):
workflow_conf = server_sdk.WorkflowConf()
workflow_conf.workflows.extend([self.workflow])
return workflow_conf
class Server(object): class Server(object):
def __init__(self): def __init__(self):
self.server_handle_ = None self.server_handle_ = None
...@@ -217,6 +61,7 @@ class Server(object): ...@@ -217,6 +61,7 @@ class Server(object):
self.module_path = os.path.dirname(paddle_serving_server.__file__) self.module_path = os.path.dirname(paddle_serving_server.__file__)
self.cur_path = os.getcwd() self.cur_path = os.getcwd()
self.use_local_bin = False self.use_local_bin = False
self.mkl_flag = False
self.device = "cpu" self.device = "cpu"
self.gpuid = 0 self.gpuid = 0
self.use_trt = False self.use_trt = False
...@@ -317,31 +162,20 @@ class Server(object): ...@@ -317,31 +162,20 @@ class Server(object):
engine.runtime_thread_num = 0 engine.runtime_thread_num = 0
engine.batch_infer_size = 0 engine.batch_infer_size = 0
engine.enable_batch_align = 0 engine.enable_batch_align = 0
engine.model_data_path = model_config_path engine.model_dir = model_config_path
engine.enable_memory_optimization = self.memory_optimization engine.enable_memory_optimization = self.memory_optimization
engine.enable_ir_optimization = self.ir_optimization engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
engine.use_trt = self.use_trt engine.use_trt = self.use_trt
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
if os.path.exists('{}/__params__'.format(model_config_path)): if os.path.exists('{}/__params__'.format(model_config_path)):
suffix = "" engine.combined_model = True
else: else:
suffix = "_DIR" engine.combined_model = False
if device == "arm": if use_encryption_model:
engine.use_lite = self.use_lite engine.encrypted_model = True
engine.use_xpu = self.use_xpu engine.type = "PADDLE_INFER"
if device == "cpu":
if use_encryption_model:
engine.type = "FLUID_CPU_ANALYSIS_ENCRPT"
else:
engine.type = "FLUID_CPU_ANALYSIS" + suffix
elif device == "gpu":
if use_encryption_model:
engine.type = "FLUID_GPU_ANALYSIS_ENCRPT"
else:
engine.type = "FLUID_GPU_ANALYSIS" + suffix
elif device == "arm":
engine.type = "FLUID_ARM_ANALYSIS" + suffix
self.model_toolkit_conf.engines.extend([engine]) self.model_toolkit_conf.engines.extend([engine])
def _prepare_infer_service(self, port): def _prepare_infer_service(self, port):
...@@ -432,26 +266,53 @@ class Server(object): ...@@ -432,26 +266,53 @@ class Server(object):
# check config here # check config here
# print config here # print config here
def use_mkl(self, flag):
self.mkl_flag = flag
def get_device_version(self):
avx_flag = False
mkl_flag = self.mkl_flag
openblas_flag = False
r = os.system("cat /proc/cpuinfo | grep avx > /dev/null 2>&1")
if r == 0:
avx_flag = True
if avx_flag:
if mkl_flag:
device_version = "cpu-avx-mkl"
else:
device_version = "cpu-avx-openblas"
else:
if mkl_flag:
print(
"Your CPU does not support AVX, server will running with noavx-openblas mode."
)
device_version = "cpu-noavx-openblas"
return device_version
def get_serving_bin_name(self):
if device_type == "0":
device_version = self.get_device_version()
elif device_type == "1":
if version_suffix == "101" or version_suffix == "102":
device_version = "gpu-" + version_suffix
else:
device_version = "gpu-cuda" + version_suffix
elif device_type == "2":
device_version = "xpu-" + platform.machine()
return device_version
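# Illustrative mapping (not part of the original code): on an AVX-capable CPU
# with use_mkl(True) and device_type "0", get_serving_bin_name() returns
# "cpu-avx-mkl", so download_bin() below fetches a tarball named like
# https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-mkl-<version>.tar.gz
# With device_type "1" and version_suffix "102", the name becomes "gpu-102".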
def download_bin(self): def download_bin(self):
os.chdir(self.module_path) os.chdir(self.module_path)
need_download = False need_download = False
#acquire lock #acquire lock
version_file = open("{}/version.py".format(self.module_path), "r") version_file = open("{}/version.py".format(self.module_path), "r")
import re
for line in version_file.readlines(): folder_name = "serving-%s-%s" % (self.get_serving_bin_name(),
if re.match("cuda_version", line): serving_server_version)
cuda_version = line.split("\"")[1] tar_name = "%s.tar.gz" % folder_name
if cuda_version == "101" or cuda_version == "102": bin_url = "https://paddle-serving.bj.bcebos.com/bin/%s" % tar_name
device_version = "serving-gpu-" + cuda_version + "-"
elif cuda_version == "arm" or cuda_version == "arm-xpu":
device_version = "serving-" + cuda_version + "-"
else:
device_version = "serving-gpu-cuda" + cuda_version + "-"
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
self.server_path = os.path.join(self.module_path, folder_name) self.server_path = os.path.join(self.module_path, folder_name)
download_flag = "{}/{}.is_download".format(self.module_path, download_flag = "{}/{}.is_download".format(self.module_path,
...@@ -503,9 +364,9 @@ class Server(object): ...@@ -503,9 +364,9 @@ class Server(object):
cube_conf=None): cube_conf=None):
if workdir == None: if workdir == None:
workdir = "./tmp" workdir = "./tmp"
os.system("mkdir {}".format(workdir)) os.system("mkdir -p {}".format(workdir))
else: else:
os.system("mkdir {}".format(workdir)) os.system("mkdir -p {}".format(workdir))
os.system("touch {}/fluid_time_file".format(workdir)) os.system("touch {}/fluid_time_file".format(workdir))
if not self.port_is_available(port): if not self.port_is_available(port):
...@@ -614,157 +475,6 @@ class Server(object): ...@@ -614,157 +475,6 @@ class Server(object):
os.system(command) os.system(command)
class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
MultiLangGeneralModelServiceServicer):
def __init__(self, model_config_path, is_multi_model, endpoints):
self.is_multi_model_ = is_multi_model
self.model_config_path_ = model_config_path
self.endpoints_ = endpoints
with open(self.model_config_path_) as f:
self.model_config_str_ = str(f.read())
self._parse_model_config(self.model_config_str_)
self._init_bclient(self.model_config_path_, self.endpoints_)
def _init_bclient(self, model_config_path, endpoints, timeout_ms=None):
from paddle_serving_client import Client
self.bclient_ = Client()
if timeout_ms is not None:
self.bclient_.set_rpc_timeout_ms(timeout_ms)
self.bclient_.load_client_config(model_config_path)
self.bclient_.connect(endpoints)
def _parse_model_config(self, model_config_str):
model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(model_config_str,
model_conf)
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.feed_types_ = {}
self.feed_shapes_ = {}
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
self.fetch_types_ = {}
self.lod_tensor_set_ = set()
for i, var in enumerate(model_conf.feed_var):
self.feed_types_[var.alias_name] = var.feed_type
self.feed_shapes_[var.alias_name] = var.shape
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
for i, var in enumerate(model_conf.fetch_var):
self.fetch_types_[var.alias_name] = var.fetch_type
if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name)
def _flatten_list(self, nested_list):
for item in nested_list:
if isinstance(item, (list, tuple)):
for sub_item in self._flatten_list(item):
yield sub_item
else:
yield item
def _unpack_inference_request(self, request):
feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names)
is_python = request.is_python
log_id = request.log_id
feed_batch = []
for feed_inst in request.insts:
feed_dict = {}
for idx, name in enumerate(feed_names):
var = feed_inst.tensor_array[idx]
v_type = self.feed_types_[name]
data = None
if is_python:
if v_type == 0:
data = np.frombuffer(var.data, dtype="int64")
elif v_type == 1:
data = np.frombuffer(var.data, dtype="float32")
elif v_type == 2:
data = np.frombuffer(var.data, dtype="int32")
else:
raise Exception("error type.")
else:
if v_type == 0: # int64
data = np.array(list(var.int64_data), dtype="int64")
elif v_type == 1: # float32
data = np.array(list(var.float_data), dtype="float32")
elif v_type == 2:
data = np.array(list(var.int_data), dtype="int32")
else:
raise Exception("error type.")
data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data
if len(var.lod) > 0:
feed_dict["{}.lod".format(name)] = var.lod
feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse()
if ret is None:
resp.err_code = 1
return resp
results, tag = ret
resp.tag = tag
resp.err_code = 0
if not self.is_multi_model_:
results = {'general_infer_0': results}
for model_name, model_result in results.items():
model_output = multi_lang_general_model_service_pb2.ModelOutput()
inst = multi_lang_general_model_service_pb2.FetchInst()
for idx, name in enumerate(fetch_names):
tensor = multi_lang_general_model_service_pb2.Tensor()
v_type = self.fetch_types_[name]
if is_python:
tensor.data = model_result[name].tobytes()
else:
if v_type == 0: # int64
tensor.int64_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 1: # float32
tensor.float_data.extend(model_result[name].reshape(-1)
.tolist())
elif v_type == 2: # int32
tensor.int_data.extend(model_result[name].reshape(-1)
.tolist())
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
model_output.insts.append(inst)
model_output.engine_name = model_name
resp.outputs.append(model_output)
return resp
def SetTimeout(self, request, context):
# This process and the Inference process cannot run at the same time.
# For performance reasons, no thread lock is added for now.
timeout_ms = request.timeout_ms
self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms)
resp = multi_lang_general_model_service_pb2.SimpleResponse()
resp.err_code = 0
return resp
def Inference(self, request, context):
feed_batch, fetch_names, is_python, log_id \
= self._unpack_inference_request(request)
ret = self.bclient_.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context):
resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
resp.client_config_str = self.model_config_str_
return resp
class MultiLangServer(object): class MultiLangServer(object):
def __init__(self): def __init__(self):
self.bserver_ = Server() self.bserver_ = Server()
...@@ -808,6 +518,9 @@ class MultiLangServer(object): ...@@ -808,6 +518,9 @@ class MultiLangServer(object):
def set_op_graph(self, op_graph): def set_op_graph(self, op_graph):
self.bserver_.set_op_graph(op_graph) self.bserver_.set_op_graph(op_graph)
def use_mkl(self, flag):
self.bserver_.use_mkl(flag)
def set_memory_optimize(self, flag=False): def set_memory_optimize(self, flag=False):
self.bserver_.set_memory_optimize(flag) self.bserver_.set_memory_optimize(flag)
......
...@@ -11,8 +11,11 @@ ...@@ -11,8 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Paddle Serving Client version string """ """ Paddle Serving Server version string """
serving_client_version = "0.0.0" serving_client_version = "0.0.0"
serving_server_version = "0.0.0" serving_server_version = "0.0.0"
module_proto_version = "0.0.0" module_proto_version = "0.0.0"
version_suffix = ""
device_type = "0"
cuda_version = "9"
commit_id = "" commit_id = ""
...@@ -15,16 +15,19 @@ ...@@ -15,16 +15,19 @@
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from flask import Flask, request, abort from flask import Flask, request, abort
from multiprocessing import Pool, Process
from paddle_serving_server import OpMaker, OpSeqMaker, Server
from paddle_serving_client import Client
from contextlib import closing from contextlib import closing
from multiprocessing import Pool, Process, Queue
from paddle_serving_client import Client
from paddle_serving_server import OpMaker, OpSeqMaker, Server
from paddle_serving_server.serve import start_multi_card
import socket import socket
import sys
import numpy as np import numpy as np
import paddle_serving_server as serving
from paddle_serving_server import pipeline from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op from paddle_serving_server.pipeline import Op
def port_is_available(port): def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2) sock.settimeout(2)
...@@ -34,13 +37,15 @@ def port_is_available(port): ...@@ -34,13 +37,15 @@ def port_is_available(port):
else: else:
return False return False
class WebService(object): class WebService(object):
def __init__(self, name="default_service"): def __init__(self, name="default_service"):
self.name = name self.name = name
# pipeline # pipeline
self._server = pipeline.PipelineServer(self.name) self._server = pipeline.PipelineServer(self.name)
self.gpus = [] # deprecated
self.rpc_service_list = [] # deprecated
def get_pipeline_response(self, read_op): def get_pipeline_response(self, read_op):
return None return None
...@@ -77,58 +82,115 @@ class WebService(object): ...@@ -77,58 +82,115 @@ class WebService(object):
self.feed_vars = {var.name: var for var in model_conf.feed_var} self.feed_vars = {var.name: var for var in model_conf.feed_var}
self.fetch_vars = {var.name: var for var in model_conf.fetch_var} self.fetch_vars = {var.name: var for var in model_conf.fetch_var}
def _launch_rpc_service(self): def set_gpus(self, gpus):
op_maker = OpMaker() print("This API will be deprecated later. Please do not use it")
self.gpus = [int(x) for x in gpus.split(",")]
def default_rpc_service(self,
workdir="conf",
port=9292,
gpuid=0,
thread_num=2,
mem_optim=True,
use_lite=False,
use_xpu=False,
ir_optim=False):
device = "gpu"
if gpuid == -1:
if use_lite:
device = "arm"
else:
device = "cpu"
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response') general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker() op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op) op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op) op_seq_maker.add_op(general_response_op)
server = Server() server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(16) server.set_num_threads(thread_num)
server.set_memory_optimize(self.mem_optim) server.set_memory_optimize(mem_optim)
server.set_ir_optimize(self.ir_optim) server.set_ir_optimize(ir_optim)
server.set_device(device)
if use_lite:
server.set_lite()
if use_xpu:
server.set_xpu()
server.load_model_config(self.model_config) server.load_model_config(self.model_config)
server.prepare_server( if gpuid >= 0:
workdir=self.workdir, port=self.port_list[0], device=self.device) server.set_gpuid(gpuid)
server.run_server() server.prepare_server(workdir=workdir, port=port, device=device)
return server
def port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: def _launch_rpc_service(self, service_idx):
sock.settimeout(2) self.rpc_service_list[service_idx].run_server()
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
def prepare_server(self, def prepare_server(self,
workdir="", workdir="",
port=9393, port=9393,
device="cpu", device="gpu",
mem_optim=True, use_lite=False,
ir_optim=False): use_xpu=False,
ir_optim=False,
gpuid=0,
mem_optim=True):
print("This API will be deprecated later. Please do not use it") print("This API will be deprecated later. Please do not use it")
self.workdir = workdir self.workdir = workdir
self.port = port self.port = port
self.device = device self.device = device
default_port = 12000 self.gpuid = gpuid
self.port_list = [] self.port_list = []
self.mem_optim = mem_optim default_port = 12000
self.ir_optim = ir_optim
for i in range(1000): for i in range(1000):
if port_is_available(default_port + i): if port_is_available(default_port + i):
self.port_list.append(default_port + i) self.port_list.append(default_port + i)
if len(self.port_list) > len(self.gpus):
break break
if len(self.gpus) == 0:
# init cpu service
self.rpc_service_list.append(
self.default_rpc_service(
self.workdir,
self.port_list[0],
-1,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
else:
for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append(
self.default_rpc_service(
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
def _launch_web_service(self): def _launch_web_service(self):
gpu_num = len(self.gpus)
self.client = Client() self.client = Client()
self.client.load_client_config("{}/serving_server_conf.prototxt".format( self.client.load_client_config("{}/serving_server_conf.prototxt".format(
self.model_config)) self.model_config))
self.client.connect(["0.0.0.0:{}".format(self.port_list[0])]) endpoints = ""
if gpu_num > 0:
for i in range(gpu_num):
endpoints += "127.0.0.1:{},".format(self.port_list[i])
else:
endpoints = "127.0.0.1:{}".format(self.port_list[0])
self.client.connect([endpoints])
def get_prediction(self, request): def get_prediction(self, request):
if not request.json: if not request.json:
...@@ -158,8 +220,12 @@ class WebService(object): ...@@ -158,8 +220,12 @@ class WebService(object):
print("web service address:") print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port, print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name)) self.name))
p_rpc = Process(target=self._launch_rpc_service) server_pros = []
p_rpc.start() for i, service in enumerate(self.rpc_service_list):
p = Process(target=self._launch_rpc_service, args=(i, ))
server_pros.append(p)
for p in server_pros:
p.start()
app_instance = Flask(__name__) app_instance = Flask(__name__)
...@@ -175,7 +241,9 @@ class WebService(object): ...@@ -175,7 +241,9 @@ class WebService(object):
self.app_instance = app_instance self.app_instance = app_instance
def run_debugger_service(self): # TODO: maybe rename this API to run_local_predictor?
def run_debugger_service(self, gpu=False):
print("This API will be deprecated later. Please do not use it")
import socket import socket
localIP = socket.gethostbyname(socket.gethostname()) localIP = socket.gethostbyname(socket.gethostname())
print("web service address:") print("web service address:")
...@@ -185,7 +253,7 @@ class WebService(object): ...@@ -185,7 +253,7 @@ class WebService(object):
@app_instance.before_first_request @app_instance.before_first_request
def init(): def init():
self._launch_local_predictor() self._launch_local_predictor(gpu)
service_name = "/" + self.name + "/prediction" service_name = "/" + self.name + "/prediction"
...@@ -195,11 +263,11 @@ class WebService(object): ...@@ -195,11 +263,11 @@ class WebService(object):
self.app_instance = app_instance self.app_instance = app_instance
def _launch_local_predictor(self): def _launch_local_predictor(self, gpu):
from paddle_serving_app.local_predict import LocalPredictor from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor() self.client = LocalPredictor()
self.client.load_model_config( self.client.load_model_config(
"{}".format(self.model_config), use_gpu=False) "{}".format(self.model_config), use_gpu=True, gpu_id=self.gpus[0])
def run_web_service(self): def run_web_service(self):
print("This API will be deprecated later. Please do not use it") print("This API will be deprecated later. Please do not use it")
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage:
Start monitor with one line command
Example:
python -m paddle_serving_server.monitor
"""
import os
import time
import argparse
import subprocess
import datetime
import shutil
import tarfile
import logging
_LOGGER = logging.getLogger(__name__)
class Monitor(object):
'''
Monitor base class. It is used to monitor the remote model, pull and update the local model.
'''
def __init__(self, interval):
self._remote_path = None
self._remote_model_name = None
self._remote_donefile_name = None
self._local_path = None
self._local_model_name = None
self._local_timestamp_file = None
self._interval = interval
self._remote_donefile_timestamp = None
self._local_tmp_path = None
self._unpacked_filename = None
def set_remote_path(self, remote_path):
self._remote_path = remote_path
def set_remote_model_name(self, model_name):
self._remote_model_name = model_name
def set_remote_donefile_name(self, donefile_name):
self._remote_donefile_name = donefile_name
def set_local_path(self, local_path):
self._local_path = local_path
def set_local_model_name(self, model_name):
self._local_model_name = model_name
def set_local_timestamp_file(self, timestamp_file):
self._local_timestamp_file = timestamp_file
def set_local_tmp_path(self, tmp_path):
self._local_tmp_path = tmp_path
def set_unpacked_filename(self, unpacked_filename):
self._unpacked_filename = unpacked_filename
def _check_param_help(self, param_name, param_value):
return "Please check the {}({}) parameter.".format(param_name,
param_value)
def _check_params(self, params):
for param in params:
if getattr(self, param, None) is None:
raise Exception('{} not set.'.format(param))
def _print_params(self, params_name):
self._check_params(params_name)
for name in params_name:
_LOGGER.info('{}: {}'.format(name, getattr(self, name)))
def _decompress_model_file(self, local_tmp_path, model_name,
unpacked_filename):
if unpacked_filename is None:
_LOGGER.debug('remote file({}) is already unpacked.'.format(
model_name))
return model_name
tar_model_path = os.path.join(local_tmp_path, model_name)
_LOGGER.info("try to unpack remote file({})".format(tar_model_path))
if not tarfile.is_tarfile(tar_model_path):
raise Exception('not a tar packaged file type. {}'.format(
self._check_param_help('remote_model_name', model_name)))
try:
_LOGGER.info('unpack remote file({}).'.format(model_name))
tar = tarfile.open(tar_model_path)
tar.extractall(local_tmp_path)
tar.close()
except Exception:
raise Exception(
'Decompressing failed, maybe no disk space left. {}'.format(
self._check_param_help('local_tmp_path', local_tmp_path)))
finally:
os.remove(tar_model_path)
_LOGGER.debug('remove packed file({}).'.format(tar_model_path))
_LOGGER.info('using unpacked filename: {}.'.format(
unpacked_filename))
if not os.path.exists(
os.path.join(local_tmp_path, unpacked_filename)):
raise Exception('file not exist. {}'.format(
self._check_param_help('unpacked_filename',
unpacked_filename)))
return unpacked_filename
def run(self):
'''
Monitor the remote model by polling and update the local model.
'''
params = [
'_remote_path', '_remote_model_name', '_remote_donefile_name',
'_local_model_name', '_local_path', '_local_timestamp_file',
'_local_tmp_path', '_interval'
]
self._print_params(params)
local_tmp_path = os.path.join(self._local_path, self._local_tmp_path)
_LOGGER.info('local_tmp_path: {}'.format(local_tmp_path))
if not os.path.exists(local_tmp_path):
_LOGGER.info('mkdir: {}'.format(local_tmp_path))
os.makedirs(local_tmp_path)
while True:
[flag, timestamp] = self._exist_remote_file(
self._remote_path, self._remote_donefile_name, local_tmp_path)
if flag:
if self._remote_donefile_timestamp is None or \
timestamp != self._remote_donefile_timestamp:
_LOGGER.info('donefile({}) changed.'.format(
self._remote_donefile_name))
self._remote_donefile_timestamp = timestamp
self._pull_remote_dir(self._remote_path,
self._remote_model_name,
local_tmp_path)
_LOGGER.info('pull remote model({}).'.format(
self._remote_model_name))
unpacked_filename = self._decompress_model_file(
local_tmp_path, self._remote_model_name,
self._unpacked_filename)
self._update_local_model(local_tmp_path, unpacked_filename,
self._local_path,
self._local_model_name)
_LOGGER.info('update local model({}).'.format(
self._local_model_name))
self._update_local_donefile(self._local_path,
self._local_model_name,
self._local_timestamp_file)
_LOGGER.info('update model timestamp({}).'.format(
self._local_timestamp_file))
else:
_LOGGER.info('remote({}) has no donefile.'.format(
self._remote_path))
_LOGGER.info('sleep {}s.'.format(self._interval))
time.sleep(self._interval)
def _exist_remote_file(self, path, filename, local_tmp_path):
raise Exception('This function must be inherited.')
def _pull_remote_dir(self, remote_path, dirname, local_tmp_path):
raise Exception('This function must be inherited.')
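# A minimal subclass sketch (not part of the original code) showing the two
# hooks a concrete monitor must implement; a local directory stands in for
# the remote store purely for illustration (os and shutil are imported above):
#
#   class LocalDirMonitor(Monitor):
#       def _exist_remote_file(self, path, filename, local_tmp_path):
#           fullpath = os.path.join(path, filename)
#           if os.path.exists(fullpath):
#               return [True, os.path.getmtime(fullpath)]
#           return [False, None]
#
#       def _pull_remote_dir(self, remote_path, dirname, local_tmp_path):
#           dst = os.path.join(local_tmp_path, dirname)
#           if os.path.exists(dst):
#               shutil.rmtree(dst)
#           shutil.copytree(os.path.join(remote_path, dirname), dst)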
def _update_local_model(self, local_tmp_path, remote_model_name, local_path,
local_model_name):
tmp_model_path = os.path.join(local_tmp_path, remote_model_name)
local_model_path = os.path.join(local_path, local_model_name)
cmd = 'cp -r {}/* {}'.format(tmp_model_path, local_model_path)
_LOGGER.debug('update model cmd: {}'.format(cmd))
if os.system(cmd) != 0:
raise Exception('update local model failed.')
def _update_local_donefile(self, local_path, local_model_name,
local_timestamp_file):
donefile_path = os.path.join(local_path, local_model_name,
local_timestamp_file)
cmd = 'touch {}'.format(donefile_path)
_LOGGER.debug('update timestamp cmd: {}'.format(cmd))
if os.system(cmd) != 0:
raise Exception('update local donefile failed.')
class HadoopMonitor(Monitor):
''' Monitor HDFS or AFS by Hadoop-client. '''
def __init__(self, hadoop_bin, fs_name='', fs_ugi='', interval=10):
super(HadoopMonitor, self).__init__(interval)
self._hadoop_bin = hadoop_bin
self._fs_name = fs_name
self._fs_ugi = fs_ugi
self._print_params(['_hadoop_bin', '_fs_name', '_fs_ugi'])
self._cmd_prefix = '{} fs '.format(self._hadoop_bin)
if self._fs_name:
self._cmd_prefix += '-D fs.default.name={} '.format(self._fs_name)
if self._fs_ugi:
self._cmd_prefix += '-D hadoop.job.ugi={} '.format(self._fs_ugi)
_LOGGER.info('Hadoop prefix cmd: {}'.format(self._cmd_prefix))
def _exist_remote_file(self, path, filename, local_tmp_path):
remote_filepath = os.path.join(path, filename)
cmd = '{} -ls {} 2>/dev/null'.format(self._cmd_prefix, remote_filepath)
_LOGGER.debug('check cmd: {}'.format(cmd))
[status, output] = subprocess.getstatusoutput(cmd)
_LOGGER.debug('resp: {}'.format(output))
if status == 0:
[_, _, _, _, _, mdate, mtime, _] = output.split('\n')[-1].split()
timestr = mdate + mtime
return [True, timestr]
else:
return [False, None]
def _pull_remote_dir(self, remote_path, dirname, local_tmp_path):
# remove old file before pull remote dir
local_dirpath = os.path.join(local_tmp_path, dirname)
if os.path.exists(local_dirpath):
_LOGGER.info('remove old temporary model file({}).'.format(dirname))
if self._unpacked_filename is None:
# the remote file is model folder.
shutil.rmtree(local_dirpath)
else:
# the remote file is a packed model file
os.remove(local_dirpath)
remote_dirpath = os.path.join(remote_path, dirname)
cmd = '{} -get {} {} 2>/dev/null'.format(self._cmd_prefix,
remote_dirpath, local_dirpath)
_LOGGER.debug('pull cmd: {}'.format(cmd))
if os.system(cmd) != 0:
raise Exception('pull remote dir failed. {}'.format(
self._check_param_help('remote_model_name', dirname)))
class FTPMonitor(Monitor):
''' FTP Monitor. '''
def __init__(self, host, port, username="", password="", interval=10):
super(FTPMonitor, self).__init__(interval)
import ftplib
self._ftp = ftplib.FTP()
self._ftp_host = host
self._ftp_port = port
self._ftp_username = username
self._ftp_password = password
self._ftp.connect(self._ftp_host, self._ftp_port)
self._ftp.login(self._ftp_username, self._ftp_password)
self._print_params(
['_ftp_host', '_ftp_port', '_ftp_username', '_ftp_password'])
def _exist_remote_file(self, path, filename, local_tmp_path):
import ftplib
try:
_LOGGER.debug('cwd: {}'.format(path))
self._ftp.cwd(path)
timestamp = self._ftp.voidcmd('MDTM {}'.format(filename))[4:].strip(
)
return [True, timestamp]
except ftplib.error_perm:
_LOGGER.debug('remote file({}) not exist.'.format(filename))
return [False, None]
def _download_remote_file(self,
remote_path,
remote_filename,
local_tmp_path,
overwrite=True):
local_fullpath = os.path.join(local_tmp_path, remote_filename)
if not overwrite and os.path.isfile(local_fullpath):
return
else:
with open(local_fullpath, 'wb') as f:
_LOGGER.debug('cwd: {}'.format(remote_path))
self._ftp.cwd(remote_path)
_LOGGER.debug('download remote file({})'.format(
remote_filename))
self._ftp.retrbinary('RETR {}'.format(remote_filename), f.write)
def _download_remote_files(self,
remote_path,
remote_dirname,
local_tmp_path,
overwrite=True):
import ftplib
remote_dirpath = os.path.join(remote_path, remote_dirname)
# Check whether remote_dirpath is a file or a folder
try:
_LOGGER.debug('cwd: {}'.format(remote_dirpath))
self._ftp.cwd(remote_dirpath)
_LOGGER.debug('{} is folder.'.format(remote_dirname))
local_dirpath = os.path.join(local_tmp_path, remote_dirname)
if not os.path.exists(local_dirpath):
_LOGGER.info('mkdir: {}'.format(local_dirpath))
os.mkdir(local_dirpath)
output = []
self._ftp.dir(output.append)
for line in output:
[attr, _, _, _, _, _, _, _, name] = line.split()
if attr[0] == 'd':
self._download_remote_files(
os.path.join(remote_path, remote_dirname), name,
os.path.join(local_tmp_path, remote_dirname), overwrite)
else:
self._download_remote_file(remote_dirpath, name,
local_dirpath, overwrite)
except ftplib.error_perm:
_LOGGER.debug('{} is file.'.format(remote_dirname))
self._download_remote_file(remote_path, remote_dirname,
local_tmp_path, overwrite)
return
def _pull_remote_dir(self, remote_path, dirname, local_tmp_path):
self._download_remote_files(
remote_path, dirname, local_tmp_path, overwrite=True)
class GeneralMonitor(Monitor):
''' General Monitor. '''
def __init__(self, host, interval=10):
super(GeneralMonitor, self).__init__(interval)
self._general_host = host
self._print_params(['_general_host'])
def _get_local_file_timestamp(self, filename):
return os.path.getmtime(filename)
def _exist_remote_file(self, remote_path, filename, local_tmp_path):
remote_filepath = os.path.join(remote_path, filename)
url = '{}/{}'.format(self._general_host, remote_filepath)
_LOGGER.debug('remote file url: {}'.format(url))
# only for check donefile, which is not a folder.
cmd = 'wget -nd -N -P {} {} &>/dev/null'.format(local_tmp_path, url)
_LOGGER.debug('wget cmd: {}'.format(cmd))
if os.system(cmd) != 0:
_LOGGER.debug('remote file({}) not exist.'.format(remote_filepath))
return [False, None]
else:
timestamp = self._get_local_file_timestamp(
os.path.join(local_tmp_path, filename))
return [True, timestamp]
def _pull_remote_dir(self, remote_path, dirname, local_tmp_path):
remote_dirpath = os.path.join(remote_path, dirname)
url = '{}/{}'.format(self._general_host, remote_dirpath)
_LOGGER.debug('remote file url: {}'.format(url))
if self._unpacked_filename is None:
# the remote file is model folder.
cmd = 'wget -nH -r -P {} {} &>/dev/null'.format(
os.path.join(local_tmp_path, dirname), url)
else:
# the remote file is a packed model file
cmd = 'wget -nd -N -P {} {} &>/dev/null'.format(local_tmp_path, url)
_LOGGER.debug('wget cmd: {}'.format(cmd))
if os.system(cmd) != 0:
raise Exception('pull remote dir failed. {}'.format(
self._check_param_help('remote_model_name', dirname)))
def parse_args():
""" parse args.
Returns:
parser.parse_args().
"""
parser = argparse.ArgumentParser(description="Monitor")
parser.add_argument(
"--type", type=str, default='general', help="Type of remote server")
parser.add_argument(
"--remote_path",
type=str,
required=True,
help="The base path for the remote")
parser.add_argument(
"--remote_model_name",
type=str,
required=True,
help="The model name to be pulled from the remote")
parser.add_argument(
"--remote_donefile_name",
type=str,
required=True,
help="The donefile name that marks the completion of the remote model update"
)
parser.add_argument(
"--local_path", type=str, required=True, help="Local work path")
parser.add_argument(
"--local_model_name", type=str, required=True, help="Local model name")
parser.add_argument(
"--local_timestamp_file",
type=str,
default='fluid_time_file',
help="The timestamp file used locally for hot loading, The file is considered to be placed in the `local_path/local_model_name` folder."
)
parser.add_argument(
"--local_tmp_path",
type=str,
default='_serving_monitor_tmp',
help="The path of the folder where temporary files are stored locally. If it does not exist, it will be created automatically"
)
parser.add_argument(
"--unpacked_filename",
type=str,
default=None,
help="If the model of the remote production is a packaged file, the unpacked file name should be set. Currently, only tar packaging format is supported."
)
parser.add_argument(
"--interval",
type=int,
default=10,
help="The polling interval in seconds")
parser.add_argument(
"--debug", action='store_true', help="If set, output more details")
parser.set_defaults(debug=False)
# general monitor
parser.add_argument("--general_host", type=str, help="General remote host")
# ftp monitor
parser.add_argument("--ftp_host", type=str, help="FTP remote host")
parser.add_argument("--ftp_port", type=int, help="FTP remote port")
parser.add_argument(
"--ftp_username",
type=str,
default='',
help="FTP username. Not used if anonymous access.")
parser.add_argument(
"--ftp_password",
type=str,
default='',
help="FTP password. Not used if anonymous access")
# afs/hdfs monitor
parser.add_argument(
"--hadoop_bin", type=str, help="Path of Hadoop binary file")
parser.add_argument(
"--fs_name",
type=str,
default='',
help="AFS/HDFS fs_name. Not used if set in Hadoop-client.")
parser.add_argument(
"--fs_ugi",
type=str,
default='',
help="AFS/HDFS fs_ugi, Not used if set in Hadoop-client")
return parser.parse_args()
def get_monitor(mtype):
""" generator monitor instance.
Args:
mtype: type of monitor
Returns:
monitor instance.
"""
if mtype == 'ftp':
return FTPMonitor(
args.ftp_host,
args.ftp_port,
username=args.ftp_username,
password=args.ftp_password,
interval=args.interval)
elif mtype == 'general':
return GeneralMonitor(args.general_host, interval=args.interval)
elif mtype == 'afs' or mtype == 'hdfs':
return HadoopMonitor(
args.hadoop_bin, args.fs_name, args.fs_ugi, interval=args.interval)
else:
raise Exception('unsupported monitor type.')
def start_monitor(monitor, args):
monitor.set_remote_path(args.remote_path)
monitor.set_remote_model_name(args.remote_model_name)
monitor.set_remote_donefile_name(args.remote_donefile_name)
monitor.set_local_path(args.local_path)
monitor.set_local_model_name(args.local_model_name)
monitor.set_local_timestamp_file(args.local_timestamp_file)
monitor.set_local_tmp_path(args.local_tmp_path)
monitor.set_unpacked_filename(args.unpacked_filename)
monitor.run()
if __name__ == "__main__":
args = parse_args()
if args.debug:
logging.basicConfig(
format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
datefmt='%Y-%m-%d %H:%M',
level=logging.DEBUG)
else:
logging.basicConfig(
format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
datefmt='%Y-%m-%d %H:%M',
level=logging.INFO)
monitor = get_monitor(args.type)
start_monitor(monitor, args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage:
Host a trained paddle model with one line command
Example:
python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
"""
import argparse
import os
import json
import base64
import time
from multiprocessing import Pool, Process
from paddle_serving_server_gpu import serve_args
from flask import Flask, request
import sys
if sys.version_info.major == 2:
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
elif sys.version_info.major == 3:
from http.server import BaseHTTPRequestHandler, HTTPServer
def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-string-missing
gpuid = int(gpuid)
device = "gpu"
if gpuid == -1:
device = "cpu"
elif gpuid >= 0:
port = port + index
thread_num = args.thread
model = args.model
mem_optim = args.mem_optim_off is False
ir_optim = args.ir_optim
max_body_size = args.max_body_size
use_multilang = args.use_multilang
workdir = args.workdir
if gpuid >= 0:
workdir = "{}_{}".format(args.workdir, gpuid)
if model == "":
print("You must specify your serving model")
exit(-1)
import paddle_serving_server_gpu as serving
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
if use_multilang:
server = serving.MultiLangServer()
else:
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
if args.use_trt:
server.set_trt()
if args.use_lite:
server.set_lite()
device = "arm"
server.set_device(device)
if args.use_xpu:
server.set_xpu()
if args.product_name is not None:
server.set_product_name(args.product_name)
if args.container_id is not None:
server.set_container_id(args.container_id)
server.load_model_config(model)
server.prepare_server(
workdir=workdir,
port=port,
device=device,
use_encryption_model=args.use_encryption_model)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.run_server()
def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing
gpus = ""
if serving_port is None:
serving_port = args.port
if args.gpu_ids == "":
gpus = []
else:
gpus = args.gpu_ids.split(",")
if "CUDA_VISIBLE_DEVICES" in os.environ:
env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
for ids in gpus:
if int(ids) >= len(env_gpus):
print(
" Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}."
.format(len(env_gpus)))
exit(-1)
else:
env_gpus = []
if args.use_lite:
print("run arm server.")
start_gpu_card_model(-1, -1, serving_port, args)
elif len(gpus) <= 0:
print("gpu_ids not set, going to run cpu service.")
start_gpu_card_model(-1, -1, serving_port, args)
else:
gpu_processes = []
for i, gpu_id in enumerate(gpus):
p = Process(
target=start_gpu_card_model,
args=(
i,
gpu_id,
serving_port,
args, ))
gpu_processes.append(p)
for p in gpu_processes:
p.start()
for p in gpu_processes:
p.join()
class MainService(BaseHTTPRequestHandler):
def get_available_port(self):
default_port = 12000
for i in range(1000):
if port_is_available(default_port + i):
return default_port + i
def start_serving(self):
start_multi_card(args, serving_port)
def get_key(self, post_data):
if "key" not in post_data:
return False
else:
key = base64.b64decode(post_data["key"].encode())
with open(args.model + "/key", "wb") as f:
f.write(key)
return True
def check_key(self, post_data):
if "key" not in post_data:
return False
else:
key = base64.b64decode(post_data["key"].encode())
with open(args.model + "/key", "rb") as f:
cur_key = f.read()
return (key == cur_key)
def start(self, post_data):
post_data = json.loads(post_data)
global p_flag
if not p_flag:
if args.use_encryption_model:
print("waiting key for model")
if not self.get_key(post_data):
print("not found key in request")
return False
global serving_port
global p
serving_port = self.get_available_port()
p = Process(target=self.start_serving)
p.start()
time.sleep(3)
if p.is_alive():
p_flag = True
else:
return False
else:
if p.is_alive():
if not self.check_key(post_data):
return False
else:
return False
return True
def do_POST(self):
content_length = int(self.headers['Content-Length'])
post_data = self.rfile.read(content_length)
if self.start(post_data):
response = {"endpoint_list": [serving_port]}
else:
response = {"message": "start serving failed"}
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.end_headers()
self.wfile.write(json.dumps(response).encode())
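# For reference, a minimal client for the key-exchange protocol implemented by
# MainService could look like the sketch below. The helper name and the use of
# the `requests` package are illustrative only; the JSON fields simply mirror
# what get_key()/do_POST() read above.
#
#     import base64, json, requests
#
#     def send_model_key(host, port, key_path):
#         with open(key_path, "rb") as f:
#             key = base64.b64encode(f.read()).decode()
#         resp = requests.post("http://{}:{}".format(host, port),
#                              data=json.dumps({"key": key}))
#         return json.loads(resp.text)  # e.g. {"endpoint_list": [12000]}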
if __name__ == "__main__":
args = serve_args()
if args.name == "None":
from .web_service import port_is_available
if args.use_encryption_model:
p_flag = False
p = None
serving_port = 0
server = HTTPServer(('localhost', int(args.port)), MainService)
print(
'Starting encryption server, waiting for key from client, use <Ctrl-C> to stop'
)
server.serve_forever()
else:
start_multi_card(args)
else:
from .web_service import WebService
web_service = WebService(name=args.name)
web_service.load_model_config(args.model)
gpu_ids = args.gpu_ids
if gpu_ids == "":
if "CUDA_VISIBLE_DEVICES" in os.environ:
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
if len(gpu_ids) > 0:
web_service.set_gpus(gpu_ids)
web_service.prepare_server(
workdir=args.workdir,
port=args.port,
device=args.device,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim)
web_service.run_rpc_service()
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
web_service._launch_web_service()
service_name = "/" + web_service.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return web_service.get_prediction(request)
app_instance.run(host="0.0.0.0",
port=web_service.port,
threaded=False,
processes=4)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
module_proto_version = "0.0.0"
cuda_version = "9"
commit_id = ""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!flask/bin/python
# pylint: disable=doc-string-missing
from flask import Flask, request, abort
from contextlib import closing
from multiprocessing import Pool, Process, Queue
from paddle_serving_client import Client
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
from paddle_serving_server_gpu.serve import start_multi_card
import socket
import sys
import numpy as np
import paddle_serving_server_gpu as serving
from paddle_serving_server_gpu import pipeline
from paddle_serving_server_gpu.pipeline import Op
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
# pipeline
self._server = pipeline.PipelineServer(self.name)
self.gpus = [] # deprecated
self.rpc_service_list = [] # deprecated
def get_pipeline_response(self, read_op):
return None
def prepare_pipeline_config(self, yaml_file):
# build dag
read_op = pipeline.RequestOp()
last_op = self.get_pipeline_response(read_op)
if not isinstance(last_op, Op):
raise ValueError("The return value type of `get_pipeline_response` "
"function is not Op type, please check function "
"`get_pipeline_response`.")
response_op = pipeline.ResponseOp(input_ops=[last_op])
self._server.set_response_op(response_op)
self._server.prepare_server(yaml_file)
def run_service(self):
self._server.run_server()
def load_model_config(self, model_config):
print("This API will be deprecated later. Please do not use it")
self.model_config = model_config
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
if os.path.isdir(model_config):
client_config = "{}/serving_server_conf.prototxt".format(
model_config)
elif os.path.isfile(model_config):
client_config = model_config
model_conf = m_config.GeneralModelConfig()
        with open(client_config, 'r') as f:
            model_conf = google.protobuf.text_format.Merge(
                str(f.read()), model_conf)
self.feed_vars = {var.name: var for var in model_conf.feed_var}
self.fetch_vars = {var.name: var for var in model_conf.fetch_var}
def set_gpus(self, gpus):
print("This API will be deprecated later. Please do not use it")
self.gpus = [int(x) for x in gpus.split(",")]
def default_rpc_service(self,
workdir="conf",
port=9292,
gpuid=0,
thread_num=2,
mem_optim=True,
use_lite=False,
use_xpu=False,
ir_optim=False):
device = "gpu"
if gpuid == -1:
if use_lite:
device = "arm"
else:
device = "cpu"
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_device(device)
if use_lite:
server.set_lite()
if use_xpu:
server.set_xpu()
server.load_model_config(self.model_config)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
return server
def _launch_rpc_service(self, service_idx):
self.rpc_service_list[service_idx].run_server()
def port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
def prepare_server(self,
workdir="",
port=9393,
device="gpu",
use_lite=False,
use_xpu=False,
ir_optim=False,
gpuid=0,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
self.device = device
self.gpuid = gpuid
self.port_list = []
default_port = 12000
for i in range(1000):
if port_is_available(default_port + i):
self.port_list.append(default_port + i)
if len(self.port_list) > len(self.gpus):
break
if len(self.gpus) == 0:
# init cpu service
self.rpc_service_list.append(
self.default_rpc_service(
self.workdir,
self.port_list[0],
-1,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
else:
for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append(
self.default_rpc_service(
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
ir_optim=ir_optim))
def _launch_web_service(self):
gpu_num = len(self.gpus)
self.client = Client()
self.client.load_client_config("{}/serving_server_conf.prototxt".format(
self.model_config))
endpoints = ""
if gpu_num > 0:
for i in range(gpu_num):
endpoints += "127.0.0.1:{},".format(self.port_list[i])
else:
endpoints = "127.0.0.1:{}".format(self.port_list[0])
self.client.connect([endpoints])
def get_prediction(self, request):
if not request.json:
abort(400)
if "fetch" not in request.json:
abort(400)
try:
feed, fetch, is_batch = self.preprocess(request.json["feed"],
request.json["fetch"])
if isinstance(feed, dict) and "fetch" in feed:
del feed["fetch"]
if len(feed) == 0:
raise ValueError("empty input")
fetch_map = self.client.predict(
feed=feed, fetch=fetch, batch=is_batch)
result = self.postprocess(
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
except ValueError as err:
result = {"result": str(err)}
return result
def run_rpc_service(self):
print("This API will be deprecated later. Please do not use it")
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name))
server_pros = []
for i, service in enumerate(self.rpc_service_list):
p = Process(target=self._launch_rpc_service, args=(i, ))
server_pros.append(p)
for p in server_pros:
p.start()
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
self._launch_web_service()
service_name = "/" + self.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return self.get_prediction(request)
self.app_instance = app_instance
# TODO: maybe change another API name: maybe run_local_predictor?
def run_debugger_service(self, gpu=False):
print("This API will be deprecated later. Please do not use it")
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name))
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
self._launch_local_predictor(gpu)
service_name = "/" + self.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return self.get_prediction(request)
self.app_instance = app_instance
def _launch_local_predictor(self, gpu):
from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor()
        self.client.load_model_config(
            "{}".format(self.model_config),
            use_gpu=gpu,
            gpu_id=self.gpus[0] if self.gpus else 0)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)
def get_app_instance(self):
return self.app_instance
def preprocess(self, feed=[], fetch=[]):
print("This API will be deprecated later. Please do not use it")
is_batch = True
feed_dict = {}
for var_name in self.feed_vars.keys():
feed_dict[var_name] = []
for feed_ins in feed:
for key in feed_ins:
feed_dict[key].append(
np.array(feed_ins[key]).reshape(
list(self.feed_vars[key].shape))[np.newaxis, :])
feed = {}
for key in feed_dict:
feed[key] = np.concatenate(feed_dict[key], axis=0)
return feed, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map=None):
print("This API will be deprecated later. Please do not use it")
for key in fetch_map:
fetch_map[key] = fetch_map[key].tolist()
return fetch_map
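# A typical way to stand up this WebService, shown as a hedged sketch: the
# service name "uci" is illustrative and the model directory mirrors the
# fit_a_line example used by the CI scripts later in this repository.
#
#     web_service = WebService(name="uci")
#     web_service.load_model_config("uci_housing_model")
#     web_service.set_gpus("0")
#     web_service.prepare_server(workdir="workdir", port=9393, device="gpu")
#     web_service.run_rpc_service()
#     web_service.run_web_service()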
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
import os import os
import logging import logging
import multiprocessing import multiprocessing
#from paddle_serving_server_gpu import OpMaker, OpSeqMaker #from paddle_serving_server import OpMaker, OpSeqMaker
#from paddle_serving_server_gpu import Server as GpuServer #from paddle_serving_server import Server as GpuServer
#from paddle_serving_server import Server as CpuServer #from paddle_serving_server import Server as CpuServer
from . import util from . import util
#from paddle_serving_app.local_predict import LocalPredictor #from paddle_serving_app.local_predict import LocalPredictor
...@@ -235,7 +235,7 @@ class LocalServiceHandler(object): ...@@ -235,7 +235,7 @@ class LocalServiceHandler(object):
server = Server() server = Server()
else: else:
#gpu or arm #gpu or arm
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server from paddle_serving_server import OpMaker, OpSeqMaker, Server
op_maker = OpMaker() op_maker = OpMaker()
read_op = op_maker.create('general_reader') read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer') general_infer_op = op_maker.create('general_infer')
......
...@@ -26,10 +26,11 @@ from time import time as _time ...@@ -26,10 +26,11 @@ from time import time as _time
import time import time
import threading import threading
import multiprocessing import multiprocessing
import copy
_LOGGER = logging.getLogger(__name__) _LOGGER = logging.getLogger(__name__)
_LOGGER.propagate = False _LOGGER.propagate = False
_is_profile = int(os.environ.get('FLAGS_profile_pipeline', 0))
class PerformanceTracer(object): class PerformanceTracer(object):
def __init__(self, is_thread_mode, interval_s, server_worker_num): def __init__(self, is_thread_mode, interval_s, server_worker_num):
...@@ -48,6 +49,8 @@ class PerformanceTracer(object): ...@@ -48,6 +49,8 @@ class PerformanceTracer(object):
self._channels = [] self._channels = []
# The size of data in Channel will not exceed server_worker_num # The size of data in Channel will not exceed server_worker_num
self._server_worker_num = server_worker_num self._server_worker_num = server_worker_num
if _is_profile:
self.profile_dict = {}
def data_buffer(self): def data_buffer(self):
return self._data_buffer return self._data_buffer
...@@ -82,7 +85,7 @@ class PerformanceTracer(object): ...@@ -82,7 +85,7 @@ class PerformanceTracer(object):
item = self._data_buffer.get_nowait() item = self._data_buffer.get_nowait()
name = item["name"] name = item["name"]
actions = item["actions"] actions = item["actions"]
if name == "DAG": if name == "DAG":
succ = item["succ"] succ = item["succ"]
req_id = item["id"] req_id = item["id"]
...@@ -106,9 +109,9 @@ class PerformanceTracer(object): ...@@ -106,9 +109,9 @@ class PerformanceTracer(object):
for action, costs in op_cost[name].items(): for action, costs in op_cost[name].items():
op_cost[name][action] = sum(costs) / (1e3 * len(costs)) op_cost[name][action] = sum(costs) / (1e3 * len(costs))
tot_cost += op_cost[name][action] tot_cost += op_cost[name][action]
if name != "DAG": if name != "DAG":
_LOGGER.info("Op({}):".format(name)) _LOGGER.info("Op({}):".format(name))
for action in all_actions: for action in all_actions:
if action in op_cost[name]: if action in op_cost[name]:
_LOGGER.info("\t{}[{} ms]".format( _LOGGER.info("\t{}[{} ms]".format(
...@@ -118,7 +121,9 @@ class PerformanceTracer(object): ...@@ -118,7 +121,9 @@ class PerformanceTracer(object):
calcu_cost += op_cost[name][action] calcu_cost += op_cost[name][action]
_LOGGER.info("\tidle[{}]".format(1 - 1.0 * calcu_cost / _LOGGER.info("\tidle[{}]".format(1 - 1.0 * calcu_cost /
tot_cost)) tot_cost))
if _is_profile:
self.profile_dict = copy.deepcopy(op_cost)
if "DAG" in op_cost: if "DAG" in op_cost:
calls = list(op_cost["DAG"].values()) calls = list(op_cost["DAG"].values())
calls.sort() calls.sort()
...@@ -137,7 +142,17 @@ class PerformanceTracer(object): ...@@ -137,7 +142,17 @@ class PerformanceTracer(object):
for latency in latencys: for latency in latencys:
_LOGGER.info("\t\t.{}[{} ms]".format(latency, calls[int( _LOGGER.info("\t\t.{}[{} ms]".format(latency, calls[int(
tot * latency / 100.0)])) tot * latency / 100.0)]))
if _is_profile:
self.profile_dict["DAG"]["query_count"] = tot
self.profile_dict["DAG"]["qps"] = qps
self.profile_dict["DAG"]["succ"] = 1 - 1.0 * err_count / tot
self.profile_dict["DAG"]["avg"] = ave_cost
for latency in latencys:
self.profile_dict["DAG"][str(latency)] = calls[int(tot * latency / 100.0)]
if _is_profile:
import yaml
with open("benchmark.log", "w") as fout:
yaml.dump(self.profile_dict, fout, default_flow_style=False)
# channel # channel
_LOGGER.info("Channel (server worker num[{}]):".format( _LOGGER.info("Channel (server worker num[{}]):".format(
self._server_worker_num)) self._server_worker_num))
......
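# Note: the hunk above reads FLAGS_profile_pipeline once at import time, so the
# benchmark.log dump is only produced when the variable is exported before the
# pipeline modules are imported. A minimal sketch of enabling it from a launcher:
#
#     import os
#     os.environ["FLAGS_profile_pipeline"] = "1"   # set before importing pipeline code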
...@@ -2,14 +2,15 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" ...@@ -2,14 +2,15 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5"
shapely==1.7.0 shapely==1.7.0
wheel>=0.34.0, <0.35.0 wheel>=0.34.0, <0.35.0
setuptools>=44.1.0 setuptools>=44.1.0
opencv-python==4.2.0.32
google>=2.0.3 google>=2.0.3
opencv-python==4.2.0.32
protobuf>=3.12.2 protobuf>=3.12.2
grpcio-tools>=1.28.1 grpcio-tools>=1.28.1
grpcio>=1.28.1 grpcio>=1.28.1
func-timeout>=4.3.5 func-timeout>=4.3.5
pyyaml>=1.3.0 pyyaml>=1.3.0
sentencepiece==0.1.92
flask>=1.1.2 flask>=1.1.2
ujson>=2.0.3 ujson>=2.0.3
sentencepiece==0.1.92; platform_machine != "aarch64"
sentencepiece; platform_machine == "aarch64"
opencv-python==4.2.0.32; platform_machine != "aarch64"
opencv-python; platform_machine == "aarch64"
...@@ -2,14 +2,13 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" ...@@ -2,14 +2,13 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5"
shapely==1.7.0 shapely==1.7.0
wheel>=0.34.0, <0.35.0 wheel>=0.34.0, <0.35.0
setuptools>=44.1.0 setuptools>=44.1.0
opencv-python==4.2.0.32
google>=2.0.3 google>=2.0.3
opencv-python==4.2.0.32 opencv-python==4.2.0.32
protobuf>=3.12.2 protobuf>=3.12.2
grpcio-tools>=1.33.2
grpcio>=1.33.2
func-timeout>=4.3.5 func-timeout>=4.3.5
pyyaml>=1.3.0 pyyaml>=1.3.0
sentencepiece==0.1.83
flask>=1.1.2 flask>=1.1.2
ujson>=2.0.3 ujson>=2.0.3
grpcio-tools>=1.33.2
grpcio>=1.33.2
sentencepiece==0.1.83
...@@ -41,8 +41,13 @@ if '${PACK}' == 'ON': ...@@ -41,8 +41,13 @@ if '${PACK}' == 'ON':
copy_lib() copy_lib()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece<=0.1.83', 'opencv-python<=4.2.0.32', 'pillow', 'six >= 1.10.0',
'pyclipper', 'shapely' 'pillow',
'pyclipper', 'shapely',
'sentencepiece<=0.1.83; platform_machine != "aarch64"',
'sentencepiece; platform_machine == "aarch64"',
'opencv-python<=4.2.0.32; platform_machine != "aarch64"',
'opencv-python; platform_machine == "aarch64"',
] ]
packages=['paddle_serving_app', packages=['paddle_serving_app',
......
...@@ -19,11 +19,15 @@ from __future__ import print_function ...@@ -19,11 +19,15 @@ from __future__ import print_function
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_server.version import serving_server_version from paddle_serving_server.version import serving_server_version, version_suffix
import util import util
max_version, mid_version, min_version = util.python_version() package_version = serving_server_version.replace('-', '')
if version_suffix != "":
version_suffix = "post" + version_suffix
package_version = package_version + "." + version_suffix
max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code # gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server") util.gen_pipeline_code("paddle_serving_server")
...@@ -55,8 +59,8 @@ package_dir={'paddle_serving_server': ...@@ -55,8 +59,8 @@ package_dir={'paddle_serving_server':
package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],} package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],}
setup( setup(
name='paddle-serving-server', name='${SERVER_PACKAGE_NAME}',
version=serving_server_version.replace('-', ''), version= package_version,
description= description=
('Paddle Serving Package for saved model with PaddlePaddle'), ('Paddle Serving Package for saved model with PaddlePaddle'),
url='https://github.com/PaddlePaddle/Serving', url='https://github.com/PaddlePaddle/Serving',
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setup for pip package."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from setuptools import setup, Distribution, Extension
from setuptools import find_packages
from setuptools import setup
from paddle_serving_server_gpu.version import serving_server_version, cuda_version
import util
if cuda_version != "trt":
cuda_version = "post" + cuda_version
max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server_gpu")
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2',
'flask >= 1.1.1', 'func_timeout', 'pyyaml'
]
packages=['paddle_serving_server_gpu',
'paddle_serving_server_gpu.proto',
'paddle_serving_server_gpu.pipeline',
'paddle_serving_server_gpu.pipeline.proto',
'paddle_serving_server_gpu.pipeline.gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto']
package_dir={'paddle_serving_server_gpu':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu',
'paddle_serving_server_gpu.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto',
'paddle_serving_server_gpu.pipeline':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline',
'paddle_serving_server_gpu.pipeline.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto',
'paddle_serving_server_gpu.pipeline.gateway':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway/proto'}
package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so'],}
setup(
name='paddle-serving-server-gpu',
version=serving_server_version.replace('-', '') + "." + cuda_version,
description=
('Paddle Serving Package for saved model with PaddlePaddle'),
url='https://github.com/PaddlePaddle/Serving',
author='PaddlePaddle Author',
author_email='guru4elephant@gmail.com',
install_requires=REQUIRED_PACKAGES,
packages=packages,
package_data=package_data,
package_dir=package_dir,
# PyPI package information.
classifiers=[
'Development Status :: 4 - Beta',
'Intended Audience :: Developers',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Mathematics',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
],
license='Apache 2.0',
keywords=('paddle-serving serving-server deployment industrial easy-to-use'))
...@@ -41,24 +41,24 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include) ...@@ -41,24 +41,24 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
include(op/CMakeLists.txt) include(op/CMakeLists.txt)
include(proto/CMakeLists.txt) include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs}) add_executable(serving ${serving_srcs})
add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference
opencv_imgcodecs cube-api) opencv_imgcodecs cube-api)
if (WITH_GPU) if (WITH_GPU)
add_dependencies(serving fluid_gpu_engine) add_dependencies(serving paddle_inference_engine)
endif() endif()
target_include_directories(serving PUBLIC target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
) )
if(WITH_GPU) if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive) -Wl,--no-whole-archive)
endif() endif()
target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive) -Wl,--no-whole-archive)
target_link_libraries(serving paddle_fluid ${paddle_depend_libs}) target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving opencv_imgcodecs target_link_libraries(serving opencv_imgcodecs
${opencv_depend_libs}) ${opencv_depend_libs})
......
...@@ -18,16 +18,16 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include) ...@@ -18,16 +18,16 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
include(op/CMakeLists.txt) include(op/CMakeLists.txt)
include(proto/CMakeLists.txt) include(proto/CMakeLists.txt)
add_executable(elastic_serving ${serving_srcs}) add_executable(elastic_serving ${serving_srcs})
add_dependencies(elastic_serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api) add_dependencies(elastic_serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api)
target_include_directories(elastic_serving PUBLIC target_include_directories(elastic_serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../predictor ${CMAKE_CURRENT_BINARY_DIR}/../../predictor
) )
target_link_libraries(elastic_serving -Wl,--whole-archive fluid_cpu_engine target_link_libraries(elastic_serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive) -Wl,--no-whole-archive)
target_link_libraries(elastic_serving paddle_fluid ${paddle_depend_libs}) target_link_libraries(elastic_serving paddle_inference ${paddle_depend_libs})
target_link_libraries(elastic_serving pdserving) target_link_libraries(elastic_serving pdserving)
target_link_libraries(elastic_serving cube-api) target_link_libraries(elastic_serving cube-api)
......
#!/bin/bash
echo "################################################################"
echo "# #"
echo "# #"
echo "# #"
echo "# Paddle Serving begin run with python2.7.15!! #"
echo "# #"
echo "# #"
echo "# #"
echo "################################################################"
export GOPATH=$HOME/go
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin
export CUDA_INCLUDE_DIRS=/usr/local/cuda-10.2/include
export PYTHONROOT=/usr/local/python2.7.15/
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
build_path=/workspace/Serving
build_whl_list=(build_gpu_server build_client build_cpu_server build_app)
rpc_model_list=(grpc_impl pipeline_imagenet bert_rpc_gpu bert_rpc_cpu faster_rcnn_model_rpc ResNet50_rpc lac_rpc \
cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc cascade_rcnn_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
ocr_rpc criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu yolov4_rpc_gpu)
http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http)
function setproxy(){
export http_proxy=${proxy}
export https_proxy=${proxy}
}
function unsetproxy(){
unset http_proxy
unset https_proxy
}
function kill_server_process(){
kill `ps -ef|grep serving|awk '{print $2}'`
}
function check() {
cd ${build_path}
if [ ! -f paddle_serving_app* ]; then
echo "paddle_serving_app is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_server-* ]; then
echo "paddle_serving_server-cpu is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_server_* ]; then
echo "paddle_serving_server_gpu is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_client* ]; then
echo "paddle_serving_server_client is compiled failed, please check your pull request"
exit 1
else
echo "paddle serving Build Passed"
fi
}
function check_result() {
if [ $? -ne 0 ];then
echo -e "\033[4;31;42m$1 model runs failed, please check your pull request or modify test case! \033[0m"
exit 1
else
echo -e "\033[4;37;42m$1 model runs successfully, congratulations! \033[0m"
fi
}
function before_hook(){
setproxy
cd ${build_path}/python
pip2.7 install --upgrade pip
pip2.7 install opencv-python==4.2.0.32 requests
pip2.7 install -r requirements.txt
echo "before hook configuration is successful.... "
}
function run_env(){
setproxy
pip2.7 install --upgrade nltk==3.4
pip2.7 install --upgrade scipy==1.2.1
pip2.7 install --upgrade setuptools
pip2.7 install paddlehub ujson paddlepaddle==2.0.0
echo "run env configuration is successful.... "
}
function run_gpu_env(){
cd ${build_path}
export LD_LIBRARY_PATH=/usr/local/python2.7.15/lib/python2.7/site-packages/paddle/libs/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/Serving/build_gpu/third_party/install/Paddle/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mklml/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mkldnn/lib/:$LD_LIBRARY_PATH
export SERVING_BIN=${build_path}/build_gpu/core/general-server/serving
echo "run gpu env configuration is successful.... "
}
function run_cpu_env(){
cd ${build_path}
export LD_LIBRARY_PATH=/usr/local/python2.7.15/lib/python2.7/site-packages/paddle/libs/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/Serving/build_cpu/third_party/install/Paddle/lib/:$LD_LIBRARY_PATH
export SERVING_BIN=${build_path}/build_cpu/core/general-server/serving
echo "run cpu env configuration is successful.... "
}
function build_gpu_server() {
setproxy
cd ${build_path}
git submodule update --init --recursive
if [ -d build_gpu ];then
rm -rf build_gpu
fi
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON \
-DTENSORRT_ROOT=/usr \
-DWITH_GPU=ON ..
make -j18
make -j18
make install -j18
pip2.7 uninstall paddle-serving-server-gpu -y
pip2.7 install ${build_path}/build/python/dist/*
cp ${build_path}/build/python/dist/* ../
cp -r ${build_path}/build/ ${build_path}/build_gpu
}
function build_client() {
setproxy
cd ${build_path}
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCLIENT=ON ..
make -j18
make -j18
cp ${build_path}/build/python/dist/* ../
pip2.7 uninstall paddle-serving-client -y
pip2.7 install ${build_path}/build/python/dist/*
}
function build_cpu_server(){
setproxy
cd ${build_path}
if [ -d build_cpu ];then
rm -rf build_cpu
fi
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
pwd
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DWITH_GPU=OFF \
-DSERVER=ON ..
make -j18
make -j18
make install -j18
cp ${build_path}/build/python/dist/* ../
pip2.7 uninstall paddle-serving-server -y
pip2.7 install ${build_path}/build/python/dist/*
cp -r ${build_path}/build/ ${build_path}/build_cpu
}
function build_app() {
setproxy
cd ${build_path}
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
make
cp ${build_path}/build/python/dist/* ../
pip2.7 uninstall paddle-serving-app -y
pip2.7 install ${build_path}/build/python/dist/*
}
function bert_rpc_gpu(){
run_gpu_env
setproxy
cd ${build_path}/python/examples/bert
sh get_data.sh >/dev/null 2>&1
sed -i 's/9292/8860/g' bert_client.py
sed -i '$aprint(result)' bert_client.py
cp -r /root/.cache/dist_data/serving/bert/bert_seq128_* ./
ls -hlst
python2.7 -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 8860 --gpu_ids 0 > bert_rpc_gpu 2>&1 &
sleep 15
head data-c.txt | python2.7 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
cat bert_rpc_gpu
check_result $FUNCNAME
kill_server_process
}
function bert_rpc_cpu(){
run_cpu_env
setproxy
cd ${build_path}/python/examples/bert
sed -i 's/8860/8861/g' bert_client.py
python2.7 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 > bert_rpc_cpu 2>&1 &
sleep 3
cp data-c.txt.1 data-c.txt
head data-c.txt | python2.7 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
cat bert_rpc_cpu
check_result $FUNCNAME
kill_server_process
}
function criteo_ctr_with_cube_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/criteo_ctr_with_cube
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./
sed -i "s/9292/8888/g" test_server.py
sed -i "s/9292/8888/g" test_client.py
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz >/dev/null 2>&1
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz >/dev/null 2>&1
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh > haha 2>&1 &
sleep 5
python2.7 test_server.py ctr_serving_model_kv > criteo_ctr_rpc 2>&1 &
sleep 5
python2.7 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
cat criteo_ctr_rpc
check_result $FUNCNAME
kill `ps -ef|grep cube|awk '{print $2}'`
kill_server_process
}
function pipeline_imagenet(){
run_gpu_env
setproxy
cd ${build_path}/python/examples/pipeline/imagenet
cp -r /root/.cache/dist_data/serving/imagenet/* ./
ls -a
python2.7 resnet50_web_service.py > pipelog 2>&1 &
sleep 5
python2.7 pipeline_rpc_client.py
# check_result $FUNCNAME
kill_server_process
}
function ResNet50_rpc(){
run_gpu_env
setproxy
cd ${build_path}/python/examples/imagenet
cp -r /root/.cache/dist_data/serving/imagenet/* ./
sed -i 's/9696/8863/g' resnet50_rpc_client.py
python2.7 -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 > ResNet50_rpc 2>&1 &
sleep 5
python2.7 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
tail ResNet50_rpc
check_result $FUNCNAME
kill_server_process
sleep 5
}
function ResNet101_rpc(){
run_gpu_env
setproxy
cd ${build_path}/python/examples/imagenet
sed -i 's/9292/8864/g' image_rpc_client.py
python2.7 -m paddle_serving_server_gpu.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 > ResNet101_rpc 2>&1 &
sleep 5
python2.7 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt
tail ResNet101_rpc
kill_server_process
check_result $FUNCNAME
}
function cnn_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
cp -r /root/.cache/dist_data/serving/imdb/* ./
tar xf imdb_model.tar.gz && tar xf text_classification_data.tar.gz
sed -i 's/9292/8865/g' test_client.py
python2.7 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 > cnn_rpc 2>&1 &
sleep 5
head test_data/part-0 | python2.7 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
tail cnn_rpc
check_result $FUNCNAME
kill_server_process
}
function bow_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
sed -i 's/8865/8866/g' test_client.py
python2.7 -m paddle_serving_server.serve --model imdb_bow_model/ --port 8866 > bow_rpc 2>&1 &
sleep 5
head test_data/part-0 | python2.7 test_client.py imdb_bow_client_conf/serving_client_conf.prototxt imdb.vocab
tail bow_rpc
check_result $FUNCNAME
kill_server_process
sleep 5
}
function lstm_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
sed -i 's/8866/8867/g' test_client.py
python2.7 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 8867 > lstm_rpc 2>&1 &
sleep 5
head test_data/part-0 | python2.7 test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
tail lstm_rpc
check_result $FUNCNAME
kill_server_process
kill `ps -ef|grep imdb|awk '{print $2}'`
}
function lac_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/lac
python2.7 -m paddle_serving_app.package --get_model lac >/dev/null 2>&1
tar xf lac.tar.gz
sed -i 's/9292/8868/g' lac_client.py
python2.7 -m paddle_serving_server.serve --model lac_model/ --port 8868 > lac_rpc 2>&1 &
sleep 5
echo "我爱北京天安门" | python2.7 lac_client.py lac_client/serving_client_conf.prototxt lac_dict/
tail lac_rpc
check_result $FUNCNAME
kill_server_process
}
function fit_a_line_rpc(){
setproxy
run_cpu_env
cd ${build_path}/python/examples/fit_a_line
sh get_data.sh >/dev/null 2>&1
sed -i 's/9393/8869/g' test_client.py
python2.7 -m paddle_serving_server.serve --model uci_housing_model --port 8869 > line_rpc 2>&1 &
sleep 5
python2.7 test_client.py uci_housing_client/serving_client_conf.prototxt
tail line_rpc
check_result $FUNCNAME
kill_server_process
}
function faster_rcnn_model_rpc(){
run_gpu_env
setproxy
cd ${build_path}/python/examples/faster_rcnn_model
cp -r /root/.cache/dist_data/serving/faster_rcnn/faster_rcnn_model.tar.gz ./
tar xf faster_rcnn_model.tar.gz
wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml >/dev/null 2>&1
mv faster_rcnn_model/pddet* ./
sed -i 's/9494/8870/g' test_client.py
python2.7 -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 8870 --gpu_id 0 > faster_rcnn_rpc 2>&1 &
sleep 3
python2.7 test_client.py pddet_client_conf/serving_client_conf.prototxt infer_cfg.yml 000000570688.jpg
tail faster_rcnn_rpc
check_result $FUNCNAME
kill_server_process
}
function cascade_rcnn_rpc(){
setproxy
run_gpu_env
cd ${build_path}/python/examples/cascade_rcnn
cp -r /root/.cache/dist_data/serving/cascade_rcnn/cascade_rcnn_r50_fpx_1x_serving.tar.gz ./
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
sed -i "s/9292/8879/g" test_client.py
python2.7 -m paddle_serving_server_gpu.serve --model serving_server --port 8879 --gpu_id 0 > rcnn_rpc 2>&1 &
ls -hlst
sleep 5
python2.7 test_client.py
tail rcnn_rpc
check_result $FUNCNAME
kill_server_process
}
function deeplabv3_rpc() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/deeplabv3
cp -r /root/.cache/dist_data/serving/deeplabv3/deeplabv3.tar.gz ./
tar xf deeplabv3.tar.gz
sed -i "s/9494/8880/g" deeplabv3_client.py
python2.7 -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 8880 > deeplab_rpc 2>&1 &
sleep 5
python2.7 deeplabv3_client.py
tail deeplab_rpc
check_result $FUNCNAME
kill_server_process
}
function mobilenet_rpc() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/mobilenet
python2.7 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet >/dev/null 2>&1
tar xf mobilenet_v2_imagenet.tar.gz
sed -i "s/9393/8881/g" mobilenet_tutorial.py
python2.7 -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 > mobilenet_rpc 2>&1 &
sleep 5
python2.7 mobilenet_tutorial.py
tail mobilenet_rpc
check_result $FUNCNAME
kill_server_process
}
function unet_rpc() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/unet_for_image_seg
python2.7 -m paddle_serving_app.package --get_model unet >/dev/null 2>&1
tar xf unet.tar.gz
sed -i "s/9494/8882/g" seg_client.py
python2.7 -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 8882 > unet_rpc 2>&1 &
sleep 5
python2.7 seg_client.py
tail unet_rpc
check_result $FUNCNAME
kill_server_process
}
function resnetv2_rpc() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/resnet_v2_50
cp /root/.cache/dist_data/serving/resnet_v2_50/resnet_v2_50_imagenet.tar.gz ./
tar xf resnet_v2_50_imagenet.tar.gz
sed -i 's/9393/8883/g' resnet50_v2_tutorial.py
python2.7 -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 8883 > v2_log 2>&1 &
sleep 10
python2.7 resnet50_v2_tutorial.py
tail v2_log
check_result $FUNCNAME
kill_server_process
}
function ocr_rpc() {
setproxy
run_cpu_env
cd ${build_path}/python/examples/ocr
cp -r /root/.cache/dist_data/serving/ocr/test_imgs ./
python2.7 -m paddle_serving_app.package --get_model ocr_rec >/dev/null 2>&1
tar xf ocr_rec.tar.gz
sed -i 's/9292/8884/g' test_ocr_rec_client.py
python2.7 -m paddle_serving_server.serve --model ocr_rec_model --port 8884 > ocr_rpc 2>&1 &
sleep 5
python2.7 test_ocr_rec_client.py
tail ocr_rpc
check_result $FUNCNAME
kill_server_process
}
function criteo_ctr_rpc_cpu() {
setproxy
run_cpu_env
cd ${build_path}/python/examples/criteo_ctr
sed -i "s/9292/8885/g" test_client.py
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./
wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz >/dev/null 2>&1
tar xf criteo_ctr_demo_model.tar.gz
mv models/ctr_client_conf .
mv models/ctr_serving_model .
python2.7 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8885 > criteo_ctr_cpu_rpc 2>&1 &
sleep 5
python2.7 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
tail criteo_ctr_cpu_rpc
check_result $FUNCNAME
kill_server_process
}
function criteo_ctr_rpc_gpu() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/criteo_ctr
sed -i "s/8885/8886/g" test_client.py
python2.7 -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 > criteo_ctr_gpu_rpc 2>&1 &
sleep 5
python2.7 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
tail criteo_ctr_gpu_rpc
check_result $FUNCNAME
kill_server_process
}
function yolov4_rpc_gpu() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/yolov4
sed -i "s/9393/8887/g" test_client.py
cp -r /root/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./
tar xf yolov4.tar.gz
python2.7 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 > yolov4_rpc_log 2>&1 &
sleep 5
python2.7 test_client.py 000000570688.jpg
tail yolov4_rpc_log
# check_result $FUNCNAME
kill_server_process
}
function senta_rpc_cpu() {
setproxy
run_gpu_env
cd ${build_path}/python/examples/senta
sed -i "s/9393/8887/g" test_client.py
cp -r /data/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./
tar xf yolov4.tar.gz
python2.7 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 > yolov4_rpc_log 2>&1 &
sleep 5
python2.7 test_client.py 000000570688.jpg
tail yolov4_rpc_log
check_result $FUNCNAME
kill_server_process
}
function fit_a_line_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/fit_a_line
sed -i "s/9292/8871/g" test_server.py
python2.7 test_server.py > http_log2 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://${host}:8871/uci/prediction
check_result $FUNCNAME
kill_server_process
}
function lac_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/lac
python2.7 lac_web_service.py lac_model/ lac_workdir 8872 > http_lac_log2 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://${host}:8872/lac/prediction
check_result $FUNCNAME
kill_server_process
}
function cnn_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python2.7 text_classify_service.py imdb_cnn_model/ workdir/ 8873 imdb.vocab > cnn_http 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8873/imdb/prediction
check_result $FUNCNAME
kill_server_process
}
function bow_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python2.7 text_classify_service.py imdb_bow_model/ workdir/ 8874 imdb.vocab > bow_http 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8874/imdb/prediction
check_result $FUNCNAME
kill_server_process
}
function lstm_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python2.7 text_classify_service.py imdb_bow_model/ workdir/ 8875 imdb.vocab > bow_http 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8875/imdb/prediction
check_result $FUNCNAME
kill_server_process
}
function ResNet50_http() {
unsetproxy
run_gpu_env
    cd ${build_path}/python/examples/imagenet
python2.7 resnet50_web_service.py ResNet50_vd_model gpu 8876 > resnet50_http 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://${host}:8876/image/prediction
check_result $FUNCNAME
kill_server_process
}
bert_http(){
run_gpu_env
unsetproxy
cd ${build_path}/python/examples/bert
cp data-c.txt.1 data-c.txt
cp vocab.txt.1 vocab.txt
export CUDA_VISIBLE_DEVICES=0
python2.7 bert_web_service.py bert_seq128_model/ 8878 > bert_http 2>&1 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://${host}:8878/bert/prediction
check_result $FUNCNAME
kill_server_process
}
grpc_impl(){
run_gpu_env
cd ${build_path}/python/examples/grpc_impl_example/fit_a_line
sh get_data.sh >/dev/null 2>&1
python2.7 test_server.py uci_housing_model/ > grpclog 2>&1 &
sleep 5
echo "sync predict"
python2.7 test_sync_client.py
echo "async predict"
python2.7 test_asyn_client.py
echo "batch predict"
python2.7 test_batch_client.py
echo "timeout predict"
python2.7 test_timeout_client.py
# check_result $FUNCNAME
kill_server_process
}
function build_all_whl(){
for whl in ${build_whl_list[@]}
do
echo "===========${whl} begin build==========="
$whl
sleep 3
echo "===========${whl} build over ==========="
done
}
function run_rpc_models(){
for model in ${rpc_model_list[@]}
do
echo "===========${model} run begin==========="
$model
sleep 3
echo "===========${model} run end ==========="
done
}
function run_http_models(){
for model in ${http_model_list[@]}
do
echo "===========${model} run begin==========="
$model
sleep 3
echo "===========${model} run end ==========="
done
}
function end_hook(){
cd ${build_path}
kill_server_process
kill `ps -ef|grep python|awk '{print $2}'`
sleep 5
echo "===========files==========="
ls -hlst
echo "=========== end ==========="
}
function main() {
before_hook
build_all_whl
check
run_env
run_rpc_models
# run_http_models
end_hook
}
main $@
#!/bin/bash
echo "################################################################"
echo "# #"
echo "# #"
echo "# #"
echo "# Paddle Serving begin run with python3.6.8! #"
echo "# #"
echo "# #"
echo "# #"
echo "################################################################"
export GOPATH=$HOME/go
export PATH=$PATH:$GOROOT/bin:$GOPATH/bin
export CUDA_INCLUDE_DIRS=/usr/local/cuda-10.2/include
export PYTHONROOT=/usr/local
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
build_path=/workspace/Serving/
build_whl_list=(build_gpu_server build_client build_cpu_server build_app)
rpc_model_list=(grpc_impl pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc lac_rpc \
cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu)
http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http)
function setproxy(){
export http_proxy=${proxy}
export https_proxy=${proxy}
}
function unsetproxy(){
unset http_proxy
unset https_proxy
}
function kill_server_process(){
kill `ps -ef|grep $1 |awk '{print $2}'`
kill `ps -ef|grep serving |awk '{print $2}'`
}
function check() {
cd ${build_path}
if [ ! -f paddle_serving_app* ]; then
echo "paddle_serving_app is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_server-* ]; then
echo "paddle_serving_server-cpu is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_server_* ]; then
echo "paddle_serving_server_gpu is compiled failed, please check your pull request"
exit 1
elif [ ! -f paddle_serving_client* ]; then
echo "paddle_serving_server_client is compiled failed, please check your pull request"
exit 1
else
echo "paddle serving build passed"
fi
}
function check_result() {
if [ $? -ne 0 ];then
echo -e "\033[4;31;42m$1 model runs failed, please check your pull request or modify test case! \033[0m"
exit 1
else
echo -e "\033[4;37;42m$1 model runs successfully, congratulations! \033[0m"
fi
}
function before_hook(){
setproxy
cd ${build_path}/python
pip3.6 install --upgrade pip
pip3.6 install requests
pip3.6 install -r requirements.txt
pip3.6 install numpy==1.16.4
echo "before hook configuration is successful.... "
}
function run_env(){
setproxy
pip3.6 install --upgrade nltk==3.4
pip3.6 install --upgrade scipy==1.2.1
pip3.6 install --upgrade setuptools==41.0.0
pip3.6 install paddlehub ujson paddlepaddle==2.0.0
echo "run env configuration is successful.... "
}
function run_gpu_env(){
cd ${build_path}
export LD_LIBRARY_PATH=/usr/local/lib64/python3.6/site-packages/paddle/libs/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/Serving/build_gpu/third_party/install/Paddle/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mklml/lib/:/workspace/Serving/build_gpu/third_party/Paddle/src/extern_paddle/third_party/install/mkldnn/lib/:$LD_LIBRARY_PATH
export SERVING_BIN=${build_path}/build_gpu/core/general-server/serving
echo "run gpu env configuration is successful.... "
}
function run_cpu_env(){
cd ${build_path}
export LD_LIBRARY_PATH=/usr/local/lib64/python3.6/site-packages/paddle/libs/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/workspace/Serving/build_cpu/third_party/install/Paddle/lib/:$LD_LIBRARY_PATH
export SERVING_BIN=${build_path}/build_cpu/core/general-server/serving
echo "run cpu env configuration is successful.... "
}
function build_gpu_server() {
setproxy
cd ${build_path}
git submodule update --init --recursive
if [ -d build_gpu ];then
rm -rf build_gpu
fi
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
-DSERVER=ON \
-DTENSORRT_ROOT=/usr \
-DWITH_GPU=ON ..
make -j18
make -j18
make install -j18
pip3.6 uninstall paddle-serving-server-gpu -y
pip3.6 install ${build_path}/build/python/dist/*
cp ${build_path}/build/python/dist/* ../
cp -r ${build_path}/build/ ${build_path}/build_gpu
}
function build_client() {
setproxy
cd ${build_path}
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
-DCLIENT=ON ..
make -j18
make -j18
cp ${build_path}/build/python/dist/* ../
pip3.6 uninstall paddle-serving-client -y
pip3.6 install ${build_path}/build/python/dist/*
}
function build_cpu_server(){
setproxy
cd ${build_path}
if [ -d build_cpu ];then
rm -rf build_cpu
fi
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython3.6.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
-DWITH_GPU=OFF \
-DSERVER=ON ..
make -j18
make -j18
make install -j18
cp ${build_path}/build/python/dist/* ../
pip3.6 uninstall paddle-serving-server -y
pip3.6 install ${build_path}/build/python/dist/*
cp -r ${build_path}/build/ ${build_path}/build_cpu
}
function build_app() {
setproxy
pip3.6 install paddlehub ujson Pillow
pip3.6 install paddlepaddle==2.0.0
cd ${build_path}
if [ -d build ];then
rm -rf build
fi
mkdir build && cd build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python3.6m/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython3.6.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.6 \
-DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
make
cp ${build_path}/build/python/dist/* ../
pip3.6 uninstall paddle-serving-app -y
pip3.6 install ${build_path}/build/python/dist/*
}
function bert_rpc_gpu(){
run_gpu_env
unsetproxy
cd ${build_path}/python/examples/bert
sh get_data.sh >/dev/null 2>&1
sed -i 's/9292/8860/g' bert_client.py
sed -i '$aprint(result)' bert_client.py
cp -r /root/.cache/dist_data/serving/bert/bert_seq128_* ./
ls -hlst
python3.6 -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 8860 --gpu_ids 0 &
sleep 15
nvidia-smi
head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function bert_rpc_cpu(){
run_cpu_env
unsetproxy
cd ${build_path}/python/examples/bert
sed -i 's/8860/8861/g' bert_client.py
python3.6 -m paddle_serving_server.serve --model bert_seq128_model/ --port 8861 &
sleep 3
cp data-c.txt.1 data-c.txt
head data-c.txt | python3.6 bert_client.py --model bert_seq128_client/serving_client_conf.prototxt
check_result $FUNCNAME
kill_server_process serving
}
function criteo_ctr_with_cube_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/criteo_ctr_with_cube
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./
sed -i "s/9292/8888/g" test_server.py
sed -i "s/9292/8888/g" test_client.py
wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz >/dev/null 2>&1
tar xf ctr_cube_unittest.tar.gz
mv models/ctr_client_conf ./
mv models/ctr_serving_model_kv ./
mv models/data ./cube/
wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz >/dev/null 2>&1
tar xf cube_app.tar.gz
mv cube_app/cube* ./cube/
sh cube_prepare.sh > haha 2>&1 &
sleep 5
python3.6 test_server.py ctr_serving_model_kv &
sleep 5
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
check_result $FUNCNAME
kill `ps -ef|grep cube|awk '{print $2}'`
kill_server_process test_server
}
function pipeline_imagenet(){
run_gpu_env
unsetproxy
cd ${build_path}/python/examples/pipeline/imagenet
cp -r /root/.cache/dist_data/serving/imagenet/* ./
ls -a
python3.6 resnet50_web_service.py &
sleep 5
nvidia-smi
python3.6 pipeline_rpc_client.py
nvidia-smi
# check_result $FUNCNAME
kill_server_process resnet50_web_service
}
function ResNet50_rpc(){
run_gpu_env
unsetproxy
cd ${build_path}/python/examples/imagenet
cp -r /root/.cache/dist_data/serving/imagenet/* ./
sed -i 's/9696/8863/g' resnet50_rpc_client.py
python3.6 -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 8863 --gpu_ids 0 &
sleep 5
nvidia-smi
python3.6 resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function ResNet101_rpc(){
run_gpu_env
unsetproxy
cd ${build_path}/python/examples/imagenet
sed -i "22cclient.connect(['${host}:8864'])" image_rpc_client.py
python3.6 -m paddle_serving_server_gpu.serve --model ResNet101_vd_model --port 8864 --gpu_ids 0 &
sleep 5
nvidia-smi
python3.6 image_rpc_client.py ResNet101_vd_client_config/serving_client_conf.prototxt
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
sleep 5
}
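# IMDB sentiment RPC tests on CPU: CNN (8865), BOW (8866) and LSTM (8867) models.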
function cnn_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
cp -r /root/.cache/dist_data/serving/imdb/* ./
tar xf imdb_model.tar.gz && tar xf text_classification_data.tar.gz
sed -i 's/9292/8865/g' test_client.py
python3.6 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 8865 &
sleep 5
head test_data/part-0 | python3.6 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab
check_result $FUNCNAME
kill_server_process serving
}
function bow_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
sed -i 's/8865/8866/g' test_client.py
python3.6 -m paddle_serving_server.serve --model imdb_bow_model/ --port 8866 &
sleep 5
head test_data/part-0 | python3.6 test_client.py imdb_bow_client_conf/serving_client_conf.prototxt imdb.vocab
check_result $FUNCNAME
kill_server_process serving
}
function lstm_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
sed -i 's/8866/8867/g' test_client.py
python3.6 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 8867 &
sleep 5
head test_data/part-0 | python3.6 test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
check_result $FUNCNAME
kill_server_process serving
}
function lac_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/lac
python3.6 -m paddle_serving_app.package --get_model lac >/dev/null 2>&1
tar xf lac.tar.gz
sed -i 's/9292/8868/g' lac_client.py
python3.6 -m paddle_serving_server.serve --model lac_model/ --port 8868 &
sleep 5
echo "我爱北京天安门" | python3.6 lac_client.py lac_client/serving_client_conf.prototxt lac_dict/
check_result $FUNCNAME
kill_server_process serving
}
function fit_a_line_rpc(){
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/fit_a_line
sh get_data.sh >/dev/null 2>&1
sed -i 's/9393/8869/g' test_client.py
python3.6 -m paddle_serving_server.serve --model uci_housing_model --port 8869 &
sleep 5
python3.6 test_client.py uci_housing_client/serving_client_conf.prototxt
check_result $FUNCNAME
kill_server_process serving
}
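# Detection RPC tests on GPU: Faster R-CNN on port 8870 and Cascade R-CNN on port 8879.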
function faster_rcnn_model_rpc(){
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/faster_rcnn
cp -r /root/.cache/dist_data/serving/faster_rcnn/faster_rcnn_model.tar.gz ./
tar xf faster_rcnn_model.tar.gz
wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml >/dev/null 2>&1
mv faster_rcnn_model/pddet* ./
sed -i 's/9494/8870/g' test_client.py
python3.6 -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 8870 --gpu_ids 0 --thread 2 &
echo "faster rcnn running ..."
nvidia-smi
sleep 5
python3.6 test_client.py pddet_client_conf/serving_client_conf.prototxt infer_cfg.yml 000000570688.jpg
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function cascade_rcnn_rpc(){
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/cascade_rcnn
cp -r /root/.cache/dist_data/serving/cascade_rcnn/cascade_rcnn_r50_fpx_1x_serving.tar.gz ./
tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
sed -i "s/9292/8879/g" test_client.py
python3.6 -m paddle_serving_server_gpu.serve --model serving_server --port 8879 --gpu_ids 0 --thread 2 &
sleep 5
nvidia-smi
python3.6 test_client.py
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
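# Single-model GPU RPC tests: DeepLabv3 (8880), MobileNetV2 (8881), U-Net (8882) and ResNet-v2-50 (8883).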
function deeplabv3_rpc() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/deeplabv3
cp -r /root/.cache/dist_data/serving/deeplabv3/deeplabv3.tar.gz ./
tar xf deeplabv3.tar.gz
sed -i "s/9494/8880/g" deeplabv3_client.py
python3.6 -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 8880 --thread 2 &
sleep 5
nvidia-smi
python3.6 deeplabv3_client.py
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function mobilenet_rpc() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/mobilenet
python3.6 -m paddle_serving_app.package --get_model mobilenet_v2_imagenet >/dev/null 2>&1
tar xf mobilenet_v2_imagenet.tar.gz
sed -i "s/9393/8881/g" mobilenet_tutorial.py
python3.6 -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 8881 &
sleep 5
nvidia-smi
python3.6 mobilenet_tutorial.py
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function unet_rpc() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/unet_for_image_seg
python3.6 -m paddle_serving_app.package --get_model unet >/dev/null 2>&1
tar xf unet.tar.gz
sed -i "s/9494/8882/g" seg_client.py
python3.6 -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 8882 &
sleep 5
nvidia-smi
python3.6 seg_client.py
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
function resnetv2_rpc() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/resnet_v2_50
cp /root/.cache/dist_data/serving/resnet_v2_50/resnet_v2_50_imagenet.tar.gz ./
tar xf resnet_v2_50_imagenet.tar.gz
sed -i 's/9393/8883/g' resnet50_v2_tutorial.py
python3.6 -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 8883 &
sleep 10
nvidia-smi
python3.6 resnet50_v2_tutorial.py
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
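# OCR recognition RPC test on CPU, port 8884.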
function ocr_rpc() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/ocr
cp -r /root/.cache/dist_data/serving/ocr/test_imgs ./
python3.6 -m paddle_serving_app.package --get_model ocr_rec >/dev/null 2>&1
tar xf ocr_rec.tar.gz
sed -i 's/9292/8884/g' test_ocr_rec_client.py
python3.6 -m paddle_serving_server.serve --model ocr_rec_model --port 8884 &
sleep 5
python3.6 test_ocr_rec_client.py
# check_result $FUNCNAME
kill_server_process serving
}
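# Criteo CTR RPC tests without cube: CPU on port 8885, GPU on port 8886.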
function criteo_ctr_rpc_cpu() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/criteo_ctr
sed -i "s/9292/8885/g" test_client.py
ln -s /root/.cache/dist_data/serving/criteo_ctr_with_cube/raw_data ./
wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz >/dev/null 2>&1
tar xf criteo_ctr_demo_model.tar.gz
mv models/ctr_client_conf .
mv models/ctr_serving_model .
python3.6 -m paddle_serving_server.serve --model ctr_serving_model/ --port 8885 &
sleep 5
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
check_result $FUNCNAME
kill_server_process serving
}
function criteo_ctr_rpc_gpu() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/criteo_ctr
sed -i "s/8885/8886/g" test_client.py
wget https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz >/dev/null 2>&1
python3.6 -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 8886 --gpu_ids 0 &
sleep 5
nvidia-smi
python3.6 test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
nvidia-smi
check_result $FUNCNAME
kill $(ps -ef | grep ctr | grep -v grep | awk '{print $2}')
kill_server_process serving
}
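# YOLOv4 RPC test on GPU, port 8887.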
function yolov4_rpc_gpu() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/yolov4
sed -i "s/9393/8887/g" test_client.py
cp -r /root/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./
tar xf yolov4.tar.gz
python3.6 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 &
nvidia-smi
sleep 5
python3.6 test_client.py 000000570688.jpg
nvidia-smi
# check_result $FUNCNAME
kill_server_process serving
}
function senta_rpc_cpu() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/senta
sed -i "s/9393/8887/g" test_client.py
cp -r /data/.cache/dist_data/serving/yolov4/yolov4.tar.gz ./
tar xf yolov4.tar.gz
python3.6 -m paddle_serving_server_gpu.serve --model yolov4_model --port 8887 --gpu_ids 0 &
nvidia-smi
sleep 5
python3.6 test_client.py 000000570688.jpg
nvidia-smi
check_result $FUNCNAME
kill_server_process serving
}
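# HTTP web service tests: each starts a web server and checks its prediction endpoint with curl.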
function fit_a_line_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/fit_a_line
sed -i "s/9292/8871/g" test_server.py
python3.6 test_server.py &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch":["price"]}' http://${host}:8871/uci/prediction
check_result $FUNCNAME
kill_server_process test_server
}
function lac_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/lac
python3.6 lac_web_service.py lac_model/ lac_workdir 8872 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "我爱北京天安门"}], "fetch":["word_seg"]}' http://${host}:8872/lac/prediction
check_result $FUNCNAME
kill_server_process lac_web_service
}
function cnn_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python3.6 text_classify_service.py imdb_cnn_model/ workdir/ 8873 imdb.vocab &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8873/imdb/prediction
check_result $FUNCNAME
kill_server_process text_classify_service
}
function bow_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python3.6 text_classify_service.py imdb_bow_model/ workdir/ 8874 imdb.vocab &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8874/imdb/prediction
check_result $FUNCNAME
kill_server_process text_classify_service
}
function lstm_http() {
unsetproxy
run_cpu_env
cd ${build_path}/python/examples/imdb
python3.6 text_classify_service.py imdb_lstm_model/ workdir/ 8875 imdb.vocab &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "i am very sad | 0"}], "fetch":["prediction"]}' http://${host}:8875/imdb/prediction
check_result $FUNCNAME
kill $(ps -ef | grep imdb | grep -v grep | awk '{print $2}')
kill_server_process text_classify_service
}
function ResNet50_http() {
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/imagenet
python3.6 resnet50_web_service.py ResNet50_vd_model gpu 8876 &
sleep 10
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"image": "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"}], "fetch": ["score"]}' http://${host}:8876/image/prediction
check_result $FUNCNAME
kill_server_process resnet50_web_service
}
function bert_http(){
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/bert
cp data-c.txt.1 data-c.txt
cp vocab.txt.1 vocab.txt
export CUDA_VISIBLE_DEVICES=0
python3.6 bert_web_service.py bert_seq128_model/ 8878 &
sleep 5
curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}], "fetch":["pooled_output"]}' http://127.0.0.1:8878/bert/prediction
check_result $FUNCNAME
kill_server_process bert_web_service
}
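# gRPC client examples for fit_a_line: sync, async, batch and timeout clients against a local test server.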
function grpc_impl(){
unsetproxy
run_gpu_env
cd ${build_path}/python/examples/grpc_impl_example/fit_a_line
sh get_data.sh >/dev/null 2>&1
python3.6 test_server.py uci_housing_model/ &
sleep 5
echo "sync predict"
python3.6 test_sync_client.py
echo "async predict"
python3.6 test_asyn_client.py
echo "batch predict"
python3.6 test_batch_client.py
echo "timeout predict"
python3.6 test_timeout_client.py
# check_result $FUNCNAME
kill_server_process test_server
}
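# Driver helpers: iterate over build_whl_list, rpc_model_list and http_model_list and invoke each entry.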
function build_all_whl(){
for whl in ${build_whl_list[@]}
do
echo "===========${whl} begin build==========="
$whl
sleep 3
echo "===========${whl} build over ==========="
done
}
function run_rpc_models(){
for model in ${rpc_model_list[@]}
do
echo "===========${model} run begin==========="
$model
sleep 3
echo "===========${model} run end ==========="
done
}
function run_http_models(){
for model in ${http_model_list[@]}
do
echo "===========${model} run begin==========="
$model
sleep 3
echo "===========${model} run end ==========="
done
}
function end_hook(){
cd ${build_path}
kill_server_process
kill $(ps -ef | grep python | grep -v grep | awk '{print $2}')
sleep 5
echo "===========files==========="
ls -hlst
echo "=========== end ==========="
}
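# Entry point: build the wheels, prepare the environment, run the RPC model tests and clean up.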
function main() {
before_hook
build_all_whl
check
run_env
run_rpc_models
# run_http_models
end_hook
}
main "$@"