diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index b3481462ef01097257e49c4bfd389b64e90f3f3e..2a855b88bfb439647fb2ea22b2b592e0f611a3cd 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -30,7 +30,7 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "2.3.0")
+SET(PADDLE_VERSION "2.3.0-no-ort")
 if (WITH_GPU)
   message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}")
   # cuda 11.0 is not supported, 11.2 would be added.
@@ -171,25 +171,25 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
 LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
-LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)
+#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
+#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)
 
-SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
-LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)
+#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
+#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)
 
 if (NOT WITH_MKLML)
   ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
   SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 endif()
 
-ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)
+#ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
+#SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)
 
-ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)
+#ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
+#SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)
 
 ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
+SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
 if (WITH_ASCEND_CL)
   SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
 endif()
diff --git a/core/configure/proto/general_model_service.proto b/core/configure/proto/general_model_service.proto
index f3da98a9236cc8a4529fc2b63ba6531b00da9b25..b4f1ce0cfbb1f407a217e1a72440b55489e8a1c3 100644
--- a/core/configure/proto/general_model_service.proto
+++ b/core/configure/proto/general_model_service.proto
@@ -92,7 +92,6 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
-  // Error code
   int32 err_no = 5;
   // Error messages
   string err_msg = 6;
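
Editor's note on the `Response` hunks in this patch: the message carries explicit error fields (`err_no`, `err_msg`) alongside `log_id`. Below is a minimal sketch of how a client might consume them, assuming Python bindings generated from this proto with `protoc --python_out`; the module name `general_model_service_pb2` and the convention that `err_no == 0` means success are assumptions, not part of the patch.

```python
# Hypothetical usage of the Response error fields defined above.
# Assumes bindings were generated from general_model_service.proto;
# the module name is an assumption for illustration only.
import general_model_service_pb2 as pb


def check_response(resp: "pb.Response") -> None:
    # Assumed convention: err_no == 0 means success; otherwise err_msg
    # carries the human-readable error description.
    if resp.err_no != 0:
        raise RuntimeError(
            f"inference failed (err_no={resp.err_no}): {resp.err_msg}")


resp = pb.Response(log_id=42, err_no=0, err_msg="")
check_response(resp)  # no exception for a successful response
```
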
diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto
index f5edd23fd104c9e4fb65b308aa119e2ea0db10d4..4f49aa3c959a63afbb623853bcc1f0c14cbd52e3 100644
--- a/core/configure/proto/server_configure.proto
+++ b/core/configure/proto/server_configure.proto
@@ -51,17 +51,14 @@ message EngineDesc {
 
   /*
    * "gpu_memory_mb": allocate gpu memory by config.EnableUseGpu()
-   * "cpu_math_thread_num": set thread numbers of cpu math by
-   * config.SetCpuMathLibraryNumThreads()
-   * "trt_workspace_size": set TensorRT workspace size by
-   * config.EnableTensorRtEngine(), 1 << 25 default
-   * "trt_use_static": If true, save the optimization information of the TRT
-   * serialized to the disk, and load from the disk.
+   * "cpu_math_thread_num": set thread numbers of cpu math by config.SetCpuMathLibraryNumThreads()
+   * "trt_workspace_size": set TensorRT workspace size by config.EnableTensorRtEngine(), 1 << 25 default
+   * "trt_use_static": If true, save the optimization information of the TRT serialized to the disk, and load from the disk.
    */
-  optional int32 gpu_memory_mb = 22 [ default = 100 ];
-  optional int32 cpu_math_thread_num = 23 [ default = 1 ];
-  optional int32 trt_workspace_size = 24 [ default = 33554432 ];
-  optional bool trt_use_static = 25 [ default = false ];
+  optional int32 gpu_memory_mb = 22 [default = 100];
+  optional int32 cpu_math_thread_num = 23 [default = 1];
+  optional int32 trt_workspace_size = 24 [default = 33554432];
+  optional bool trt_use_static = 25 [default = false];
 
   /*
    * "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto
index 904ffb97f6f89b841f167215d207970fb2435d2a..a5adeeb95b59f65f5d009bdafb7e034a631a4e6f 100755
--- a/core/general-server/proto/general_model_service.proto
+++ b/core/general-server/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
+  // Error code
   int32 err_no = 5;
-  // Error messages
   string err_msg = 6;
 };
diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto
index de9516259011ffab847593a607ce25a863dd72a7..5c17f955fe63a82f52f54b3c394ab1b9324608cb 100755
--- a/core/sdk-cpp/proto/general_model_service.proto
+++ b/core/sdk-cpp/proto/general_model_service.proto
@@ -94,9 +94,9 @@ message Response {
   repeated int64 profile_time = 2;
   bool profile_server = 3;
   uint64 log_id = 4;
+  // Error code
   int32 err_no = 5;
-  // Error messages
   string err_msg = 6;
 };
diff --git a/core/sdk-cpp/proto/load_general_model_service.proto b/core/sdk-cpp/proto/load_general_model_service.proto
index c58f79ecd6b00e82bd959d24b20ffaa653360d45..da731589c11695bb808bb9fab6ee60d12d67a69f 100644
--- a/core/sdk-cpp/proto/load_general_model_service.proto
+++ b/core/sdk-cpp/proto/load_general_model_service.proto
@@ -21,6 +21,7 @@ option cc_generic_services = true;
 message RequestAndResponse {
   required int32 a = 1;
   required float b = 2;
+  required uint64 log_id = 3 [ default = 0 ];
 };
 
 service LoadGeneralModelService {
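
Editor's note on the hunk above: it adds a `log_id` field to `RequestAndResponse` so load requests can be correlated with server logs. A small sketch of stamping the field, again assuming Python bindings generated from this proto; the module name `load_general_model_service_pb2` is hypothetical.

```python
# Hypothetical use of the new log_id field in RequestAndResponse.
# Assumes bindings were generated from load_general_model_service.proto.
import time

import load_general_model_service_pb2 as pb

req = pb.RequestAndResponse(a=1, b=2.0)
print(req.log_id)  # proto2 default applies: prints 0

# Stamp a (roughly) unique id so this request can be traced end to end
# through server-side log lines that print the same log_id.
req.log_id = int(time.time() * 1000)
```
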
diff --git a/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md b/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md
index ed7daf542c0fa16f2da7b41d410587803c7f0f52..37c2f2af9be8aa5efcdf7b8fe87991c406ef6d71 100644
--- a/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md
+++ b/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md
@@ -8,8 +8,8 @@
   - [Enabling Synchronous Mode](#2.1)
   - [Enabling Asynchronous Mode](#2.2)
 - [Performance Testing](#3)
-  - [Test Data](#3.1)
-  - [Test Conclusions](#3.2)
+  - [Test Results](#3.1)
+  - [Test Data](#3.2)
@@ -75,7 +75,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --p
 
 **2. Enabling Asynchronous Mode**
 
-The startup command uses `--runtime_thread_num 4` and `--batch_infer_size 32` to enable asynchronous mode: the Serving framework starts 8 asynchronous threads, merges at most 32 requests into one batch, and enables dynamic padding automatically.
+The startup command uses `--runtime_thread_num 4` and `--batch_infer_size 32` to enable asynchronous mode: the Serving framework starts 4 asynchronous threads, merges at most 32 requests into one batch, and enables dynamic padding automatically.
 ```
 python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --port 9292 --runtime_thread_num 4 --batch_infer_size 32 --ir_optim --gpu_multi_stream --gpu_ids 0
 ```
@@ -84,11 +84,12 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --p
 ## Performance Testing
 
-GPU: Tesla P4 7611 MiB
-Cuda: cuda11.2-cudnn8-trt8
-Python: python3.7
-Model: ResNet_v2_50
-Test data: all-ones inputs, 100 requests from a single client, shape range (1, 224 ± 50, 224 ± 50)
+
+- GPU: Tesla P4 7611 MiB
+- CUDA: cuda11.2-cudnn8-trt8
+- Python version: python3.7
+- Model: ResNet_v2_50
+- Test data: all-ones inputs, 100 requests from a single client, shape range (1, 224 ± 50, 224 ± 50)
 
 Synchronous mode startup command:
 ```
 python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --por
@@ -102,7 +103,25 @@ python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --por
 
-**1. Test Data**
+**1. Test Results**
+
+With asynchronous mode and dynamic batching enabled, throughput improves substantially when data of varying shapes is tested concurrently.
+