未验证 提交 7b77852b 编写于 作者: T TeslaZhao 提交者: GitHub

Merge pull request #32 from PaddlePaddle/develop

Sync codes
......@@ -53,7 +53,7 @@ We consider deploying deep learning inference service online to be a user-facing
<h2 align="center">AIStudio Turorial</h2>
Here we provide tutorial on AIStudio(Chinese Version) [AIStudio教程-Paddle Serving服务化部署框架](https://aistudio.baidu.com/aistudio/projectdetail/1550674)
Here we provide tutorial on AIStudio(Chinese Version) [AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
The tutorial provides
<ul>
......@@ -85,14 +85,14 @@ We **highly recommend** you to **run Paddle Serving in Docker**, please visit [R
```
# Run CPU Docker
docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
```
# Run GPU Docker
nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
nvidia-docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
......
......@@ -53,7 +53,7 @@ Paddle Serving 旨在帮助深度学习开发者轻易部署在线预测服务
<h2 align="center">教程</h2>
Paddle Serving开发者为您提供了简单易用的[AIStudio教程-Paddle Serving服务化部署框架](https://aistudio.baidu.com/aistudio/projectdetail/1550674)
Paddle Serving开发者为您提供了简单易用的[AIStudio教程-Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1555945)
教程提供了如下内容
......@@ -86,14 +86,14 @@ Paddle Serving开发者为您提供了简单易用的[AIStudio教程-Paddle Serv
```
# 启动 CPU Docker
docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel
docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-devel bash
docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
```
# 启动 GPU Docker
nvidia-docker pull registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel
nvidia-docker run -p 9292:9292 --name test -dit registry.baidubce.com/paddlepaddle/serving:0.5.0-cuda10.2-cudnn8-devel bash
nvidia-docker exec -it test bash
git clone https://github.com/PaddlePaddle/Serving
```
......
......@@ -18,7 +18,7 @@ SET(PADDLE_SOURCES_DIR ${THIRD_PARTY_PATH}/Paddle)
SET(PADDLE_DOWNLOAD_DIR ${PADDLE_SOURCES_DIR}/src/extern_paddle)
SET(PADDLE_INSTALL_DIR ${THIRD_PARTY_PATH}/install/Paddle/)
SET(PADDLE_INCLUDE_DIR "${PADDLE_INSTALL_DIR}/include" CACHE PATH "PaddlePaddle include directory." FORCE)
SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a" CACHE FILEPATH "Paddle library." FORCE)
SET(PADDLE_LIBRARIES "${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a" CACHE FILEPATH "Paddle library." FORCE)
message("paddle install dir: " ${PADDLE_INSTALL_DIR})
......@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "2.0.0")
SET(PADDLE_VERSION "2.0.1")
if (WITH_GPU)
if(CUDA_VERSION EQUAL 11.0)
set(CUDA_SUFFIX "cuda11-cudnn8-avx-mkl")
......@@ -55,9 +55,9 @@ if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-${CUDA_SUFFIX}")
elseif (WITH_LITE)
if (WITH_XPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm-xpu")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}-xpu")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-arm")
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-${CMAKE_SYSTEM_PROCESSOR}")
endif()
else()
if (WITH_AVX)
......@@ -139,8 +139,8 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
......
if (SERVER OR CLIENT)
LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/inferencer_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/general_model_config.proto
)
PROTOBUF_GENERATE_CPP(configure_proto_srcs configure_proto_hdrs ${protofiles})
list(APPEND configure_srcs ${configure_proto_srcs})
list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
add_library(configure ${configure_srcs})
add_dependencies(configure brpc)
install(TARGETS configure
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
install(FILES ${CMAKE_CURRENT_LIST_DIR}/include/configure_parser.h
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure/include)
FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
LIST(APPEND protofiles
${CMAKE_CURRENT_LIST_DIR}/proto/server_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/sdk_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/inferencer_configure.proto
${CMAKE_CURRENT_LIST_DIR}/proto/general_model_config.proto
)
PROTOBUF_GENERATE_CPP(configure_proto_srcs configure_proto_hdrs ${protofiles})
list(APPEND configure_srcs ${configure_proto_srcs})
list(APPEND configure_srcs ${CMAKE_CURRENT_LIST_DIR}/src/configure_parser.cpp)
add_library(configure ${configure_srcs})
add_dependencies(configure brpc)
install(TARGETS configure
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
install(FILES ${CMAKE_CURRENT_LIST_DIR}/include/configure_parser.h
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure/include)
FILE(GLOB inc ${CMAKE_CURRENT_BINARY_DIR}/*.pb.h)
install(FILES ${inc}
DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/include/configure)
endif()
if (WITH_PYTHON)
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init)
add_custom_command(TARGET sdk_configure_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated python proto into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(server_config_py_proto server_config_py_proto_init)
if (NOT WITH_GPU AND NOT WITH_LITE)
add_custom_command(TARGET server_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated python proto into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR})
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
else()
add_custom_command(TARGET server_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory
${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMAND cp -f *.py
${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMENT "Copy generated python proto into directory
paddle_serving_server_gpu/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR})
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory
${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMAND cp -f *.py
${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMENT "Copy generated general_model_config proto file into directory
paddle_serving_server_gpu/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server_gpu/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.proto)
add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(general_model_config_py_proto general_model_config_py_proto_init)
py_grpc_proto_compile(multi_lang_general_model_service_py_proto SRCS proto/multi_lang_general_model_service.proto)
add_custom_target(multi_lang_general_model_service_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(multi_lang_general_model_service_py_proto multi_lang_general_model_service_py_proto_init)
if (CLIENT)
py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto)
add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init)
add_custom_command(TARGET sdk_configure_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated python proto into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_client/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (APP)
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_app/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_app/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
if (SERVER)
py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
add_dependencies(server_config_py_proto server_config_py_proto_init)
add_custom_command(TARGET server_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated python proto into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINRARY_DIR})
add_custom_command(TARGET general_model_config_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMAND cp -f *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
COMMENT "Copy generated multi_lang_general_model_service proto file into directory paddle_serving_server/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
......@@ -20,7 +20,7 @@ message EngineDesc {
required string type = 2;
required string reloadable_meta = 3;
required string reloadable_type = 4;
required string model_data_path = 5;
required string model_dir = 5;
required int32 runtime_thread_num = 6;
required int32 batch_infer_size = 7;
required int32 enable_batch_align = 8;
......@@ -41,12 +41,13 @@ message EngineDesc {
optional SparseParamServiceType sparse_param_service_type = 11;
optional string sparse_param_service_table_name = 12;
optional bool enable_memory_optimization = 13;
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
optional bool use_lite = 18;
optional bool use_xpu = 19;
optional bool enable_ir_optimization = 14;
optional bool use_trt = 15;
optional bool use_lite = 16;
optional bool use_xpu = 17;
optional bool use_gpu = 18;
optional bool combined_model = 19;
optional bool encrypted_model = 20;
};
// model_toolkit conf
......
......@@ -69,8 +69,6 @@ int test_write_conf() {
engine->set_sparse_param_service_type(EngineDesc::LOCAL);
engine->set_sparse_param_service_table_name("local_kv");
engine->set_enable_memory_optimization(true);
engine->set_static_optimization(false);
engine->set_force_update_static_cache(false);
int ret = baidu::paddle_serving::configure::write_proto_conf(
&model_toolkit_conf, output_dir, model_toolkit_conf_file);
......
if(CLIENT)
add_subdirectory(pybind11)
pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp)
add_dependencies(serving_client sdk_cpp)
target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -Wl,-rpath,'$ORIGIN'/lib)
endif()
......@@ -2,33 +2,25 @@ include_directories(SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../../)
include(op/CMakeLists.txt)
include(proto/CMakeLists.txt)
add_executable(serving ${serving_srcs})
add_dependencies(serving pdcodegen fluid_cpu_engine pdserving paddle_fluid cube-api utils)
add_dependencies(serving pdcodegen paddle_inference_engine pdserving paddle_inference cube-api utils)
if (WITH_GPU)
add_dependencies(serving fluid_gpu_engine)
add_dependencies(serving paddle_inference_engine)
endif()
if (WITH_LITE)
add_dependencies(serving fluid_arm_engine)
add_dependencies(serving paddle_inference_engine)
endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
include_directories(${CUDNN_ROOT}/include/)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-Wl,--no-whole-archive)
endif()
if(WITH_LITE)
target_link_libraries(serving -Wl,--whole-archive fluid_arm_engine
-Wl,--no-whole-archive)
endif()
)
include_directories(${CUDNN_ROOT}/include/)
target_link_libraries(serving -Wl,--whole-archive fluid_cpu_engine
target_link_libraries(serving -Wl,--whole-archive paddle_inference_engine
-Wl,--no-whole-archive)
target_link_libraries(serving paddle_fluid ${paddle_depend_libs})
target_link_libraries(serving paddle_inference ${paddle_depend_libs})
target_link_libraries(serving brpc)
target_link_libraries(serving protobuf)
target_link_libraries(serving pdserving)
......
......@@ -12,12 +12,12 @@ set_source_files_properties(
${pdserving_srcs}
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_fluid)
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure extern_paddle paddle_inference)
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_fluid ${paddle_depend_libs})
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs})
# install
install(TARGETS pdserving
RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin
......
......@@ -14,6 +14,7 @@
#pragma once
#include <string>
#include <fstream>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/common/macros.h"
......@@ -148,6 +149,16 @@ class IsDerivedFrom {
}
};
static void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
} // namespace predictor
} // namespace paddle_serving
} // namespace baidu
......@@ -16,6 +16,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <pthread.h>
#include <string>
#include <utility>
#include <vector>
......@@ -29,83 +30,29 @@ namespace predictor {
using configure::ModelToolkitConf;
class InferEngineCreationParams {
class AutoLock {
public:
InferEngineCreationParams() {
_path = "";
_enable_memory_optimization = false;
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
_use_trt = false;
_use_lite = false;
_use_xpu = false;
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
void set_path(const std::string& path) { _path = path; }
void set_enable_memory_optimization(bool enable_memory_optimization) {
_enable_memory_optimization = enable_memory_optimization;
}
void set_enable_ir_optimization(bool enable_ir_optimization) {
_enable_ir_optimization = enable_ir_optimization;
}
void set_use_trt(bool use_trt) { _use_trt = use_trt; }
void set_use_lite(bool use_lite) { _use_lite = use_lite; }
void set_use_xpu(bool use_xpu) { _use_xpu = use_xpu; }
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
bool use_trt() const { return _use_trt; }
bool use_lite() const { return _use_lite; }
bool use_xpu() const { return _use_xpu; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
void set_force_update_static_cache(bool force_update_static_cache = false) {
_force_update_static_cache = force_update_static_cache;
}
bool static_optimization() const { return _static_optimization; }
bool force_update_static_cache() const { return _force_update_static_cache; }
private:
pthread_mutex_t& _mut;
};
std::string get_path() const { return _path; }
class GlobalCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
void dump() const {
LOG(INFO) << "InferEngineCreationParams: "
<< "model_path = " << _path << ", "
<< "enable_memory_optimization = " << _enable_memory_optimization
<< ", "
<< "enable_tensorrt = " << _use_trt << ", "
<< "enable_lite = " << _use_lite << ", "
<< "enable_xpu = " << _use_xpu << ", "
<< "enable_ir_optimization = " << _enable_ir_optimization << ", "
<< "static_optimization = " << _static_optimization << ", "
<< "force_update_static_cache = " << _force_update_static_cache;
static pthread_mutex_t& instance() {
static GlobalCreateMutex gmutex;
return gmutex.mutex();
}
private:
std::string _path;
bool _enable_memory_optimization;
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
bool _use_trt;
bool _use_lite;
bool _use_xpu;
GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
class InferEngine {
......@@ -152,57 +99,19 @@ class ReloadableInferEngine : public InferEngine {
uint64_t last_revision;
};
virtual int load(const InferEngineCreationParams& params) = 0;
virtual int load(const configure::EngineDesc& conf) = 0;
int proc_initialize_impl(const configure::EngineDesc& conf, bool version) {
_reload_tag_file = conf.reloadable_meta();
_reload_mode_tag = conf.reloadable_type();
_model_data_path = conf.model_data_path();
_model_data_path = conf.model_dir();
_infer_thread_num = conf.runtime_thread_num();
_infer_batch_size = conf.batch_infer_size();
_infer_batch_align = conf.enable_batch_align();
bool enable_memory_optimization = false;
if (conf.has_enable_memory_optimization()) {
enable_memory_optimization = conf.enable_memory_optimization();
}
bool static_optimization = false;
if (conf.has_static_optimization()) {
static_optimization = conf.static_optimization();
}
bool force_update_static_cache = false;
if (conf.has_force_update_static_cache()) {
force_update_static_cache = conf.force_update_static_cache();
}
_conf = conf;
if (conf.has_enable_ir_optimization()) {
_infer_engine_params.set_enable_ir_optimization(
conf.enable_ir_optimization());
}
_infer_engine_params.set_path(_model_data_path);
if (enable_memory_optimization) {
_infer_engine_params.set_enable_memory_optimization(true);
_infer_engine_params.set_static_optimization(static_optimization);
_infer_engine_params.set_force_update_static_cache(
force_update_static_cache);
}
if (conf.has_use_trt()) {
_infer_engine_params.set_use_trt(conf.use_trt());
}
if (conf.has_use_lite()) {
_infer_engine_params.set_use_lite(conf.use_lite());
}
if (conf.has_use_xpu()) {
_infer_engine_params.set_use_xpu(conf.use_xpu());
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
if (!check_need_reload() || load(conf) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
}
......@@ -230,7 +139,6 @@ class ReloadableInferEngine : public InferEngine {
if (_infer_thread_num > 0) {
return 0;
}
return thrd_initialize_impl();
}
......@@ -254,13 +162,13 @@ class ReloadableInferEngine : public InferEngine {
int reload() {
if (check_need_reload()) {
LOG(WARNING) << "begin reload model[" << _model_data_path << "].";
return load(_infer_engine_params);
return load(_conf);
}
return 0;
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -322,7 +230,7 @@ class ReloadableInferEngine : public InferEngine {
protected:
std::string _model_data_path;
InferEngineCreationParams _infer_engine_params;
configure::EngineDesc _conf;
private:
std::string _reload_tag_file;
......@@ -361,25 +269,25 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
return ReloadableInferEngine::proc_initialize(conf, version);
}
virtual int load(const InferEngineCreationParams& params) {
virtual int load(const configure::EngineDesc& conf) {
if (_reload_vec.empty()) {
return 0;
}
for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
if (load_data(_reload_vec[ti], params) != 0) {
if (load_data(_reload_vec[ti], conf) != 0) {
LOG(ERROR) << "Failed reload engine model: " << ti;
return -1;
}
}
LOG(WARNING) << "Succ load engine, path: " << params.get_path();
LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
return 0;
}
int load_data(ModelData<EngineCore>* md,
const InferEngineCreationParams& params) {
const configure::EngineDesc& conf) {
uint32_t next_idx = (md->current_idx + 1) % 2;
if (md->cores[next_idx]) {
delete md->cores[next_idx];
......@@ -387,9 +295,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(params) != 0) {
LOG(ERROR) << "Failed create model, path: " << params.get_path();
//params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
}
md->current_idx = next_idx;
......@@ -400,9 +308,9 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
// memory pool to be inited in non-serving-threads
ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
if (!md || load_data(md, _infer_engine_params) != 0) {
if (!md || load_data(md, _conf) != 0) {
LOG(ERROR) << "Failed create thread data from "
<< _infer_engine_params.get_path();
<< _conf.model_dir();
return -1;
}
......@@ -458,16 +366,16 @@ class CloneDBReloadableInferEngine
return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
}
virtual int load(const InferEngineCreationParams& params) {
virtual int load(const configure::EngineDesc& conf) {
// 加载进程级模型数据
if (!_pd ||
DBReloadableInferEngine<EngineCore>::load_data(_pd, params) != 0) {
LOG(ERROR) << "Failed to create common model from [" << params.get_path()
DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
<< "].";
return -1;
}
LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
<< "], path[" << params.get_path() << "].";
<< "], path[" << conf.model_dir() << "].";
if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
return 0;
......@@ -483,7 +391,7 @@ class CloneDBReloadableInferEngine
}
}
LOG(WARNING) << "Succ load clone model, path[" << params.get_path() << "]";
LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
return 0;
}
......@@ -527,18 +435,18 @@ class CloneDBReloadableInferEngine
_pd; // 进程级EngineCore,多个线程级EngineCore共用该对象的模型数据
};
template <typename FluidFamilyCore>
template <typename PaddleInferenceCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
std::vector<std::string> GetInputNames() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -546,8 +454,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
}
std::vector<std::string> GetOutputNames() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -556,8 +464,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
std::unique_ptr<paddle_infer::Tensor> GetInputHandle(
const std::string& name) {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetInputHandle()";
}
......@@ -566,8 +474,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(
const std::string& name) {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in GetOutputHandle()";
}
......@@ -575,8 +483,8 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
}
int infer_impl() {
FluidFamilyCore* core =
DBReloadableInferEngine<FluidFamilyCore>::get_core();
PaddleInferenceCore* core =
DBReloadableInferEngine<PaddleInferenceCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
......
......@@ -77,7 +77,7 @@ export PYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.8
## Install Python dependencies
```shell
pip install -r python/requirements.txt
pip install -r python/requirements.txt -i https://mirror.baidu.com/pypi/simple
```
If you use other Python version, please use the right `pip` accordingly.
......@@ -123,14 +123,13 @@ Compared with CPU environment, GPU environment needs to refer to the following t
**It should be noted that the following table is used as a reference for non-Docker compilation environment. The Docker compilation environment has been configured with relevant parameters and does not need to be specified in cmake process. **
| cmake environment variable | meaning | GPU environment considerations | whether Docker environment is needed |
|-----------------------|------------------------- ------------|-------------------------------|----- ---------------|
| CUDA_TOOLKIT_ROOT_DIR | cuda installation path, usually /usr/local/cuda | Required for all environments | No
(/usr/local/cuda) |
|-----------------------|-------------------------------------|-------------------------------|--------------------|
| CUDA_TOOLKIT_ROOT_DIR | cuda installation path, usually /usr/local/cuda | Required for all environments | No (/usr/local/cuda) |
| CUDNN_LIBRARY | The directory where libcudnn.so.* is located, usually /usr/local/cuda/lib64/ | Required for all environments | No (/usr/local/cuda/lib64/) |
| CUDA_CUDART_LIBRARY | The directory where libcudart.so.* is located, usually /usr/local/cuda/lib64/ | Required for all environments | No (/usr/local/cuda/lib64/) |
| TENSORRT_ROOT | The upper level directory of the directory where libnvinfer.so.* is located, depends on the TensorRT installation directory | Cuda 9.0/10.0 does not need, other needs | No (/usr) |
If not in Docker environment, users can refer to the following execution methods. The specific path is subject to the current environment, and the code is only for reference.
If not in Docker environment, users can refer to the following execution methods. The specific path is subject to the current environment, and the code is only for reference.TENSORRT_LIBRARY_PATH is related to the TensorRT version and should be set according to the actual situation。For example, in the cuda10.1 environment, the TensorRT version is 6.0 (/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/),In the cuda10.2 environment, the TensorRT version is 7.1 (/usr/local/TensorRT-7.1.3.4/targets/x86_64-linux-gnu/).
``` shell
export CUDA_PATH='/usr/local/cuda'
......@@ -145,7 +144,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DCUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY} \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH}
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
......
......@@ -76,7 +76,7 @@ export PYTHON_EXECUTABLE=$PYTHONROOT/bin/python3.8
## 安装Python依赖
```shell
pip install -r python/requirements.txt
pip install -r python/requirements.txt -i https://mirror.baidu.com/pypi/simple
```
如果使用其他Python版本,请使用对应版本的`pip`
......@@ -128,7 +128,7 @@ make -j10
| CUDA_CUDART_LIBRARY | libcudart.so.*所在目录,通常为/usr/local/cuda/lib64/ | 全部环境都需要 | 否(/usr/local/cuda/lib64/) |
| TENSORRT_ROOT | libnvinfer.so.*所在目录的上一级目录,取决于TensorRT安装目录 | Cuda 9.0/10.0不需要,其他需要 | 否(/usr) |
非Docker环境下,用户可以参考如下执行方式,具体的路径以当时环境为准,代码仅作为参考。
非Docker环境下,用户可以参考如下执行方式,具体的路径以当时环境为准,代码仅作为参考。TENSORRT_LIBRARY_PATH和TensorRT版本有关,要根据实际情况设置。例如在cuda10.1环境下TensorRT版本是6.0(/usr/local/TensorRT-6.0.1.5/targets/x86_64-linux-gnu/),在cuda10.2环境下TensorRT版本是7.1(/usr/local/TensorRT-7.1.3.4/targets/x86_64-linux-gnu/)。
``` shell
export CUDA_PATH='/usr/local/cuda'
......@@ -143,7 +143,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DCUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY} \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH}
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
......@@ -159,7 +159,7 @@ make -j10
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR \
-DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
-DCLIENT=ON ..
make -j10
```
......
......@@ -6,17 +6,17 @@
#### Q: Paddle Serving 、Paddle Inference、PaddleHub Serving三者的区别及联系?
**A:** paddle serving是远程服务,即发起预测的设备(手机、浏览器、客户端等)与实际预测的硬件不在一起。 paddle inference是一个library,适合嵌入到一个大系统中保证预测效率,paddle serving调用了paddle inference做远程服务。paddlehub serving可以认为是一个示例,都会使用paddle serving作为统一预测服务入口。如果在web端交互,一般是调用远程服务的形式,可以使用paddle serving的web service搭建。
**A:** paddle serving是远程服务,即发起预测的设备(手机、浏览器、客户端等)与实际预测的硬件不在一起。 paddle inference是一个library,适合嵌入到一个大系统中保证预测效率,paddle serving调用了paddle inference做远程服务。paddlehub serving可以认为是一个示例,都会使用paddle serving作为统一预测服务入口。如果在web端交互,一般是调用远程服务的形式,可以使用paddle serving的web service搭建。
#### Q: paddle-serving是否支持Int32支持
**A:** 在protobuf定feed_type和fetch_type编号与数据类型对应如下
0-int64
1-float32
2-int32
0-int64
1-float32
2-int32
#### Q: paddle-serving是否支持windows和Linux环境下的多线程调用
......@@ -37,6 +37,7 @@
## 安装问题
#### Q: pip install安装whl包过程,报错信息如下:
```
Collecting opencv-python
Using cached opencv-python-4.3.0.38.tar.gz (88.0 MB)
......@@ -69,9 +70,11 @@ Collecting opencv-python
s = list(pattern)
TypeError: 'NoneType' object is not iterable
```
**A:** 指定opencv-python版本安装,pip install opencv-python==4.2.0.32,再安装whl包
#### Q: pip3 install whl包过程报错信息如下:
```
Complete output from command python setup.py egg_info:
Found cython-generated files...
......@@ -80,13 +83,16 @@ Collecting opencv-python
----------------------------------------
Command "python setup.py egg_info" failed with error code 1 in /tmp/pip-install-taoxz02y/grpcio/
```
**A:** 需要升级pip3,再重新执行安装命令。
```
pip3 install --upgrade pip
pip3 install --upgrade setuptools
```
#### Q: 运行过程中报错,信息如下:
```
Traceback (most recent call last):
File "../../deploy/serving/test_client.py", line 18, in <module>
......@@ -97,7 +103,9 @@ Traceback (most recent call last):
from shapely.geometry import Polygon
ImportError: No module named shapely.geometry
```
**A:** 有2种方法,第一种通过pip/pip3安装shapely,第二种通过pip/pip3安装所有依赖组件。
```
方法1:
pip install shapely==1.7.0
......@@ -116,7 +124,69 @@ pip install -r python/requirements.txt
**A:** 没有安装JDK,或者JAVA_HOME路径配置错误(正确配置是JDK路径,常见错误配置成JRE路径,例如正确路径参考JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64/")。Java JDK安装参考https://segmentfault.com/a/1190000015389941
## 环境问题
#### Q:使用过程中出现CXXABI错误。
这个问题出现的原因是Python使用的gcc版本和Serving所需的gcc版本对不上。对于Docker用户,推荐使用[Docker容器](./RUN_IN_DOCKER_CN.md),由于Docker容器内的Python版本与Serving在发布前都做过适配,这样就不会出现类似的错误。如果是其他开发环境,首先需要确保开发环境中具备GCC 8.2,如果没有gcc 8.2,参考安装方式
```bash
wget -q https://paddle-ci.gz.bcebos.com/gcc-8.2.0.tar.xz
tar -xvf gcc-8.2.0.tar.xz && \
cd gcc-8.2.0 && \
unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \
./contrib/download_prerequisites && \
cd .. && mkdir temp_gcc82 && cd temp_gcc82 && \
../gcc-8.2.0/configure --prefix=/usr/local/gcc-8.2 --enable-threads=posix --disable-checking --disable-multilib && \
make -j8 && make install
cd .. && rm -rf temp_gcc82
cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} &&
ln -s /usr/local/gcc-8.2/lib64/libgfortran.so.5 ${lib_so_5} && \
ln -s /usr/local/gcc-8.2/lib64/libstdc++.so.6 ${lib_so_6} && \
cp /usr/local/gcc-8.2/lib64/libstdc++.so.6.0.25 ${lib_path}
```
假如已经有了GCC 8.2,可以自行安装Python,此外我们也提供了两个GCC 8.2编译的[Python2.7](https://paddle-serving.bj.bcebos.com/others/Python2.7.17-gcc82.tar)[Python3.6](https://paddle-serving.bj.bcebos.com/others/Python3.6.10-gcc82.tar) 。下载解压后,需要将对应的目录设置为`PYTHONROOT`,并设置`PATH``LD_LIBRARY_PATH`
```bash
export PYTHONROOT=/path/of/python # 对应解压后的Python目录
export PATH=$PYTHONROOT/bin:$PATH
export LD_LIBRARY_PATH=$PYTHONROOT/lib:$LD_LIBRARY_PATH
```
#### Q:遇到libstdc++.so.6的版本不够的问题
触发该问题的原因在于,编译Paddle Serving相关可执行程序和动态库,所采用的是GCC 8.2(Cuda 9.0和10.0的Server可执行程序受限Cuda兼容性采用GCC 4.8编译)。Python在调用的过程中,有可能链接到了其他GCC版本的 `libstdc++.so`。 需要做的就是受限确保所在环境具备GCC 8.2,其次将GCC8.2的`libstdc++.so.*`拷贝到某个目录例如`/home/libstdcpp`下。最后`export LD_LIBRARY_PATH=/home/libstdcpp:$LD_LIBRARY_PATH` 即可。
#### Q: 遇到OPENSSL_1.0.1EC 符号找不到的问题。
目前Serving的可执行程序和客户端动态库需要链接1.0.2k版本的openssl动态库。如果环境当中没有,可以执行
```bash
wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && \
ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && \
ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && \
ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && \
ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so
```
其中`/usr/lib` 可以换成其他目录,并确保该目录在`LD_LIBRARY_PATH`下。
### GPU相关环境问题
#### Q:需要做哪些检查确保Serving可以运行在GPU环境
**注:如果是使用Serving提供的镜像不需要做下列检查,如果是其他开发环境可以参考以下指导。**
首先需要确保`nvidia-smi`可用,其次需要确保所需的动态库so文件在`LD_LIBRARY_PATH`所在的目录(包括系统lib库)。
(1)Cuda显卡驱动:文件名通常为 `libcuda.so.$DRIVER_VERSION` 例如驱动版本为440.10.15,文件名就是`libcuda.so.440.10.15`
(2)Cuda和Cudnn动态库:文件名通常为 `libcudart.so.$CUDA_VERSION`,和 `libcudnn.so.$CUDNN_VERSION`。例如Cuda9就是 `libcudart.so.9.0`,Cudnn7就是 `libcudnn.so.7`。Cuda和Cudnn与Serving的版本匹配参见[Serving所有镜像列表](DOCKER_IMAGES_CN.md#%E9%99%84%E5%BD%95%E6%89%80%E6%9C%89%E9%95%9C%E5%83%8F%E5%88%97%E8%A1%A8).
(3) Cuda10.1及更高版本需要TensorRT。安装TensorRT相关文件的脚本参考 [install_trt.sh](../tools/dockerfile/build_scripts/install_trt.sh).
## 部署问题
......@@ -154,7 +224,7 @@ InvalidArgumentError: Device id must be less than GPU count, but received id is:
**A:**:1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题
2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
#### Q: paddle-serving是否支持本地离线安装
......@@ -221,9 +291,10 @@ client端的日志直接打印到标准输出。
**A:** 1)警告是glog组件打印的,告知glog初始化之前日志打印在STDERR
2)一般采用GLOG_v方式启动服务同时设置日志级别。
2)一般采用GLOG_v方式启动服务同时设置日志级别。
例如:
```
GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
```
......
......@@ -13,13 +13,5 @@
# limitations under the License
if (NOT CLIENT_ONLY)
add_subdirectory(inferencer-fluid-cpu)
if (WITH_GPU)
add_subdirectory(inferencer-fluid-gpu)
endif()
if (WITH_LITE)
add_subdirectory(inferencer-fluid-arm)
endif()
add_subdirectory(paddle)
endif()
FILE(GLOB fluid_arm_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_arm_engine ${fluid_arm_engine_srcs})
target_include_directories(fluid_arm_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_arm_engine pdserving extern_paddle configure)
target_link_libraries(fluid_arm_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS fluid_arm_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
FILE(GLOB fluid_cpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})
target_include_directories(fluid_cpu_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_cpu_engine pdserving extern_paddle configure)
target_link_libraries(fluid_cpu_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS fluid_cpu_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
class FluidCpuAnalysisCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidCpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
class FluidCpuAnalysisEncryptCore : public FluidFamilyCore {
public:
void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path note exits: "
<< data_path;
return -1;
}
std::string model_buffer, params_buffer, key_buffer;
ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
ReadBinaryFile(data_path + "key", &key_buffer);
VLOG(2) << "prepare for encryption model";
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
Config analysis_config;
// paddle::AnalysisConfig analysis_config;
analysis_config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
analysis_config.DisableGpu();
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
VLOG(2) << "decrypt model file sucess";
_core = CreatePredictor(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-cpu/include/fluid_cpu_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_cpu {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidCpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_DIR");
#if 1
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidCpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_CPU_ANALYSIS_ENCRYPT");
#endif
} // namespace fluid_cpu
} // namespace paddle_serving
} // namespace baidu
FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
target_include_directories(fluid_gpu_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS fluid_gpu_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pthread.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
DECLARE_int32(gpuid);
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
using configure::SigmoidConf;
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
using paddle_infer::Config;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
public:
virtual ~FluidFamilyCore() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
protected:
std::shared_ptr<Predictor> _core;
};
// infer interface
class FluidGpuAnalysisCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.EnableUseGpu(100, FLAGS_gpuid);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidGpuAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
}
Config config;
config.SetModel(data_path);
config.EnableUseGpu(1500, FLAGS_gpuid);
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
int max_batch = 32;
int min_subgraph_size = 3;
if (params.use_trt()) {
config.EnableTensorRtEngine(1 << 20,
max_batch,
min_subgraph_size,
Config::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
} else {
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
config.SwitchIrOptim(false);
}
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
LOG(INFO) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
LOG(INFO) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
class FluidGpuAnalysisEncryptCore : public FluidFamilyCore {
public:
void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::ifstream fin(filename, std::ios::in | std::ios::binary);
fin.seekg(0, std::ios::end);
contents->clear();
contents->resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(contents->at(0)), contents->size());
fin.close();
}
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path note exits: "
<< data_path;
return -1;
}
std::string model_buffer, params_buffer, key_buffer;
ReadBinaryFile(data_path + "encrypt_model", &model_buffer);
ReadBinaryFile(data_path + "encrypt_params", &params_buffer);
ReadBinaryFile(data_path + "key", &key_buffer);
VLOG(2) << "prepare for encryption model";
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer = cipher->Decrypt(params_buffer, key_buffer);
Config analysis_config;
analysis_config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.SetCpuMathLibraryNumThreads(1);
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
analysis_config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
VLOG(2) << "decrypt model file sucess";
_core = CreatePredictor(analysis_config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h"
#include "core/predictor/framework/factory.h"
DEFINE_int32(gpuid, 0, "GPU device id to use");
namespace baidu {
namespace paddle_serving {
namespace fluid_gpu {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidGpuAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisDirCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_DIR");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidGpuAnalysisEncryptCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_GPU_ANALYSIS_ENCRPT")
} // namespace fluid_gpu
} // namespace paddle_serving
} // namespace baidu
FILE(GLOB paddle_inference_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(paddle_inference_engine ${paddle_inference_engine_srcs})
target_include_directories(paddle_inference_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(paddle_inference_engine pdserving extern_paddle configure)
target_link_libraries(paddle_inference_engine pdserving paddle_inference -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
install(TARGETS paddle_inference_engine
ARCHIVE DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/lib
)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -17,275 +17,174 @@
#include <pthread.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
class AutoLock {
public:
explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
pthread_mutex_lock(&mutex);
}
~AutoLock() { pthread_mutex_unlock(&_mut); }
private:
pthread_mutex_t& _mut;
};
class GlobalPaddleCreateMutex {
public:
pthread_mutex_t& mutex() { return _mut; }
static pthread_mutex_t& instance() {
static GlobalPaddleCreateMutex gmutex;
return gmutex.mutex();
}
private:
GlobalPaddleCreateMutex() { pthread_mutex_init(&_mut, NULL); }
pthread_mutex_t _mut;
};
namespace inference {
using paddle_infer::Config;
using paddle_infer::PrecisionType;
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::PrecisionType;
using paddle_infer::CreatePredictor;
// data interface
class FluidFamilyCore {
DECLARE_int32(gpuid);
static const int max_batch = 32;
static const int min_subgraph_size = 3;
// Engine Base
class PaddleEngineBase {
public:
virtual ~FluidFamilyCore() {}
virtual ~PaddleEngineBase() {}
virtual std::vector<std::string> GetInputNames() {
return _core->GetInputNames();
return _predictor->GetInputNames();
}
virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
return _core->GetInputHandle(name);
return _predictor->GetInputHandle(name);
}
virtual std::vector<std::string> GetOutputNames() {
return _core->GetOutputNames();
return _predictor->GetOutputNames();
}
virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
return _core->GetOutputHandle(name);
return _predictor->GetOutputHandle(name);
}
virtual bool Run() {
if (!_core->Run()) {
if (!_predictor->Run()) {
LOG(ERROR) << "Failed call Run with paddle predictor";
return false;
}
return true;
}
virtual int create(const predictor::InferEngineCreationParams& params) = 0;
virtual int create(const configure::EngineDesc& conf) = 0;
virtual int clone(void* origin_core) {
if (origin_core == NULL) {
virtual int clone(void* predictor) {
if (predictor == NULL) {
LOG(ERROR) << "origin paddle Predictor is null.";
return -1;
}
Predictor* p_predictor = (Predictor*)origin_core;
_core = p_predictor->Clone();
if (_core.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << origin_core;
Predictor* prep = static_cast<Predictor*>(predictor);
_predictor = prep->Clone();
if (_predictor.get() == NULL) {
LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
return -1;
}
return 0;
}
virtual void* get() { return _core.get(); }
virtual void* get() { return _predictor.get(); }
protected:
std::shared_ptr<Predictor> _core;
std::shared_ptr<Predictor> _predictor;
};
// infer interface
class FluidArmAnalysisCore : public FluidFamilyCore {
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
if (access(model_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
<< model_path;
return -1;
}
Config config;
config.SetParamsFile(data_path + "/__params__");
config.SetProgFile(data_path + "/__model__");
config.DisableGpu();
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
// todo, auto config(zhangjun)
if (engine_conf.has_combined_model()) {
if (!engine_conf.combined_model()) {
config.SetModel(model_path);
} else {
config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
}
} else {
config.SwitchIrOptim(false);
config.SetParamsFile(model_path + "/__params__");
config.SetProgFile(model_path + "/__model__");
}
config.SwitchSpecifyInputNames(true);
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
config.SetCpuMathLibraryNumThreads(1);
if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
// 2000MB GPU memory
config.EnableUseGpu(2000, FLAGS_gpuid);
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class FluidArmAnalysisDirCore : public FluidFamilyCore {
public:
int create(const predictor::InferEngineCreationParams& params) {
std::string data_path = params.get_path();
if (access(data_path.c_str(), F_OK) == -1) {
LOG(ERROR) << "create paddle predictor failed, path not exits: "
<< data_path;
return -1;
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
config.EnableUseGpu(2000, FLAGS_gpuid);
}
config.EnableTensorRtEngine(1 << 20,
max_batch,
min_subgraph_size,
Config::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
}
Config config;
config.SetModel(data_path);
config.DisableGpu();
config.SwitchSpecifyInputNames(true);
config.SetCpuMathLibraryNumThreads(1);
if (params.use_lite()) {
if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
config.EnableLiteEngine(PrecisionType::kFloat32, true);
}
if (params.use_xpu()) {
if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
// 2 MB l3 cache
config.EnableXpu(2 * 1024 * 1024);
}
if (params.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
config.SwitchIrOptim(true);
} else {
if (engine_conf.has_enable_ir_optimization() &&
!engine_conf.enable_ir_optimization()) {
config.SwitchIrOptim(false);
} else {
config.SwitchIrOptim(true);
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core = CreatePredictor(config);
if (NULL == _core.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << data_path;
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << data_path;
return 0;
}
};
class Parameter {
public:
Parameter() : _row(0), _col(0), _params(NULL) {}
~Parameter() {
VLOG(2) << "before destroy Parameter, file_name[" << _file_name << "]";
destroy();
}
int init(int row, int col, const char* file_name) {
destroy();
_file_name = file_name;
_row = row;
_col = col;
_params = reinterpret_cast<float*>(malloc(_row * _col * sizeof(float)));
if (_params == NULL) {
LOG(ERROR) << "Load " << _file_name << " malloc error.";
return -1;
if (engine_conf.has_enable_memory_optimization() &&
engine_conf.enable_memory_optimization()) {
config.EnableMemoryOptim();
}
VLOG(2) << "Load parameter file[" << _file_name << "] success.";
return 0;
}
void destroy() {
_row = 0;
_col = 0;
if (_params != NULL) {
free(_params);
_params = NULL;
}
}
if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
// decrypt model
std::string model_buffer, params_buffer, key_buffer;
predictor::ReadBinaryFile(model_path + "encrypt_model", &model_buffer);
predictor::ReadBinaryFile(model_path + "encrypt_params", &params_buffer);
predictor::ReadBinaryFile(model_path + "key", &key_buffer);
int load() {
if (_params == NULL || _row <= 0 || _col <= 0) {
LOG(ERROR) << "load parameter error [not inited].";
return -1;
auto cipher = paddle::MakeCipher("");
std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
std::string real_params_buffer =
cipher->Decrypt(params_buffer, key_buffer);
config.SetModelBuffer(&real_model_buffer[0],
real_model_buffer.size(),
&real_params_buffer[0],
real_params_buffer.size());
}
FILE* fs = fopen(_file_name.c_str(), "rb");
if (fs == NULL) {
LOG(ERROR) << "load " << _file_name << " fopen error.";
return -1;
}
static const uint32_t MODEL_FILE_HEAD_LEN = 16;
char head[MODEL_FILE_HEAD_LEN] = {0};
if (fread(head, 1, MODEL_FILE_HEAD_LEN, fs) != MODEL_FILE_HEAD_LEN) {
destroy();
LOG(ERROR) << "Load " << _file_name << " read head error.";
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
_predictor = CreatePredictor(config);
if (NULL == _predictor.get()) {
LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
return -1;
}
uint32_t matrix_size = _row * _col;
if (matrix_size == fread(_params, sizeof(float), matrix_size, fs)) {
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
VLOG(2) << "load " << _file_name << " read ok.";
return 0;
} else {
LOG(ERROR) << "load " << _file_name << " read error.";
destroy();
if (fs != NULL) {
fclose(fs);
fs = NULL;
}
return -1;
}
VLOG(2) << "create paddle predictor sucess, path: " << model_path;
return 0;
}
public:
std::string _file_name;
int _row;
int _col;
float* _params;
};
} // namespace fluid_arm
} // namespace inference
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......@@ -12,24 +12,20 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_inference/inferencer-fluid-arm/include/fluid_arm_engine.h"
#include "paddle_inference/paddle/include/paddle_engine.h"
#include "core/predictor/framework/factory.h"
namespace baidu {
namespace paddle_serving {
namespace fluid_arm {
namespace inference {
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<FluidArmAnalysisCore>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS");
DEFINE_int32(gpuid, 0, "GPU device id to use");
REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
::baidu::paddle_serving::predictor::FluidInferEngine<
FluidArmAnalysisDirCore>,
::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
::baidu::paddle_serving::predictor::InferEngine,
"FLUID_ARM_ANALYSIS_DIR");
"PADDLE_INFER");
} // namespace fluid_arm
} // namespace inference
} // namespace paddle_serving
} // namespace baidu
if (CLIENT)
file(INSTALL pipeline DESTINATION paddle_serving_client)
file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
set(PY_FILES ${SERVING_CLIENT_PY_FILES})
SET(PACKAGE_NAME "serving_client")
set(SETUP_LOG_FILE "setup.py.client.log")
file(INSTALL pipeline DESTINATION paddle_serving_client)
file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
set(PY_FILES ${SERVING_CLIENT_PY_FILES})
SET(PACKAGE_NAME "serving_client")
set(SETUP_LOG_FILE "setup.py.client.log")
endif()
if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE)
file(INSTALL pipeline DESTINATION paddle_serving_server)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
else()
file(INSTALL pipeline DESTINATION paddle_serving_server_gpu)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py)
endif()
set(PY_FILES ${SERVING_SERVER_PY_FILES})
SET(PACKAGE_NAME "serving_server")
set(SETUP_LOG_FILE "setup.py.server.log")
SET(SERVER_PACKAGE_NAME "paddle-serving-server")
if (WITH_GPU)
set(SERVER_PACKAGE_NAME "paddle-serving-server-gpu")
elseif(WITH_XPU)
set(SERVER_PACKAGE_NAME "paddle-serving-server-xpu")
endif()
file(INSTALL pipeline DESTINATION paddle_serving_server)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
set(PY_FILES ${SERVING_SERVER_PY_FILES})
set(SETUP_LOG_FILE "setup.py.server.log")
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/util.py
${CMAKE_CURRENT_BINARY_DIR}/util.py)
if (CLIENT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/../tools/python_tag.py
${CMAKE_CURRENT_BINARY_DIR}/python_tag.py)
endif()
if (APP)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
endif()
if (SERVER)
if (NOT WITH_GPU AND NOT WITH_LITE)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
else()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py)
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py
......@@ -50,108 +45,73 @@ set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so)
message("python env: " ${py_env})
if (APP)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
if (CLIENT)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
if (SERVER)
if(NOT WITH_GPU AND NOT WITH_LITE)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "server"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_TRT)
if(CUDA_VERSION EQUAL 10.1)
set(SUFFIX 101)
elseif(CUDA_VERSION EQUAL 10.2)
set(SUFFIX 102)
elseif(CUDA_VERSION EQUAL 11.0)
set(SUFFIX 11)
endif()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" ${SUFFIX}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_LITE)
if(WITH_XPU)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm-xpu
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" arm
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" ${CUDA_VERSION_MAJOR}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
# todo, generate suffix for cpu、gpu、arm
if(WITH_TRT)
if(CUDA_VERSION EQUAL 10.1)
set(VERSION_SUFFIX 101)
elseif(CUDA_VERSION EQUAL 10.2)
set(VERSION_SUFFIX 102)
elseif(CUDA_VERSION EQUAL 11.0)
set(VERSION_SUFFIX 11)
endif()
endif()
if(WITH_LITE)
set(VERSION_SUFFIX 2)
endif()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server" ${VERSION_SUFFIX}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
endif()
set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
if (CLIENT)
install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR}
install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR}
DESTINATION opt/serving_client/share/wheels
)
)
endif()
if (SERVER)
install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR}
DESTINATION opt/serving_server/share/wheels
)
install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR}
DESTINATION opt/serving_server/share/wheels
)
endif()
if (CLIENT OR SERVER)
find_program(PATCHELF_EXECUTABLE patchelf)
if (NOT PATCHELF_EXECUTABLE)
message(FATAL_ERROR "patchelf not found, please install it.\n"
"For Ubuntu, the command is: apt-get install -y patchelf.")
endif()
find_program(PATCHELF_EXECUTABLE patchelf)
if (NOT PATCHELF_EXECUTABLE)
message(FATAL_ERROR "patchelf not found, please install it.\n"
"For Ubuntu, the command is: apt-get install -y patchelf.")
endif()
endif()
......@@ -49,7 +49,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #c
```
Or,start gpu inference service,Run
```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #launch gpu inference service at GPU 0
```
### RPC Inference
......
......@@ -48,7 +48,7 @@ python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 #
```
或者,启动gpu预测服务,执行
```
python -m paddle_serving_server_gpu.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
python -m paddle_serving_server.serve --model bert_seq128_model/ --port 9292 --gpu_ids 0 #在gpu 0上启动gpu预测服务
```
......
......@@ -12,7 +12,7 @@ else
mkdir utilization
fi
#start server
$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 &
sleep 5
#warm up
......
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
python -m paddle_serving_server.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
export FLAGS_profile_client=1
export FLAGS_profile_server=1
sleep 5
......
......@@ -14,9 +14,9 @@
import os
import sys
from paddle_serving_server_gpu import OpMaker
from paddle_serving_server_gpu import OpSeqMaker
from paddle_serving_server_gpu import Server
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
......
......@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
......
......@@ -10,7 +10,7 @@ If you want to have more detection models, please refer to [Paddle Detection Mod
### Start the service
```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
```
### Perform prediction
......
......@@ -10,7 +10,7 @@ sh get_data.sh
### 启动服务
```
python -m paddle_serving_server_gpu.serve --model serving_server --port 9292 --gpu_id 0
python -m paddle_serving_server.serve --model serving_server --port 9292 --gpu_id 0
```
### 执行预测
......
......@@ -20,7 +20,7 @@ the directories like `ctr_serving_model` and `ctr_client_conf` will appear.
```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #CPU RPC Service
python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #RPC Service on GPU 0
```
### RPC Infer
......
......@@ -20,7 +20,7 @@ mv models/ctr_serving_model .
```
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 #启动CPU预测服务
python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务
python -m paddle_serving_server.serve --model ctr_serving_model/ --port 9292 --gpu_ids 0 #在GPU 0上启动预测服务
```
### 执行预测
......
......@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### Start Service
```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### Client Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf deeplabv3.tar.gz
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model deeplabv3_server --gpu_ids 0 --port 9494
python -m paddle_serving_server.serve --model deeplabv3_server --gpu_ids 0 --port 9494
```
### 客户端预测
......
......@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model support TensorRT, if you want a faster inference, please use `--use_trt`.
......
......@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务
```
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
......@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model support TensorRT, if you want a faster inference, please use `--use_trt`.
......
......@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务
```
tar xf ppyolo_r50vd_dcn_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
......@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model support TensorRT, if you want a faster inference, please use `--use_trt`.
......
......@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务
```
tar xf ttfnet_darknet53_1x_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
......@@ -10,7 +10,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### Start the service
```
tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model support TensorRT, if you want a faster inference, please use `--use_trt`.
......
......@@ -11,7 +11,7 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
### 启动服务
```
tar xf yolov3_darknet53_270e_coco.tar
python -m paddle_serving_server_gpu.serve --model serving_server --port 9494 --gpu_ids 0
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
该模型支持TensorRT,如果想要更快的预测速度,可以开启`--use_trt`选项。
......
......@@ -26,7 +26,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
```
GPU Service
```
python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
```
## Prediction
......
......@@ -24,7 +24,7 @@ python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_
```
GPU预测服务
```
python -m paddle_serving_server_gpu.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
python -m paddle_serving_server.serve --model encrypt_server/ --port 9300 --use_encryption_model --gpu_ids 0
```
## 预测
......
......@@ -15,9 +15,9 @@
import os
import sys
from paddle_serving_server_gpu import OpMaker
from paddle_serving_server_gpu import OpSeqMaker
from paddle_serving_server_gpu import MultiLangServer as Server
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import MultiLangServer as Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
......
......@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service
```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## 启动RPC服务
```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang
```
## 预测
......
......@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
```
```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu inference service
```
client send inference request
......
......@@ -39,7 +39,7 @@ python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 #cpu
```
```
python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
python -m paddle_serving_server.serve --model ResNet50_vd_model --port 9696 --gpu_ids 0 #gpu预测服务
```
client端进行预测
......
......@@ -2,7 +2,7 @@ rm profile_log*
export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_profile_server=1
export FLAGS_profile_client=1
python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
python -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog &
sleep 5
gpu_id=0
......
......@@ -25,7 +25,7 @@ device = sys.argv[2]
if device == "cpu":
from paddle_serving_server.web_service import WebService
else:
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
class ImageService(WebService):
......
......@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### Start Service
```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf mobilenet_v2_imagenet.tar.gz
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
python -m paddle_serving_server.serve --model mobilenet_v2_imagenet_model --gpu_ids 0 --port 9393
```
### 客户端预测
......
......@@ -26,7 +26,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
......
......@@ -25,7 +25,7 @@ tar xf test_imgs.tar
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py gpu
```
......
......@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
......
......@@ -22,7 +22,7 @@ from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
......
......@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import LocalPredictor
......
......@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
......
......@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
......
......@@ -23,7 +23,7 @@ from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time
......
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
'''
2021-03-16 10:26:01,832 ==================== TRACER ======================
2021-03-16 10:26:01,838 Op(bert):
2021-03-16 10:26:01,838 in[5.7833 ms]
2021-03-16 10:26:01,838 prep[8.2001 ms]
2021-03-16 10:26:01,838 midp[198.79853333333332 ms]
2021-03-16 10:26:01,839 postp[0.8411 ms]
2021-03-16 10:26:01,839 out[0.9440666666666667 ms]
2021-03-16 10:26:01,839 idle[0.03135320683677345]
2021-03-16 10:26:01,839 DAGExecutor:
2021-03-16 10:26:01,839 Query count[30]
2021-03-16 10:26:01,839 QPS[3.0 q/s]
2021-03-16 10:26:01,839 Succ[1.0]
2021-03-16 10:26:01,839 Error req[]
2021-03-16 10:26:01,839 Latency:
2021-03-16 10:26:01,839 ave[237.85519999999997 ms]
2021-03-16 10:26:01,839 .50[179.937 ms]
2021-03-16 10:26:01,839 .60[179.994 ms]
2021-03-16 10:26:01,839 .70[180.515 ms]
2021-03-16 10:26:01,840 .80[180.735 ms]
2021-03-16 10:26:01,840 .90[182.275 ms]
2021-03-16 10:26:01,840 .95[182.789 ms]
2021-03-16 10:26:01,840 .99[1921.33 ms]
2021-03-16 10:26:01,840 Channel (server worker num[1]):
2021-03-16 10:26:01,840 chl0(In: ['@DAGExecutor'], Out: ['bert']) size[0/0]
2021-03-16 10:26:01,841 chl1(In: ['bert'], Out: ['@DAGExecutor']) size[0/0]
'''
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device):
fin = open("config.yml", "r")
config = yaml.load(fin)
fin.close()
config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu":
config["op"]["bert"]["local_service_conf"]["device_type"] = 1
config["op"]["bert"]["local_service_conf"]["devices"] = "2"
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/bert/prediction"
start = time.time()
with open("data-c.txt", 'r') as fin:
start = time.time()
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
keys = list(feed.keys())
values = [feed[x] for x in keys]
data = {"key": keys, "value": values}
r = requests.post(url=url, data=json.dumps(data))
start_idx += batch_size
if start_idx > 2000:
break
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
with open("data-c.txt", 'r') as fin:
start = time.time()
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
ret = client.predict(feed_dict=feed, fetch=["res"])
start_idx += batch_size
if start_idx > 1000:
break
end = time.time()
return [[end - start]]
def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gen_yml(device)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="bert"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
rm -rf profile_log_$modelname
for thread_num in 1 8 16
do
for batch_size in 1 10 100
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
for thread_num in 1 8 16
do
for batch_size in 1 10 100
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:rpc ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
cat benchmark.log >> profile_log_$modelname
done
done
dag:
is_thread_op: false
tracer:
interval_s: 10
http_port: 18082
op:
bert:
local_service_conf:
client_type: local_predictor
concurrency: 2
device_type: 1
devices: '2'
fetch_list:
- pooled_output
model_config: bert_seq128_model/
rpc_port: 9998
worker_num: 20
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
tar -xzf bert_chinese_L-12_H-768_A-12.tar.gz
mv bert_chinese_L-12_H-768_A-12_model bert_seq128_model
mv bert_chinese_L-12_H-768_A-12_client bert_seq128_client
wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
wget https://paddle-serving.bj.bcebos.com/bert_example/vocab.txt --no-check-certificate
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
batch_size = 101
with open("data-c.txt", 'r') as fin:
lines = fin.readlines()
start_idx = 0
while start_idx < len(lines):
end_idx = min(len(lines), start_idx + batch_size)
feed = {}
for i in range(start_idx, end_idx):
feed[str(i - start_idx)] = lines[i]
ret = client.predict(feed_dict=feed, fetch=["res"])
print(ret)
start_idx += batch_size
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import sys
from paddle_serving_app.reader import ChineseBertReader
_LOGGER = logging.getLogger()
class BertOp(Op):
def init_op(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
print("input dict", input_dict)
batch_size = len(input_dict.keys())
feed_res = []
for i in range(batch_size):
feed_dict = self.reader.process(input_dict[str(i)].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((1, len(feed_dict[key]), 1))
feed_res.append(feed_dict)
feed_dict = {}
for key in feed_res[0].keys():
feed_dict[key] = np.concatenate([x[key] for x in feed_res], axis=0)
print(key, feed_dict[key].shape)
return feed_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
fetch_dict["pooled_output"] = str(fetch_dict["pooled_output"])
return fetch_dict, None, ""
class BertService(WebService):
def get_pipeline_response(self, read_op):
bert_op = BertOp(name="bert", input_ops=[read_op])
return bert_op
bert_service = BertService(name="bert")
bert_service.prepare_pipeline_config("config2.yml")
bert_service.run_service()
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
......
......@@ -14,7 +14,7 @@
import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
......
......@@ -22,7 +22,7 @@ import logging
try:
from paddle_serving_server.web_service import WebService
except ImportError:
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
_LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
......
import sys
import os
import base64
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device):
fin = open("config.yml", "r")
config = yaml.load(fin)
fin.close()
config["dag"]["tracer"] = {"interval_s": 10}
if device == "gpu":
config["op"]["det"]["local_service_conf"]["device_type"] = 1
config["op"]["det"]["local_service_conf"]["devices"] = "2"
config["op"]["rec"]["local_service_conf"]["device_type"] = 1
config["op"]["rec"]["local_service_conf"]["devices"] = "2"
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:9999/ocr/prediction"
start = time.time()
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
data = {"key": ["image"], "value": [image]}
for i in range(100):
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:18090'])
start = time.time()
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(100):
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
end = time.time()
return [[end - start]]
def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gen_yml(device)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="ocr"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
rm -rf profile_log_$modelname
for thread_num in 1 8 16
do
for batch_size in 1
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu
for thread_num in 1 8 16
do
for batch_size in 1
do
echo "----Bert thread num: $thread_num batch size: $batch_size mode:rpc ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=2 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=2 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
cat benchmark.log >> profile_log_$modelname
done
done
......@@ -6,7 +6,7 @@ http_port: 9999
#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG
##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num
worker_num: 1
worker_num: 5
#build_dag_each_worker, False,框架在进程内创建一条DAG;True,框架会每个进程内创建多个独立的DAG
build_dag_each_worker: false
......@@ -20,6 +20,9 @@ dag:
#使用性能分析, True,生成Timeline性能数据,对性能有一定影响;False为不使用
use_profile: false
tracer:
interval_s: 10
op:
det:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
......@@ -37,7 +40,7 @@ op:
fetch_list: ["concat_1.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0"
devices: "2"
rec:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2
......@@ -61,4 +64,4 @@ op:
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0"
devices: "2"
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
from paddle_serving_server.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
......
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import cv2
......@@ -45,16 +45,19 @@ class DetOp(Op):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img[np.newaxis, :].copy()}, False, None, ""
imgs = []
for key in input_dict.keys():
data = base64.b64decode(input_dict[key].encode('utf8'))
data = np.fromstring(data, np.uint8)
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
imgs.append(det_img[np.newaxis, :].copy())
return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
# print(fetch_dict)
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
......@@ -62,7 +65,6 @@ class DetOp(Op):
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
print("out dict", out_dict)
return out_dict, None, ""
......@@ -112,5 +114,5 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml")
uci_service.prepare_pipeline_config("config2.yml")
uci_service.run_service()
import sys
import os
import yaml
import requests
import time
import json
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def gen_yml():
fin = open("config.yml", "r")
config = yaml.load(fin)
fin.close()
config["dag"]["tracer"] = {"interval_s": 5}
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/uci/prediction"
start = time.time()
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
all_value = ";".join([value for i in range(batch_size)])
data = {"key": ["x"], "value": [all_value]}
for i in range(1000):
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
end = time.time()
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
client = PipelineClient()
client.connect(['127.0.0.1:9998'])
value = "0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"
all_value = ";".join([value for i in range(batch_size)])
data = {"key": "x", "value": all_value}
for i in range(1000):
ret = client.predict(feed_dict={data["key"]: data["value"]}, fetch=["res"])
print(ret)
def multithread_rpc(thraed, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc , thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
gen_yml()
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1
for thread_num in 1
do
for batch_size in 1
do
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py
echo "------------Fit a line pipeline benchmark (Thread: $thread_num) (BatchSize: $batch_size)"
tail -n 25 PipelineServingLogs/pipeline.tracer
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
done
done
# RPC
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1
for thread_num in 1
do
for batch_size in 1
do
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run rpc $thread_num $batch_size
python3 cpu_utilization.py
echo "------------Fit a line pipeline benchmark (Thread: $thread_num) (BatchSize: $batch_size)"
tail -n 25 PipelineServingLogs/pipeline.tracer
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
done
done
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
......@@ -25,32 +25,24 @@ _LOGGER = logging.getLogger()
class UciOp(Op):
def init_op(self):
self.separator = ","
self.batch_separator = ";"
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
_LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format(
log_id, input_dict))
x_value = input_dict["x"]
x_value = input_dict["x"].split(self.batch_separator)
x_lst = []
for x_val in x_value:
x_lst.append(np.array([float(x.strip()) for x in x_val.split(self.separator)]).reshape(1, 13))
input_dict["x"] = np.concatenate(x_lst, axis=0)
proc_dict = {}
if sys.version_info.major == 2:
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
_LOGGER.error("input_dict:{}".format(input_dict))
else:
if isinstance(x_value, str):
input_dict["x"] = np.array(
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
_LOGGER.error("input_dict:{}".format(input_dict))
return input_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
_LOGGER.info("UciOp::postprocess >>> log_id:{}, fetch_dict:{}".format(
log_id, fetch_dict))
fetch_dict["price"] = str(fetch_dict["price"][0][0])
fetch_dict["price"] = str(fetch_dict["price"])
return fetch_dict, None, ""
......@@ -61,5 +53,5 @@ class UciService(WebService):
uci_service = UciService(name="uci")
uci_service.prepare_pipeline_config("config.yml")
uci_service.prepare_pipeline_config("config2.yml")
uci_service.run_service()
......@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### Client Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --gpu_ids 0 --port 9393
```
### 客户端预测
......
......@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### Start Service
```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
```
### Client Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf unet.tar.gz
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model unet_model --gpu_ids 0 --port 9494
python -m paddle_serving_server.serve --model unet_model --gpu_ids 0 --port 9494
```
### 客户端预测
......
......@@ -15,7 +15,7 @@ sh get_data.sh
### Start server
```shell
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_lite --use_xpu --ir_optim
```
### Client prediction
......
......@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
import numpy as np
......
......@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### Start Service
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
```
### Client Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf resnet_v2_50_imagenet.tar.gz
### 启动服务端
```
python -m paddle_serving_server_gpu.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
python -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --use_lite --use_xpu --ir_optim
```
### 客户端预测
......
......@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## Start RPC Service
```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
```
## Prediction
......
......@@ -12,7 +12,7 @@ tar -xzvf yolov4.tar.gz
## 启动RPC服务
```
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0
python -m paddle_serving_server.serve --model yolov4_model --port 9393 --gpu_ids 0
```
## 预测
......
......@@ -34,10 +34,16 @@ def update_info(file_name, feature, info):
f.write(new_str)
if len(sys.argv) > 2:
update_info("paddle_serving_server_gpu/version.py", "cuda_version",
if len(sys.argv) > 2 and len(sys.argv[2]) > 0:
update_info("paddle_serving_server/version.py", "version_suffix",
sys.argv[2])
package_name = '${SERVER_PACKAGE_NAME}'
if package_name.endswith('gpu'):
update_info("paddle_serving_server/version.py", "device_type", "1")
elif package_name.endswith('xpu'):
update_info("paddle_serving_server/version.py", "device_type", "2")
path = "paddle_serving_" + sys.argv[1]
commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
update_info(path + "/version.py", "commit_id", commit_id)
......@@ -82,7 +82,10 @@ class LocalPredictor(object):
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
config = AnalysisConfig(model_path)
if os.path.exists(os.path.join(model_path, "__params__")):
config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, "__params__"))
else:
config = AnalysisConfig(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_lite:{}, use_xpu: {}, use_feed_fetch_ops:{}".format(
......
此差异已折叠。
此差异已折叠。
......@@ -28,7 +28,6 @@ import logging
_LOGGER = logging.getLogger(__name__)
class Monitor(object):
'''
Monitor base class. It is used to monitor the remote model, pull and update the local model.
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册