Unverified commit 12d28500 authored by TeslaZhao, committed by GitHub

Merge pull request #2 from PaddlePaddle/develop

Sync PaddlePaddle/Serving to TeslaZhao/Serving 
@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF)
 option(APP "Compile Paddle Serving App package" OFF)
 option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
 option(PACK "Compile for whl" OFF)
+option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
 set(WITH_MKLML ${WITH_MKL})
 if (NOT DEFINED WITH_MKLDNN)
......
@@ -45,9 +45,10 @@ nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
@@ -127,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
 Here, we use `curl` to send a HTTP POST request to the service we just started. Users can use any python library to send HTTP POST as well, e.g, [requests](https://requests.readthedocs.io/en/master/).
 </center>
......
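The README hunk above notes that any HTTP client works in place of `curl`. As a minimal, hypothetical sketch (assuming the uci_housing quick-start service was started with `--name uci` on the port 9292 mapped in the docker commands, and using purely illustrative feed values for the 13 housing features), such a request could look like:

```shell
# Hypothetical request; host, port, URL name and feed values all depend on how the service was started.
curl -H "Content-Type:application/json" -X POST \
    -d '{"feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0062, 0.0749, 0.0291, -0.5562, -0.0273, -0.4737, -0.0651, -0.2183, 0.0893, 0.1583]}], "fetch": ["price"]}' \
    http://127.0.0.1:9292/uci/prediction
```

The same JSON body can be posted from Python with `requests.post("http://127.0.0.1:9292/uci/prediction", json=payload)`.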
@@ -47,9 +47,10 @@ nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/se
 nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (for example the Tsinghua mirror; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
@@ -123,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
 We use the `curl` command to send an HTTP POST request to the service we just started. Users can also send HTTP POST requests from a python library; see [requests](https://requests.readthedocs.io/en/master/).
 </center>
......
@@ -31,10 +31,14 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.4")
 
 if (WITH_GPU)
-  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+  if (WITH_TRT)
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
+  else()
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
+  endif()
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -50,21 +54,38 @@ endif()
 SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
 if (WITH_GPU OR WITH_MKLML)
-  ExternalProject_Add(
-    "extern_paddle"
-    ${EXTERNAL_PROJECT_LOG_ARGS}
-    URL "${PADDLE_LIB_PATH}"
-    PREFIX "${PADDLE_SOURCES_DIR}"
-    DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND ""
-    UPDATE_COMMAND ""
-    INSTALL_COMMAND
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
-      ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
-  )
+  if (WITH_TRT)
+    ExternalProject_Add(
+      "extern_paddle"
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      URL "${PADDLE_LIB_PATH}"
+      PREFIX "${PADDLE_SOURCES_DIR}"
+      DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      UPDATE_COMMAND ""
+      INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
+    )
+  else()
+    ExternalProject_Add(
+      "extern_paddle"
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      URL "${PADDLE_LIB_PATH}"
+      PREFIX "${PADDLE_SOURCES_DIR}"
+      DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      UPDATE_COMMAND ""
+      INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
+        ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
+    )
+  endif()
 else()
   ExternalProject_Add(
     "extern_paddle"
@@ -92,8 +113,16 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+
+if (WITH_TRT)
+  ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+
+  ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+endif()
 
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +130,9 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
 LIST(APPEND external_project_dependencies paddle)
 
 LIST(APPEND paddle_depend_libs
   xxhash)
+
+if(WITH_TRT)
+  LIST(APPEND paddle_depend_libs
+    nvinfer nvinfer_plugin)
+endif()
@@ -44,6 +44,7 @@ message EngineDesc {
   optional bool static_optimization = 14;
   optional bool force_update_static_cache = 15;
   optional bool enable_ir_optimization = 16;
+  optional bool use_trt = 17;
 };
 
 // model_toolkit conf
......
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
+#execute_process(COMMAND go env -w GO111MODULE=off)
 add_subdirectory(cube-server)
 add_subdirectory(cube-api)
 add_subdirectory(cube-builder)
-add_subdirectory(cube-transfer)
-add_subdirectory(cube-agent)
+#add_subdirectory(cube-transfer)
+#add_subdirectory(cube-agent)
@@ -9,7 +9,7 @@ endif()
 target_include_directories(serving PUBLIC
     ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
     )
+include_directories(${CUDNN_ROOT}/include/)
 if(WITH_GPU)
     target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
         -Wl,--no-whole-archive)
@@ -29,7 +29,11 @@ if(WITH_GPU)
 endif()
 
 if(WITH_MKL OR WITH_GPU)
+if (WITH_TRT)
+    target_link_libraries(serving -liomp5 -lmklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+else()
     target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+endif()
 else()
     target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
 endif()
......
@@ -155,9 +155,11 @@ int GeneralResponseOp::inference() {
   }
   if (model_config->_is_lod_fetch[idx]) {
-    for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
-      fetch_p->mutable_tensor_array(var_idx)->add_lod(
-          in->at(idx).lod[0][j]);
+    if (in->at(idx).lod.size() > 0) {
+      for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+        fetch_p->mutable_tensor_array(var_idx)->add_lod(
+            in->at(idx).lod[0][j]);
+      }
     }
   }
......
@@ -13,7 +13,9 @@ set_source_files_properties(
     PROPERTIES
     COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
 add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
+if (WITH_TRT)
+    add_definitions(-DWITH_TRT)
+endif()
 target_link_libraries(pdserving
     brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
@@ -38,6 +38,7 @@ class InferEngineCreationParams {
     _enable_ir_optimization = false;
     _static_optimization = false;
     _force_update_static_cache = false;
+    _use_trt = false;
   }
 
   void set_path(const std::string& path) { _path = path; }
@@ -50,12 +51,16 @@ class InferEngineCreationParams {
     _enable_ir_optimization = enable_ir_optimization;
   }
 
+  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
 
   bool enable_ir_optimization() const { return _enable_ir_optimization; }
 
+  bool use_trt() const { return _use_trt; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -86,6 +91,7 @@ class InferEngineCreationParams {
   bool _enable_ir_optimization;
   bool _static_optimization;
   bool _force_update_static_cache;
+  bool _use_trt;
 };
 
 class InferEngine {
@@ -172,6 +178,10 @@ class ReloadableInferEngine : public InferEngine {
           force_update_static_cache);
     }
 
+    if (conf.has_use_trt()) {
+      _infer_engine_params.set_use_trt(conf.use_trt());
+    }
+
     if (!check_need_reload() || load(_infer_engine_params) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
@@ -553,8 +563,12 @@ class CloneDBReloadableInferEngine
 };
 
 template <typename FluidFamilyCore>
+#ifdef WITH_TRT
+class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+#else
 class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
- public:
+#endif
+ public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
......
@@ -51,8 +51,8 @@ class WeightedRandomRender : public EndpointRouterBase {
         new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
     if (factory == NULL) {
       RAW_LOG(ERROR,
-              "Failed regist factory: WeightedRandomRender->EndpointRouterBase \
-          in macro!");
+              "Failed regist factory: WeightedRandomRender->EndpointRouterBase "
+              "in macro!");
       return -1;
     }
@@ -63,8 +63,8 @@ class WeightedRandomRender : public EndpointRouterBase {
     if (FactoryPool<EndpointRouterBase>::instance().register_factory(
             "WeightedRandomRender", factory) != 0) {
       RAW_LOG(INFO,
-              "Factory has been registed: \
-          WeightedRandomRender->EndpointRouterBase.");
+              "Factory has been registed: "
+              "WeightedRandomRender->EndpointRouterBase.");
     }
     return 0;
......
@@ -75,10 +75,12 @@ export PATH=$PATH:$GOPATH/bin
 ## Get go packages
 
 ```shell
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
@@ -89,9 +91,9 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
     -DSERVER=ON ..
 make -j10
 ```
@@ -102,10 +104,28 @@ you can execute `make install` to put targets under directory `./output`, you ne
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DSERVER=ON \
-    -DWITH_GPU=ON ..
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON ..
+make -j10
+```
+
+### Integrated TRT version paddle inference library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON \
+    -DWITH_TRT=ON ..
 make -j10
 ```
@@ -134,7 +154,10 @@ execute `make install` to put targets under directory `./output`
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DAPP=ON ..
 make
 ```
@@ -165,7 +188,9 @@ Please use the example under `python/examples` to verify.
 | WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
 | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
 | CLIENT | Compile Paddle Serving Client | OFF |
 | SERVER | Compile Paddle Serving Server | OFF |
 | APP | Compile Paddle Serving App package | OFF |
@@ -180,7 +205,8 @@ To compile the Paddle Serving GPU version on bare metal, you need to install the
 - CUDA
 - CuDNN
-- NCCL2
+
+To compile the TensorRT version, you need to install the TensorRT library.
 
 Note here:
@@ -190,21 +216,12 @@ Note here:
 The following is the base library version matching relationship used by the PaddlePaddle release version for reference:
 
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
 
 ### How to make the compiler detect the CuDNN library
 
 Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN.
-
-### How to make the compiler detect the nccl library
-
-After downloading the corresponding version of the nccl2 library from the NVIDIA developer official website and decompressing it, add the following environment variables (take nccl2.1.4 as an example):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
@@ -72,10 +72,12 @@ export PATH=$PATH:$GOPATH/bin
 ## Get Go packages
 
 ```shell
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
@@ -85,7 +87,10 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DSERVER=ON ..
 make -j10
 ```
@@ -95,21 +100,44 @@ make -j10
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON ..
 make -j10
 ```
 
-Run `make install` to place the build outputs under the `./output` directory.
-**Note:** after a successful build, the `SERVING_BIN` path must be set; see the [notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) later in this document.
+### Integrate the TensorRT version of the Paddle Inference Library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON \
+    -DWITH_TRT=ON ..
+make -j10
+```
+
+Run `make install` to place the build outputs under the `./output` directory.
+
+**Note:** after a successful build, the `SERVING_BIN` path must be set; see the [notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) later in this document.
 
 ## Compile the Client part
 
 ``` shell
 mkdir client-build && cd client-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCLIENT=ON ..
 make -j10
 ```
@@ -121,7 +149,11 @@ make -j10
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCMAKE_INSTALL_PREFIX=./output \
+    -DAPP=ON ..
 make
 ```
@@ -152,7 +184,10 @@ make
 | WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
 | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
 | CLIENT | Compile Paddle Serving Client | OFF |
 | SERVER | Compile Paddle Serving Server | OFF |
 | APP | Compile Paddle Serving App package | OFF |
@@ -167,7 +202,8 @@ Paddle Serving supports GPU prediction through the PaddlePaddle inference library. The WITH_GPU
 - CUDA
 - CuDNN
-- NCCL2
+
+To build the TensorRT version, the TensorRT library must be installed.
 
 Note the following:
@@ -176,21 +212,12 @@ Paddle Serving supports GPU prediction through the PaddlePaddle inference library. The WITH_GPU
 The base library version pairings used by PaddlePaddle release builds are listed below for reference:
 
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
 
 ### How to make the Paddle Serving build system detect the CuDNN library
 
-Download the matching CuDNN version from the NVIDIA developer site, extract it locally, then add the `-DCUDNN_ROOT` parameter to the cmake command to point at the CuDNN library path.
+Download the matching CuDNN version from the NVIDIA developer site, extract it locally, then add the `-DCUDNN_LIBRARY` parameter to the cmake command to point at the CuDNN library path.
-
-### How to make the Paddle Serving build system detect the nccl library
-
-Download the matching nccl2 library from the NVIDIA developer site, extract it, and add the following environment variables (using nccl2.1.4 as an example):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
@@ -15,13 +15,47 @@
 ```
 
 - Q: How do I run prediction with a Paddle Serving that I compiled myself?
   A: Install the self-built whl package via pip, and set the SERVING_BIN environment variable to the path of the compiled serving binary.
 
 - Q: GPU prediction fails with InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
   A: Add the directory of the libcuda.so that belongs to the GPU driver to the LD_LIBRARY_PATH environment variable.
 
 - Q: GPU prediction fails with ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
   A: Add the cudnn lib64 path to LD_LIBRARY_PATH. The post9 build of Paddle Serving installed from pypi uses cudnn 7.3 and post10 uses cudnn 7.5; for a self-compiled Paddle Serving, the cudnn version in use is recorded in the log/serving.INFO log file.
 
 - Q: GPU prediction fails with Error: Failed to find dynamic library: libcublas.so
   A: Add the cuda lib64 path to LD_LIBRARY_PATH. The post9 build of Paddle Serving uses cuda 9.0 and post10 uses cuda 10.0.
 
+- Q: Where can I view the log output of deployment and prediction?
+  - A: Server-side logs come in two parts: some are printed to standard output, and some go to the log/serving.INFO file under the directory from which the service was started.
+  Client-side logs are printed directly to standard output.
+  Running 'export GLOG_v=3' before deploying the service produces more verbose logs.
+
+- Q: Running Serving in a GPU environment fails with GPU count is: 0.
+  ```
+  terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
+  what():
+  --------------------------------------------
+  C++ Call Stacks (More useful to developers):
+  --------------------------------------------
+  0   std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
+  1   paddle::platform::SetDeviceId(int)
+  2   paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const
+  3   std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig, (paddle::PaddleEngineKind)2>(paddle::AnalysisConfig const&)
+  4   std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(paddle::AnalysisConfig const&)
+  ----------------------
+  Error Message Summary:
+  ----------------------
+  InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
+    [Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211)
+  ```
+  A: libcuda.so was not linked successfully. First locate libcuda.so on the machine and use ldd to confirm that the libnvidia version matches the one reported by nvidia-smi (libnvidia-fatbinaryloader.so.418.39 for "NVIDIA-SMI 418.39 Driver Version: 418.39"), then export the directory that contains libcuda.so (for example, if libcuda.so is in /usr/lib64/, run export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/).
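The LD_LIBRARY_PATH answers above all come down to exporting the right directories before starting the service. A sketch, with example paths only (the real directories depend on the local driver, CUDA and CuDNN installs):

```shell
# Example paths only; point these at the directories that actually contain the libraries.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/             # libcuda.so shipped with the GPU driver
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/  # libcublas.so (cuda 9.0 for post9, 10.0 for post10)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cudnn/lib64/ # libcudnn.so (cudnn 7.3 for post9, 7.5 for post10)
export GLOG_v=3  # optional: more verbose serving logs, as described above
```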
@@ -3,51 +3,59 @@
 ## CPU server
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-any.whl
 ```
 
 ## GPU server
 ### Python 3
 ```
 #cuda 9.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
 #cuda 10.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
+#cuda10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
 ```
 ### Python 2
 ```
 #cuda 9.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
 #cuda 10.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
+##cuda10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
 ```
 
 ## Client
 ### Python 3.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp37-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
 ```
 ### Python 3.6
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp36-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
 ```
+### Python 3.5
+```
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp35-none-any.whl
+```
 ### Python 2.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp27-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp27-none-any.whl
 ```
 
 ## App
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.whl
 ```
+## Java Demo
+### Install package
+```
+mvn compile
+mvn install
+cd examples
+mvn compile
+mvn install
+```
+### Start Server
+take the fit_a_line demo as example
+```
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
+```
+### Client Predict
+```
+java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
+```
+The Java example also contains the prediction client of Bert, Model_enaemble, asyn_predict, batch_predict, Cube_local, Cube_quant, and Yolov4 models.
+## Java demo
+### Install client dependencies
+```
+mvn compile
+mvn install
+cd examples
+mvn compile
+mvn install
+```
+### Start the server
+Take the fit_a_line model as an example
+```
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
+```
+### Client prediction
+```
+java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
+```
+The Java demo also includes prediction clients for the bert, model_enaemble, asyn_predict, batch_predict, cube_local, cube_quant, and yolov4 models.
@@ -23,7 +23,6 @@
 #include "core/configure/inferencer_configure.pb.h"
 #include "core/predictor/framework/infer.h"
 #include "paddle_inference_api.h"  // NOLINT
-//#include "predictor/framework/infer.h"
 
 namespace baidu {
 namespace paddle_serving {
......
@@ -2,6 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
 target_include_directories(fluid_gpu_engine PUBLIC
     ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
 
 add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     paddle::AnalysisConfig analysis_config;
     analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
@@ -198,12 +198,68 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
-    if (params.enable_ir_optimization()) {
-      analysis_config.SwitchIrOptim(true);
-    } else {
-      analysis_config.SwitchIrOptim(false);
-    }
+#if 0  // todo: support flexible shape
+
+    int min_seq_len = 1;
+    int max_seq_len = 512;
+    int opt_seq_len = 128;
+    int head_number = 12;
+    int batch = 50;
+
+    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
+    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
+    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
+
+    std::string input1_name = "src_text_a_ids";
+    std::string input2_name = "pos_text_a_ids";
+    std::string input3_name = "sent_text_a_ids";
+    std::string input4_name = "stack_0.tmp_0";
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {input1_name, min_in_shape},
+        {input2_name, min_in_shape},
+        {input3_name, min_in_shape},
+        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {input1_name, max_in_shape},
+        {input2_name, max_in_shape},
+        {input3_name, max_in_shape},
+        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> opt_input_shape = {
+        {input1_name, opt_in_shape},
+        {input2_name, opt_in_shape},
+        {input3_name, opt_in_shape},
+        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
+    };
+
+    analysis_config.SetTRTDynamicShapeInfo(
+        min_input_shape, max_input_shape, opt_input_shape);
+#endif
+    int max_batch = 32;
+    int min_subgraph_size = 3;
+    if (params.use_trt()) {
+      analysis_config.EnableTensorRtEngine(
+          1 << 20,
+          max_batch,
+          min_subgraph_size,
+          paddle::AnalysisConfig::Precision::kFloat32,
+          false,
+          false);
+      LOG(INFO) << "create TensorRT predictor";
+    } else {
+      if (params.enable_memory_optimization()) {
+        analysis_config.EnableMemoryOptim();
+      }
+
+      if (params.enable_ir_optimization()) {
+        analysis_config.SwitchIrOptim(true);
+      } else {
+        analysis_config.SwitchIrOptim(false);
+      }
+    }
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
         paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
@@ -80,6 +80,16 @@ if (SERVER)
         COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
         DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
     add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  elseif(WITH_TRT)
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+            ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+            "server_gpu" trt
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
   else()
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
@@ -16,6 +16,7 @@ from paddle_serving_client import Client
 from paddle_serving_app.reader import *
 import sys
 import numpy as np
+from paddle_serving_app.reader import BlazeFacePostprocess
 
 preprocess = Sequential([
     File2Image(),
......
@@ -90,6 +90,7 @@ def single_func(idx, resource):
                     image = base64.b64encode(
                         open("./image_data/n01440764/" + file_list[i]).read())
                 else:
+                    image_path = "./image_data/n01440764/" + file_list[i]
                     image = base64.b64encode(open(image_path, "rb").read()).decode(
                         "utf-8")
                 req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
......
@@ -13,7 +13,7 @@
 # limitations under the License.
 import sys
 from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
+from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
 
 if len(sys.argv) != 4:
     print("python resnet50_web_service.py model device port")
......
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .chinese_bert_reader import ChineseBertReader
-from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage
 from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor
-from .image_reader import RCNNPostprocess, SegPostprocess, PadStride
+from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess
 from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
 from .lac_reader import LACReader
 from .senta_reader import SentaReader
......
@@ -317,7 +317,7 @@ class RCNNPostprocess(object):
                     self.clip_bbox([xmin, ymin, xmax, ymax])
                 w = xmax - xmin
                 h = ymax - ymin
-                im_shape = t['im_shape'][0][i].tolist()
+                im_shape = t['im_shape'].tolist()
                 im_height, im_width = int(im_shape[0]), int(im_shape[1])
                 xmin *= im_width
                 ymin *= im_height
@@ -420,7 +420,7 @@ class RCNNPostprocess(object):
         for key in image_with_bbox:
             if key == "image":
                 continue
-            if ".lod" in key:
+            if ".lod" in key or "im_shape" in key:
                 continue
             fetch_name = key
             bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
......
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving App version string """
-serving_app_version = "0.1.2"
+serving_app_version = "0.0.0"
 commit_id = ""
@@ -354,8 +354,9 @@ class Client(object):
                             name))
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 elif self.fetch_names_to_type_[name] == float32_type:
                     result_map[name] = result_batch_handle.get_float_by_name(
                         mi, name)
@@ -367,9 +368,9 @@ class Client(object):
                     shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 elif self.fetch_names_to_type_[name] == int32_type:
                     # result_map[name] will be py::array(numpy array)
                     result_map[name] = result_batch_handle.get_int32_by_name(
@@ -382,8 +383,9 @@ class Client(object):
                     shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 multi_result_map.append(result_map)
             ret = None
             if len(model_engine_names) == 1:
......
@@ -74,7 +74,8 @@ def save_model(server_model_folder,
         fetch_var = model_conf.FetchVar()
         fetch_var.alias_name = key
         fetch_var.name = fetch_var_dict[key].name
-        fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
+        #fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
+        fetch_var.is_lod_tensor = 1
         if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64:
             fetch_var.fetch_type = 0
         if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
......
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 commit_id = ""
@@ -584,7 +584,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                 else:
                     raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
-                if name in self.lod_tensor_set_:
+                if "{}.lod".format(name) in model_result:
                     tensor.lod.extend(model_result["{}.lod".format(name)]
                                       .tolist())
                 inst.tensor_array.append(tensor)
......
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 commit_id = ""
@@ -123,7 +123,7 @@ class WebService(object):
                     feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
                 result = {"result": result}
             except ValueError as err:
-                result = {"result": err}
+                result = {"result": str(err)}
             return result
 
     def run_rpc_service(self):
......
@@ -73,6 +73,8 @@ def serve_args():
         default=False,
         action="store_true",
         help="Use Multi-language-service")
+    parser.add_argument(
+        "--use_trt", default=False, action="store_true", help="Use TensorRT")
     parser.add_argument(
         "--product_name",
         type=str,
@@ -205,6 +207,7 @@ class Server(object):
         self.cur_path = os.getcwd()
         self.use_local_bin = False
         self.gpuid = 0
+        self.use_trt = False
        self.model_config_paths = None  # for multi-model in a workflow
        self.product_name = None
        self.container_id = None
@@ -271,6 +274,9 @@ class Server(object):
     def set_gpuid(self, gpuid=0):
         self.gpuid = gpuid
 
+    def set_trt(self):
+        self.use_trt = True
+
     def _prepare_engine(self, model_config_paths, device):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()
@@ -290,6 +296,7 @@ class Server(object):
             engine.enable_ir_optimization = self.ir_optimization
             engine.static_optimization = False
             engine.force_update_static_cache = False
+            engine.use_trt = self.use_trt
 
             if device == "cpu":
                 engine.type = "FLUID_CPU_ANALYSIS_DIR"
@@ -396,7 +403,10 @@ class Server(object):
         for line in version_file.readlines():
             if re.match("cuda_version", line):
                 cuda_version = line.split("\"")[1]
-                device_version = "serving-gpu-cuda" + cuda_version + "-"
+                if cuda_version != "trt":
+                    device_version = "serving-gpu-cuda" + cuda_version + "-"
+                else:
+                    device_version = "serving-gpu-" + cuda_version + "-"
 
         folder_name = device_version + serving_server_version
         tar_name = folder_name + ".tar.gz"
@@ -645,7 +655,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                 else:
                     raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
-                if name in self.lod_tensor_set_:
+                if "{}.lod".format(name) in model_result:
                     tensor.lod.extend(model_result["{}.lod".format(name)]
                                       .tolist())
                 inst.tensor_array.append(tensor)
......
@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server.set_memory_optimize(mem_optim)
     server.set_ir_optimize(ir_optim)
     server.set_max_body_size(max_body_size)
+    if args.use_trt:
+        server.set_trt()
 
     if args.product_name != None:
         server.set_product_name(args.product_name)
......
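The hunks above wire `--use_trt` from `serve_args()` through `Server.set_trt()` into the engine config. As a sketch, assuming a TRT build of paddle-serving-server-gpu and the uci_housing example model from the quick start, a TensorRT-enabled server could be launched with:

```shell
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9292 --use_trt
```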
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 cuda_version = "9"
 commit_id = ""
@@ -178,7 +178,7 @@ class WebService(object):
                     feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
                 result = {"result": result}
             except ValueError as err:
-                result = {"result": err}
+                result = {"result": str(err)}
             return result
     def run_rpc_service(self):
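The small-looking change matters: an exception object is not JSON-serializable, so returning `{"result": err}` would blow up when Flask encodes the response, while `str(err)` keeps the message and keeps the dict encodable. A two-line illustration:

```python
import json

err = ValueError("Wrong feed var")       # stand-in for the caught error
# json.dumps({"result": err})            # TypeError: not JSON serializable
print(json.dumps({"result": str(err)}))  # {"result": "Wrong feed var"}
```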
......
@@ -25,7 +25,7 @@ import (
 	"github.com/grpc-ecosystem/grpc-gateway/runtime"
 	"google.golang.org/grpc"
-	gw "./proto"
+	gw "serving-gateway/proto"
 )
 //export run_proxy_server
......
@@ -28,17 +28,11 @@ import util
 py_version = sys.version_info
 def copy_lib():
-    if py_version[0] == 2:
-        lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
-    elif py_version[1] == 6:
-        lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
-    elif py_version[1] == 7:
-        lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
     os.popen('mkdir -p paddle_serving_client/lib')
+    lib_list = ['${OPENSSL_CRYPTO_LIBRARY}', '${OPENSSL_SSL_LIBRARY}',
+                '${PYTHON_LIBRARY}']
     for lib in lib_list:
-        r = os.popen('whereis {}'.format(lib))
-        text = r.read()
-        os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip().split(' ')[1]))
+        os.popen('cp {} ./paddle_serving_client/lib'.format(lib))
 max_version, mid_version, min_version = util.python_version()
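The per-interpreter `if/elif` table and the `whereis` lookup are gone: `setup.py` is now treated as a template, and the build system substitutes the exact library paths (`${OPENSSL_CRYPTO_LIBRARY}`, `${OPENSSL_SSL_LIBRARY}`, `${PYTHON_LIBRARY}`) at configure time, so packaging copies precisely the libraries the binaries were linked against. After substitution the generated list would look roughly like this (paths are purely illustrative):

```python
# Hypothetical result of the configure-time substitution:
lib_list = ['/usr/lib64/libcrypto.so', '/usr/lib64/libssl.so',
            '/usr/local/python3.6/lib/libpython3.6m.so.1.0']
```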
@@ -53,9 +47,6 @@ REQUIRED_PACKAGES = [
     'grpcio-tools >= 1.28.1'
 ]
-if not util.find_package("paddlepaddle") and not util.find_package("paddlepaddle-gpu"):
-    REQUIRED_PACKAGES.append("paddlepaddle")
 packages=['paddle_serving_client',
           'paddle_serving_client.proto',
......
@@ -29,7 +29,7 @@ util.gen_pipeline_code("paddle_serving_server")
 REQUIRED_PACKAGES = [
     'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
-    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
+    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]
 packages=['paddle_serving_server',
......
@@ -19,17 +19,19 @@ from __future__ import print_function
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving_server_gpu.version import serving_server_version
+from paddle_serving_server_gpu.version import serving_server_version, cuda_version
 import util
-max_version, mid_version, min_version = util.python_version()
+if cuda_version != "trt":
+    cuda_version = "post" + cuda_version
+max_version, mid_version, min_version = util.python_version()
 # gen pipeline proto code
 util.gen_pipeline_code("paddle_serving_server_gpu")
 REQUIRED_PACKAGES = [
     'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
-    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
+    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]
 packages=['paddle_serving_server_gpu',
@@ -56,7 +58,7 @@ package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so
 setup(
     name='paddle-serving-server-gpu',
-    version=serving_server_version.replace('-', '') + '.post@CUDA_VERSION_MAJOR@',
+    version=serving_server_version.replace('-', '') + "." + cuda_version,
     description=
     ('Paddle Serving Package for saved model with PaddlePaddle'),
     url='https://github.com/PaddlePaddle/Serving',
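Together with the `cuda_version` rewrite at the top of this file, the wheel version is now computed from the build configuration instead of a hard-coded `@CUDA_VERSION_MAJOR@` placeholder. A worked example of the resulting version strings (a sketch that just re-applies the two lines above):

```python
def wheel_version(serving_server_version, cuda_version):
    # Same transformation as in setup.py: CUDA majors become a .postN
    # suffix, "trt" stays as a plain suffix.
    if cuda_version != "trt":
        cuda_version = "post" + cuda_version
    return serving_server_version.replace('-', '') + "." + cuda_version

print(wheel_version("0.3.2", "9"))    # 0.3.2.post9
print(wheel_version("0.3.2", "10"))   # 0.3.2.post10
print(wheel_version("0.3.2", "trt"))  # 0.3.2.trt
```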
......
@@ -44,8 +44,8 @@ def gen_pipeline_code(package_name):
     ret = os.system(
         "cd {}/pipeline/gateway/proto/ && "
         "../../../../../third_party/install/protobuf/bin/protoc -I. "
-        "-I$GOPATH/src "
-        "-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
+        "-I$GOPATH/pkg/mod "
+        "-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
         "--go_out=plugins=grpc:. "
         "gateway.proto".format(package_name))
     if ret != 0:
@@ -54,14 +54,18 @@ def gen_pipeline_code(package_name):
     ret = os.system(
         "cd {}/pipeline/gateway/proto/ && "
         "../../../../../third_party/install/protobuf/bin/protoc -I. "
-        "-I$GOPATH/src "
-        "-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
+        "-I$GOPATH/pkg/mod "
+        "-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
         "--grpc-gateway_out=logtostderr=true:. "
         "gateway.proto".format(package_name))
     if ret != 0:
         exit(1)
     # pipeline grpc-gateway shared-lib
+    ret = os.system("cd {}/pipeline/gateway/ && go mod init serving-gateway".
+                    format(package_name))
+    ret = os.system("cd {}/pipeline/gateway/ && go mod vendor && go mod tidy".
+                    format(package_name))
     ret = os.system(
         "cd {}/pipeline/gateway && "
         "go build -buildmode=c-shared -o libproxy_server.so proxy_server.go".
......
@@ -41,6 +41,12 @@ RUN yum -y install wget && \
     echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
     source /root/.bashrc && \
     cd .. && rm -rf Python-3.6.8* && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-* && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
     yum clean all && \
     echo "export LANG=en_US.utf8" >> /root/.bashrc && \
......
@@ -41,6 +41,12 @@ RUN yum -y install wget && \
     echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
     source /root/.bashrc && \
     cd .. && rm -rf Python-3.6.8* && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-* && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
     yum clean all && \
     localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
......
@@ -34,6 +34,13 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
     && cd .. \
     && rm -rf patchelf-0.10*
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN yum install -y python3 python3-devel
 RUN yum -y update >/dev/null \
......
@@ -5,7 +5,14 @@ RUN yum -y install wget >/dev/null \
     && yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
     && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
     && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
@@ -32,3 +39,5 @@ RUN yum install -y python3 python3-devel \
 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
     && echo "export LANG=en_US.utf8" >> /root/.bashrc \
     && echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
@@ -6,6 +6,13 @@ RUN yum -y install wget >/dev/null \
     && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
     && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
     && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
......
@@ -18,14 +18,20 @@ function init() {
     export PYTHONROOT=/usr
     cd Serving
     export SERVING_WORKDIR=$PWD
     $PYTHONROOT/bin/python -m pip install -r python/requirements.txt
+    $PYTHONROOT/bin/python -m pip install paddlepaddle
     export GOPATH=$HOME/go
     export PATH=$PATH:$GOPATH/bin
-    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-    go get -u github.com/golang/protobuf/protoc-gen-go
-    go get -u google.golang.org/grpc
+    go env -w GO111MODULE=on
+    go env -w GOPROXY=https://goproxy.cn,direct
+    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+    go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+    go get -u google.golang.org/grpc@v1.33.0
 }
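`init()` now pins the whole Go toolchain: modules switched on, a domestic module proxy, and exact versions of the protoc plugins that the pipeline gateway build consumes. A hedged sketch for verifying that the plugins actually landed on `PATH` after `go get` (no version flags are assumed, only the binary names):

```python
import shutil

for tool in ("protoc-gen-go", "protoc-gen-grpc-gateway", "protoc-gen-swagger"):
    path = shutil.which(tool)
    print(tool, "->", path or "NOT FOUND (is $GOPATH/bin on PATH?)")
```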
 function check_cmd() {
@@ -605,7 +611,7 @@ function python_test_grpc_impl() {
             # test load server config and client config in Server side
             cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
+<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
             check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz > /dev/null"
             check_cmd "tar xf ctr_cube_unittest.tar.gz"
             check_cmd "mv models/ctr_client_conf ./"
@@ -626,9 +632,11 @@ function python_test_grpc_impl() {
                 echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
                 exit 1
             fi
+COMMENT
             echo "grpc impl test success"
             kill_server_process
-            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
+            #ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             ;;
@@ -665,6 +673,7 @@ function python_test_grpc_impl() {
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             # test load server config and client config in Server side
+<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
             cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
             check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz"
@@ -689,10 +698,11 @@ function python_test_grpc_impl() {
                 echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
                 exit 1
             fi
+COMMENT
             echo "grpc impl test success"
             kill_server_process
             ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill
-            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
+            #ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             ;;
         *)
@@ -829,8 +839,8 @@ EOF
     kill_process_by_port 18080
     # test: process servicer & thread op
-    pip uninstall grpcio -y
-    pip install grpcio --no-binary=grpcio
+    #pip uninstall grpcio -y
+    #pip install grpcio --no-binary=grpcio
     cat << EOF > config.yml
 rpc_port: 18080
 worker_num: 4
@@ -944,7 +954,7 @@ function python_run_test() {
     local TYPE=$1 # pwd: /Serving
     cd python/examples # pwd: /Serving/python/examples
     python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
-    python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
+    #python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
     python_test_bert $TYPE # pwd: /Serving/python/examples
     python_test_imdb $TYPE # pwd: /Serving/python/examples
     python_test_lac $TYPE # pwd: /Serving/python/examples
......