diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index 640d599dda555a8fef70ee0c42de29eae022c720..6554179bd90ed56ceb8b70218a74821e09259f7f 100644
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -77,10 +77,10 @@ export PATH=$PATH:$GOPATH/bin
 ```shell
 go env -w GO111MODULE=on
 go env -w GOPROXY=https://goproxy.cn,direct
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
 
@@ -91,9 +91,9 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DSERVER=ON ..
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DSERVER=ON ..
 make -j10
 ```
 
@@ -104,10 +104,28 @@ you can execute `make install` to put targets under directory `./output`, you ne
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DSERVER=ON \
-    -DWITH_GPU=ON ..
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+      -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+      -DSERVER=ON \
+      -DWITH_GPU=ON ..
+make -j10
+```
+
+### Integrated TRT version Paddle Inference Library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+      -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+      -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+      -DSERVER=ON \
+      -DWITH_GPU=ON \
+      -DWITH_TRT=ON ..
 make -j10
 ```
 
@@ -136,7 +154,10 @@ execute `make install` to put targets under directory `./output`
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DAPP=ON ..
 make
 ```
 
@@ -167,7 +188,9 @@ Please use the example under `python/examples` to verify.
 | WITH_AVX              | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL              | Compile Paddle Serving with MKL support    | OFF |
 | WITH_GPU              | Compile Paddle Serving with NVIDIA GPU     | OFF |
-| CUDNN_ROOT            | Define CuDNN library and header path       |     |
+| CUDNN_LIBRARY         | Define CuDNN library and header path       |     |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA path                           |     |
+| TENSORRT_ROOT         | Define TensorRT path                       |     |
 | CLIENT                | Compile Paddle Serving Client              | OFF |
 | SERVER                | Compile Paddle Serving Server              | OFF |
 | APP                   | Compile Paddle Serving App package         | OFF |
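The new `cmake` invocations above read `${CUDA_PATH}`, `${CUDNN_LIBRARY}`, and `${TENSORRT_LIBRARY_PATH}` from the environment, but the hunks never define them. A minimal sketch of exporting them before running `cmake`; every path below is a placeholder for a typical Linux layout, not a value from this patch, and must be adapted to the actual install locations on your machine:

```shell
# Placeholder locations -- adjust to where CUDA, cuDNN, and TensorRT
# actually live on your build machine.
export CUDA_PATH='/usr/local/cuda'                           # -> CUDA_TOOLKIT_ROOT_DIR
export CUDNN_LIBRARY='/usr/local/cuda/lib64/'                # -> CUDNN_LIBRARY
export TENSORRT_LIBRARY_PATH='/usr/local/TensorRT-6.0.1.5/'  # -> TENSORRT_ROOT
```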
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index 392da7ed64bc88a8b92294f2a1f805522433cad1..c49c77b6327545cf44a6a45ac6d9ac55584fe17f 100644
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -74,10 +74,10 @@ export PATH=$PATH:$GOPATH/bin
 ```shell
 go env -w GO111MODULE=on
 go env -w GOPROXY=https://goproxy.cn,direct
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
 
@@ -87,7 +87,10 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DSERVER=ON ..
 make -j10
 ```
 
@@ -97,21 +100,44 @@ make -j10
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+      -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+      -DSERVER=ON \
+      -DWITH_GPU=ON ..
 make -j10
 ```
 
-Run `make install` to place the build outputs under the `./output` directory.
+### Integrated TensorRT version Paddle Inference Library
 
-**Note:** After a successful build, the `SERVING_BIN` path needs to be set; see the [Notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) section below for details.
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+      -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+      -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+      -DSERVER=ON \
+      -DWITH_GPU=ON \
+      -DWITH_TRT=ON ..
+make -j10
+```
+Run `make install` to place the build outputs under the `./output` directory.
+**Note:** After a successful build, the `SERVING_BIN` path needs to be set; see the [Notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) section below for details.
 
 ## Compile the Client
 
 ``` shell
 mkdir client-build && cd client-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DCLIENT=ON ..
 make -j10
 ```
 
@@ -123,7 +149,11 @@ make -j10
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+      -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+      -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+      -DCMAKE_INSTALL_PREFIX=./output \
+      -DAPP=ON ..
 make
 ```
 
@@ -154,7 +184,10 @@ make
 | WITH_AVX              | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL              | Compile Paddle Serving with MKL support    | OFF |
 | WITH_GPU              | Compile Paddle Serving with NVIDIA GPU     | OFF |
-| CUDNN_ROOT            | Define CuDNN library and header path       |     |
+| WITH_TRT              | Compile Paddle Serving with TensorRT       | OFF |
+| CUDNN_LIBRARY         | Define CuDNN library and header path       |     |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA path                           |     |
+| TENSORRT_ROOT         | Define TensorRT path                       |     |
 | CLIENT                | Compile Paddle Serving Client              | OFF |
 | SERVER                | Compile Paddle Serving Server              | OFF |
 | APP                   | Compile Paddle Serving App package         | OFF |
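For context on where the new `WITH_TRT` build ultimately surfaces: the engine change below gates TensorRT on `params.use_trt()`. A hedged usage sketch for a TRT-enabled server package, assuming this release's `paddle_serving_server_gpu.serve` entry point exposes that switch as a `--use_trt` flag (the flag name may differ by version) and using a hypothetical model directory:

```shell
# Hypothetical model path; --use_trt is assumed to map to the use_trt()
# engine parameter checked in fluid_gpu_engine.h below.
python -m paddle_serving_server_gpu.serve \
    --model ./serving_server_model \
    --port 9292 \
    --use_trt
```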
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 99fb05786b21d0a1337f6e3dee5ffd1ca2787218..4c2e4684ba6f2945dc7f2c7ef2a583cb9dd671b2 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -238,12 +238,12 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     analysis_config.SetTRTDynamicShapeInfo(
         min_input_shape, max_input_shape, opt_input_shape);
 #endif
-    int batch = 8;
+    int max_batch = 256;
     int min_subgraph_size = 3;
     if (params.use_trt()) {
       analysis_config.EnableTensorRtEngine(
-          1 << 30,
-          batch,
+          1 << 20,
+          max_batch,
           min_subgraph_size,
           paddle::AnalysisConfig::Precision::kFloat32,
           true,
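On this last hunk: the leading arguments of `paddle::AnalysisConfig::EnableTensorRtEngine` are the TensorRT workspace size in bytes and the maximum batch size, so the change shrinks the GPU scratch allocation from 1 GiB (`1 << 30`) to 1 MiB (`1 << 20`) while raising the largest accepted batch from 8 to 256. A commented sketch of the resulting call; the wrapper function is illustrative, and the final argument's value is an assumption since the hunk truncates before it:

```cpp
#include "paddle_inference_api.h"  // public Paddle inference API header

// Illustrative wrapper, not the engine's actual setup code.
void configure_trt(paddle::AnalysisConfig* analysis_config) {
  analysis_config->EnableTensorRtEngine(
      1 << 20,  // workspace_size: 1 MiB of GPU scratch for TensorRT
                // (previously 1 << 30, i.e. 1 GiB)
      256,      // max_batch_size: largest batch the built engine accepts
                // (previously 8)
      3,        // min_subgraph_size: only offload subgraphs of >= 3 ops
      paddle::AnalysisConfig::Precision::kFloat32,  // run TensorRT in FP32
      true,     // use_static: serialize the engine so later runs reuse it
      false);   // use_calib_mode: assumed off, since FP32 needs no INT8
                // calibration (the hunk above cuts off before this argument)
}
```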