diff --git a/doc/Latest_Packages_CN.md b/doc/Latest_Packages_CN.md
index 924844013b1e445665b94249f84ee8b89335db35..8f0d1e5789f0b0c357c51e2c90fd173d7141b1fb 100644
--- a/doc/Latest_Packages_CN.md
+++ b/doc/Latest_Packages_CN.md
@@ -80,7 +80,7 @@ https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-0.0.0.tar.gz
 # Cuda 10.2 + Cudnn 8
 https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-1028-0.0.0.tar.gz
 # Cuda 11.2
-https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda112-0.0.0.tar.gz
+https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-112-0.0.0.tar.gz
 ```
 
 #### How to setup SERVING_BIN offline?
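The renamed CUDA 11.2 tarball above feeds the offline SERVING_BIN setup that `Latest_Packages_CN.md` describes. A minimal sketch of that setup, assuming the 0.0.0 test-dev tag shown above and the `serving-gpu-112-<version>` directory layout that `install_whl.sh` below relies on:

```bash
# Sketch only: download the renamed CUDA 11.2 binary and point SERVING_BIN at it.
wget https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-112-0.0.0.tar.gz
tar xf serving-gpu-112-0.0.0.tar.gz
# Assumption: the tarball unpacks to serving-gpu-112-0.0.0/, matching the
# pattern install_whl.sh uses for versioned releases.
export SERVING_BIN=$PWD/serving-gpu-112-0.0.0/serving
```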
$PYTHON_VERSION == "3.8" ]];then CPYTHON_PADDLE="38" fi -if [[ $SERVING_VERSION == "0.5.0" ]]; then - if [[ "$RUN_ENV" == "cpu" ]];then - server_release="paddle-serving-server==$SERVING_VERSION" - serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-cpu-avx-mkl-${SERVING_VERSION}.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda10.1" ]];then - server_release="paddle-serving-server-gpu==$SERVING_VERSION.post101" - serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-${SERVING_VERSION}.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda10.2" ]];then - server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102" - serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda11" ]];then - server_release="paddle-serving-server-gpu==$SERVING_VERSION.post11" - serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda11-${SERVING_VERSION}.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - fi - client_release="paddle-serving-client==$SERVING_VERSION" - app_release="paddle-serving-app==0.3.1" -else - if [[ "$RUN_ENV" == "cpu" ]];then - server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl" - serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/$PADDLE_VERSION-cpu-avx-mkl/paddlepaddle-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda10.1" ]];then - server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl" - serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.1-cudnn7-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda10.2" ]];then - server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post102-py3-none-any.whl" - serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz" - paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda10.2-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - elif [[ "$RUN_ENV" == "cuda11" ]];then - server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post11-py3-none-any.whl" - serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda11-$SERVING_VERSION.tar.gz" - 
paddle_whl="https://paddle-wheel.bj.bcebos.com/with-trt/$PADDLE_VERSION-gpu-cuda11.0-cudnn8-mkl-gcc8.2/paddlepaddle_gpu-$PADDLE_VERSION.post110-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" - fi - client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl" - app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl" +if [[ "$RUN_ENV" == "cpu" ]];then + server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl" + serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-avx-mkl-$SERVING_VERSION.tar.gz" + paddle_whl="paddlepaddle==$PADDLE_VERSION" +elif [[ "$RUN_ENV" == "cuda10.1" ]];then + server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl" + serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz" + paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5/paddlepaddle_gpu-$PADDLE_VERSION.post101-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" +elif [[ "$RUN_ENV" == "cuda10.2" ]] ;then + server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post1028-py3-none-any.whl" + serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-1028-$SERVING_VERSION.tar.gz" + paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.2_cudnn8.1.1_trt7.2.3.4/paddlepaddle_gpu-$PADDLE_VERSION-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" +elif [[ "$RUN_ENV" == "cuda11.2" ]];then + server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post112-py3-none-any.whl" + serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-112-$SERVING_VERSION.tar.gz" + paddle_whl="https://paddle-inference-lib.bj.bcebos.com/$PADDLE_VERSION/python/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda11.2_cudnn8.2.1_trt8.0.3.4/paddlepaddle_gpu-$PADDLE_VERSION.post112-cp$CPYTHON-cp$CPYTHON_PADDLE-linux_x86_64.whl" fi +client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl" +app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl" + + if [[ "$RUN_ENV" == "cpu" ]];then python$PYTHON_VERSION -m pip install $client_release $app_release $server_release python$PYTHON_VERSION -m pip install $paddle_whl @@ -105,15 +85,15 @@ elif [[ "$RUN_ENV" == "cuda10.2" ]];then echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc rm -rf serving-gpu-102-${SERVING_VERSION}.tar.gz cd - -elif [[ "$RUN_ENV" == "cuda11" ]];then +elif [[ "$RUN_ENV" == "cuda11.2" ]];then python$PYTHON_VERSION -m pip install $client_release $app_release $server_release python$PYTHON_VERSION -m pip install $paddle_whl cd /usr/local/ wget $serving_bin - tar xf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz - mv $PWD/serving-gpu-cuda11-${SERVING_VERSION} $PWD/serving_bin + tar xf serving-gpu-112-${SERVING_VERSION}.tar.gz + mv $PWD/serving-gpu-112-${SERVING_VERSION} $PWD/serving_bin echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc - rm -rf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz + rm -rf 
diff --git a/tools/generate_runtime_docker.sh b/tools/generate_runtime_docker.sh
index 9b2c2d605b7f67348666d6253d01075e23d5b030..8f4d7c220feaab55ab75664a5bfe8bd27269e278 100644
--- a/tools/generate_runtime_docker.sh
+++ b/tools/generate_runtime_docker.sh
@@ -7,10 +7,10 @@ function usage
 {
     echo "usage: sh tools/generate_runtime_docker.sh --SOME_ARG ARG_VALUE"
     echo "  ";
-    echo "  --env : running env, cpu/cuda10.1/cuda10.2/cuda11";
+    echo "  --env : running env, cpu/cuda10.1/cuda10.2/cuda11.2";
     echo "  --python : python version, 3.6/3.7/3.8 ";
-    echo "  --serving : serving version(0.6.0)";
-    echo "  --paddle : paddle version(2.1.0)"
+    #echo "  --serving : serving version(0.6.0/0.6.2)";
+    #echo "  --paddle : paddle version(2.1.0/2.2.0)"
     echo "  --image_name : image name(default serving_runtime:env-python)"
     echo "  -h | --help : helper";
 }
@@ -25,8 +25,8 @@ function parse_args
         case "$1" in
             --env ) env="$2"; shift;;
             --python ) python="$2"; shift;;
-            --serving ) serving="$2"; shift;;
-            --paddle ) paddle="$2"; shift;;
+            #--serving ) serving="$2"; shift;;
+            #--paddle ) paddle="$2"; shift;;
             --image_name ) image_name="$2"; shift;;
             -h | --help ) usage; exit;; # quit and show usage
             * ) args+=("$1") # if no match, add it to the positional args
@@ -66,9 +66,11 @@ function run
         base_image="nvidia\/cuda:10.1-cudnn7-runtime-ubuntu16.04"
     elif [ $env == "cuda10.2" ]; then
         base_image="nvidia\/cuda:10.2-cudnn8-runtime-ubuntu16.04"
-    elif [ $env == "cuda11" ]; then
-        base_image="nvidia\/cuda:11.0.3-cudnn8-runtime-ubuntu16.04"
+    elif [ $env == "cuda11.2" ]; then
+        base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
     fi
+    paddle="2.2.0"
+    serving="0.7.0"
     echo "base image: $base_image"
     echo "named arg: python: $python"
     echo "named arg: serving: $serving"
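With `--serving` and `--paddle` now pinned inside `run` to 0.7.0 and 2.2.0, a runtime image for the new cuda11.2 env is built with only the remaining flags. A usage sketch (the image name here is chosen for illustration, following the `serving_runtime:env-python` default named in the help text):

```bash
# Sketch only: build a CUDA 11.2 / Python 3.8 runtime image with the pinned
# serving 0.7.0 and paddle 2.2.0 versions.
bash tools/generate_runtime_docker.sh --env cuda11.2 --python 3.8 --name serving_runtime:cuda11.2-py38
```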