diff --git a/core/configure/proto/multi_lang_general_model_service.proto b/core/configure/proto/multi_lang_general_model_service.proto
index 2a4764a041d7f817aba1d516427a241498d4c2e0..18fbcf760647e1694e738c0832fe45f4f7d9934f 100755
--- a/core/configure/proto/multi_lang_general_model_service.proto
+++ b/core/configure/proto/multi_lang_general_model_service.proto
@@ -59,7 +59,7 @@ message SimpleResponse { required int32 err_code = 1; }
 
 message GetClientConfigRequest {}
 
-message GetClientConfigResponse { repeated string client_config_str_list = 1; }
+message GetClientConfigResponse { required string client_config_str = 1; }
 
 service MultiLangGeneralModelService {
   rpc Inference(InferenceRequest) returns (InferenceResponse) {}
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index ef161d141f410c991f9a4bbc0c2605d08827dd31..79a26f857c5b06dd3da39988879b297ce35db167 100755
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -153,7 +153,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
     -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DOPENCV_DIR=${OPENCV_DIR} \
-    -DWITH_OPENCV=ON
+    -DWITH_OPENCV=ON \
     -DSERVER=ON ..
 make -j10
 ```
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index 53c3548a46abdccedb53c629bb2c0bd0dd3bad17..3d04f4c873cb6b6c004c1377cbedaf30c89d9d5e 100755
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -152,7 +152,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
     -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DOPENCV_DIR=${OPENCV_DIR} \
-    -DWITH_OPENCV=ON
+    -DWITH_OPENCV=ON \
     -DSERVER=ON ..
 make -j10
 ```
diff --git a/doc/PADDLE_SERVING_ON_KUBERNETES.md b/doc/PADDLE_SERVING_ON_KUBERNETES.md
index 21f4854c035b2d84f544a8c44721c64d945d1d59..399cf156dc4b890c02e8da5bec5aa132663d7d2d 100644
--- a/doc/PADDLE_SERVING_ON_KUBERNETES.md
+++ b/doc/PADDLE_SERVING_ON_KUBERNETES.md
@@ -25,10 +25,10 @@ kubectl apply -f https://bit.ly/kong-ingress-dbless
 在`tools/generate_runtime_docker.sh`文件下,它的使用方式如下
 
 ```bash
-bash tool/generate_runtime_docker.sh --env cuda10.1 --python 2.7 --serving 0.5.0 --paddle 2.0.0 --name serving_runtime:cuda10.1-py27
+bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
 ```
 
-会生成 cuda10.1,python 2.7,serving版本0.5.0 还有 paddle版本2.0.0的运行镜像。如果有其他疑问,可以执行下列语句得到帮助信息。
+会生成 cuda10.1,python 3.6,serving版本0.6.0 还有 paddle版本2.0.1的运行镜像。如果有其他疑问,可以执行下列语句得到帮助信息。
 
 ```
 bash tools/generate_runtime_docker.sh --help
@@ -39,7 +39,7 @@ bash tools/generate_runtime_docker.sh --help
 - paddle-serving-server, paddle-serving-client,paddle-serving-app,paddlepaddle,具体版本可以在tools/runtime.dockerfile当中查看,同时,如果有定制化的需求,也可以在该文件中进行定制化。
 - paddle-serving-server 二进制可执行程序
 
-也就是说,运行镜像在生成之后,我们只需要将我们运行的代码(如果有)和模型搬运到镜像中就可以。生成后的镜像名为`paddle_serving:cuda10.2-py37`
+也就是说,运行镜像在生成之后,我们只需要将我们运行的代码(如果有)和模型搬运到镜像中就可以。生成后的镜像名为`paddle_serving:cuda10.2-py36`
 
 ### 添加您的代码和模型
 
@@ -50,8 +50,8 @@ bash tools/generate_runtime_docker.sh --help
 对于pipeline模式,我们需要确保模型和程序文件、配置文件等各种依赖都能够在镜像中运行。因此可以在`/home/project`下存放我们的执行文件时,我们以`Serving/python/example/pipeline/ocr`为例,这是OCR文字识别任务。
 
 ```bash
-#假设您已经拥有Serving运行镜像,假设镜像名为paddle_serving:cuda10.2-py37
-docker run --rm -dit --name pipeline_serving_demo paddle_serving:cuda10.2-py37 bash
+#假设您已经拥有Serving运行镜像,假设镜像名为paddle_serving:cuda10.2-py36
+docker run --rm -dit --name pipeline_serving_demo paddle_serving:cuda10.2-py36 bash
 cd Serving/python/example/pipeline/ocr
 # get models
 python -m paddle_serving_app.package --get_model ocr_rec
@@ -71,7 +71,7 @@ docker commit pipeline_serving_demo ocr_serving:latest
 ```
 docker exec -it pipeline_serving_demo bash
 cd /home/ocr
-python3.7 web_service.py
+python3.6 web_service.py
 ```
 
 进入容器到工程目录之后,剩下的操作和调试代码的工作是类似的。
@@ -83,8 +83,8 @@ python3.7 web_service.py
 web service模式本质上和pipeline模式类似,因此我们以`Serving/python/examples/bert`为例
 
 ```bash
-#假设您已经拥有Serving运行镜像,假设镜像名为registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-py37
-docker run --rm -dit --name webservice_serving_demo registry.baidubce.com/paddlepaddle/serving:0.6.0-cpu-py27 bash
+#假设您已经拥有Serving运行镜像,假设镜像名为registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-py36
+docker run --rm -dit --name webservice_serving_demo registry.baidubce.com/paddlepaddle/serving:0.6.0-cpu-py36 bash
 cd Serving/python/examples/bert
 ### download model
 wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
@@ -102,7 +102,7 @@ docker commit webservice_serving_demo bert_serving:latest
 ```bash
 docker exec -it webservice_serving_demo bash
 cd /home/bert
-python3.7 bert_web_service.py 9292
+python3.6 bert_web_service.py bert_seq128_model 9292
 ```
 
 进入容器到工程目录之后,剩下的操作和调试代码的工作是类似的。
@@ -118,14 +118,15 @@ kubenetes集群操作需要`kubectl`去操纵yaml文件。我们这里给出了
 - pipeline ocr示例
 
 ```bash
-sh tools/generate_k8s_yamls.sh --app_name ocr --image_name registry.baidubce.com/paddlepaddle/serving:k8s-pipeline-demo --workdir /home/ocr --command "python2.7 web_service.py" --port 9999
+sh tools/generate_k8s_yamls.sh --app_name ocr --image_name registry.baidubce.com/paddlepaddle/serving:k8s-pipeline-demo --workdir /home/ocr --command "python3.6 web_service.py" --port 9999
 ```
 
 - web service bert示例
 
 ```bash
-sh tools/generate_k8s_yamls.sh --app_name bert --image_name registry.baidubce.com/paddlepaddle/serving:k8s-web-demo --workdir /home/bert --command "python2.7 bert_web_service.py 9292" --port 9292
+sh tools/generate_k8s_yamls.sh --app_name bert --image_name registry.baidubce.com/paddlepaddle/serving:k8s-web-demo --workdir /home/bert --command "python3.6 bert_web_service.py bert_seq128_model 9292" --port 9292
 ```
+**需要注意的是,app_name需要同URL的函数名相同。例如示例中bert的访问URL是`https://127.0.0.1:9292/bert/prediction`,那么app_name应为bert。**
 
 接下来我们会看到有两个yaml文件,分别是`k8s_serving.yaml`和 k8s_ingress.yaml`.
 
@@ -174,7 +175,7 @@ spec:
       workingDir: /home/ocr
       name: ocr
       command: ['/bin/bash', '-c']
-      args: ["python3.7 web_service.py"]
+      args: ["python3.6 web_service.py"]
       env:
       - name: NODE_NAME
         valueFrom:
@@ -216,7 +217,8 @@ spec:
 最终我们执行就可以启动相关容器和API网关。
 
 ```
-kubectl apply -f k8s_serving.yaml k8s_ingress.yaml
+kubectl apply -f k8s_serving.yaml
+kubectl apply -f k8s_ingress.yaml
 ```
 
 输入
diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md
index ef28089b559b7281613ae2fb78b8039978db2510..a03b577493fc763c43d1ce96766d4e9eb260565e 100644
--- a/python/examples/bert/README_CN.md
+++ b/python/examples/bert/README_CN.md
@@ -94,4 +94,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}]
 bash benchmark.sh bert_seq128_model bert_seq128_client
 ```
 性能测试的日志文件为profile_log_bert_seq128_model
+
 如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
+
+注意:bert_seq128_model和bert_seq128_client路径后不要加'/'符号,示例需要在GPU机器上运行。
diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh
index 1a8263556d345f367f503460274f1cb0165df2c5..7e374db3ee5a5bdccdc75dc2884b9dbbfcb60eca 100755
--- a/python/examples/bert/benchmark.sh
+++ b/python/examples/bert/benchmark.sh
@@ -17,27 +17,30 @@ sleep 5
 
 #warm up
 $PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
-echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
 for thread_num in 1 4 8 16
 do
 for batch_size in 1 4 16 64
 do
     job_bt=`date '+%Y%m%d%H%M%S'`
-    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
     nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    rm -rf cpu.txt
+    $PYTHONROOT/bin/python3 cpu.py &
     gpu_memory_pid=$!
     $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
-    kill ${gpu_memory_pid}
-    kill `ps -ef|grep used_memory|awk '{print $2}'`
+    kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
     echo "model_name:" $1
     echo "thread_num:" $thread_num
     echo "batch_size:" $batch_size
     echo "=================Done===================="
     echo "model_name:$1" >> profile_log_$1
     echo "batch_size:$batch_size" >> profile_log_$1
-    $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1
     job_et=`date '+%Y%m%d%H%M%S'`
-    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
     awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
     rm -rf gpu_use.log gpu_utilization.log
     $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index e115b6debb330adbd4c81f94338a67305caa6d37..d58eb4fbf15045ef2e9d873b2c8517f86cbca0de 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -49,4 +49,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1
 bash benchmark.sh uci_housing_model uci_housing_client
 ```
 性能测试的日志文件为profile_log_uci_housing_model
+
 如需修改性能测试用例的参数,请修改benchmark.sh中的配置信息。
+
+注意:uci_housing_model和uci_housing_client路径后不要加'/'符号,示例需要在GPU机器上运行。
diff --git a/python/examples/fit_a_line/benchmark.py b/python/examples/fit_a_line/benchmark.py
index 77f0965f778f760d68b0737273a5536e48350606..7c4e4b4c582361f2f0f5d48fb374b2e7899c65b2 100644
--- a/python/examples/fit_a_line/benchmark.py
+++ b/python/examples/fit_a_line/benchmark.py
@@ -30,6 +30,7 @@ def single_func(idx, resource):
             paddle.dataset.uci_housing.train(), buf_size=500),
         batch_size=1)
     total_number = sum(1 for _ in train_reader())
+    latency_list = []
 
     if args.request == "rpc":
         client = Client()
@@ -37,9 +38,12 @@ def single_func(idx, resource):
         client.connect([args.endpoint])
         start = time.time()
         for data in train_reader():
+            l_start = time.time()
             fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
+            l_end = time.time()
+            latency_list.append(l_end * 1000 - l_start * 1000)
         end = time.time()
-        return [[end - start], [total_number]]
+        return [[end - start], latency_list, [total_number]]
     elif args.request == "http":
         train_reader = paddle.batch(
             paddle.reader.shuffle(
@@ -47,11 +51,14 @@ def single_func(idx, resource):
             batch_size=1)
         start = time.time()
         for data in train_reader():
+            l_start = time.time()
             r = requests.post(
                 'http://{}/uci/prediction'.format(args.endpoint),
                 data={"x": data[0]})
+            l_end = time.time()
+            latency_list.append(l_end * 1000 - l_start * 1000)
         end = time.time()
-        return [[end - start], [total_number]]
+        return [[end - start], latency_list, [total_number]]
 
     start = time.time()
diff --git a/python/examples/fit_a_line/benchmark.sh b/python/examples/fit_a_line/benchmark.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7e374db3ee5a5bdccdc75dc2884b9dbbfcb60eca
--- /dev/null
+++ b/python/examples/fit_a_line/benchmark.sh
@@ -0,0 +1,55 @@
+rm profile_log*
+export CUDA_VISIBLE_DEVICES=0,1
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+
+gpu_id=0
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+    rm -rf utilization
+else
+    mkdir utilization
+fi
+#start server
+$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
+sleep 5
+
+#warm up
+$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
+for thread_num in 1 4 8 16
+do
+for batch_size in 1 4 16 64
+do
+    job_bt=`date '+%Y%m%d%H%M%S'`
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
+    nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    rm -rf cpu.txt
+    $PYTHONROOT/bin/python3 cpu.py &
+    gpu_memory_pid=$!
+    $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
+    echo "model_name:" $1
+    echo "thread_num:" $thread_num
+    echo "batch_size:" $batch_size
+    echo "=================Done===================="
+    echo "model_name:$1" >> profile_log_$1
+    echo "batch_size:$batch_size" >> profile_log_$1
+    job_et=`date '+%Y%m%d%H%M%S'`
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+    rm -rf gpu_use.log gpu_utilization.log
+    $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 8 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
+done
+
+#Divided log
+awk 'BEGIN{RS="\n\n"}{i++}{print > "bert_log_"i}' profile_log_$1
+mkdir bert_log && mv bert_log_* bert_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 48ad112ab015242b85753489f84422c4187f6ec1..8b1fc38032133230f450f83b9139d5f347b2ae1b 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -554,15 +554,8 @@ class MultiLangClient(object):
         get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
         )
         resp = self.stub_.GetClientConfig(get_client_config_req)
-        model_config_path_list = resp.client_config_str_list
-        file_path_list = []
-        for single_model_config in model_config_path_list:
-            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_server_conf.prototxt".format(
-                    single_model_config))
-            elif os.path.isfile(single_model_config):
-                file_path_list.append(single_model_config)
-        self._parse_model_config(file_path_list)
+        model_config_str = resp.client_config_str
+        self._parse_model_config(model_config_str)
 
     def _flatten_list(self, nested_list):
         for item in nested_list:
@@ -572,23 +565,10 @@ class MultiLangClient(object):
             else:
                 yield item
 
-    def _parse_model_config(self, model_config_path_list):
-        if isinstance(model_config_path_list, str):
-            model_config_path_list = [model_config_path_list]
-        elif isinstance(model_config_path_list, list):
-            pass
-
-        file_path_list = []
-        for single_model_config in model_config_path_list:
-            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_client_conf.prototxt".format(
-                    single_model_config))
-            elif os.path.isfile(single_model_config):
-                file_path_list.append(single_model_config)
+    def _parse_model_config(self, model_config_str):
         model_conf = m_config.GeneralModelConfig()
-        f = open(file_path_list[0], 'r')
-        model_conf = google.protobuf.text_format.Merge(
-            str(f.read()), model_conf)
+        model_conf = google.protobuf.text_format.Merge(model_config_str,
+                                                       model_conf)
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.feed_types_ = {}
         self.feed_shapes_ = {}
@@ -598,11 +578,6 @@ class MultiLangClient(object):
             self.feed_shapes_[var.alias_name] = var.shape
             if var.is_lod_tensor:
                 self.lod_tensor_set_.add(var.alias_name)
-        if len(file_path_list) > 1:
-            model_conf = m_config.GeneralModelConfig()
-            f = open(file_path_list[-1], 'r')
-            model_conf = google.protobuf.text_format.Merge(
-                str(f.read()), model_conf)
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
         self.fetch_types_ = {}
         for i, var in enumerate(model_conf.fetch_var):
diff --git a/python/paddle_serving_server/rpc_service.py b/python/paddle_serving_server/rpc_service.py
index d9d302831fd2e3148547e24772005efb38cb8f32..f2503a5d86b032499543f5f4fc78b8b824218a44 100755
--- a/python/paddle_serving_server/rpc_service.py
+++ b/python/paddle_serving_server/rpc_service.py
@@ -198,5 +198,14 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
         #model_config_path_list is list right now.
         #dict should be added when graphMaker is used.
         resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
-        resp.client_config_str_list[:] = self.model_config_path_list
+        model_config_str = []
+        for single_model_config in self.model_config_path_list:
+            if os.path.isdir(single_model_config):
+                with open("{}/serving_server_conf.prototxt".format(
+                        single_model_config)) as f:
+                    model_config_str.append(str(f.read()))
+            elif os.path.isfile(single_model_config):
+                with open(single_model_config) as f:
+                    model_config_str.append(str(f.read()))
+        resp.client_config_str = model_config_str[0]
         return resp
diff --git a/tools/Dockerfile.cuda10.1-cudnn7.devel b/tools/Dockerfile.cuda10.1-cudnn7.devel
index 991477abf58aa564428bffa794c7de300093942a..24087af9490b8b5f4b7f57d70cb927c580da6066 100644
--- a/tools/Dockerfile.cuda10.1-cudnn7.devel
+++ b/tools/Dockerfile.cuda10.1-cudnn7.devel
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}
 
 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh
+RUN bash /build_scripts/install_trt.sh cuda10.1
 RUN rm -rf /build_scripts
 
 # git credential to skip password typing
@@ -132,9 +132,9 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
     make -j8 && make install && \
     ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
 
-RUN python3.8 -m pip install --upgrade pip requests && \
-    python3.7 -m pip install --upgrade pip requests && \
-    python3.6 -m pip install --upgrade pip requests
+RUN python3.8 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.7 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.6 -m pip install --upgrade pip==21.1.1 requests
 
 RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
     tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
diff --git a/tools/Dockerfile.cuda10.2-cudnn8.devel b/tools/Dockerfile.cuda10.2-cudnn8.devel
index e8cc0d6e21195133ea94bca414d94efcf837395a..0b1f4f1cb519792ba9f917a1e7313906e9eea3b8 100644
--- a/tools/Dockerfile.cuda10.2-cudnn8.devel
+++ b/tools/Dockerfile.cuda10.2-cudnn8.devel
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}
 
 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh
+RUN bash /build_scripts/install_trt.sh cuda10.2
 RUN rm -rf /build_scripts
 
 # git credential to skip password typing
@@ -132,9 +132,9 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
     make -j8 && make install && \
     ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
 
-RUN python3.8 -m pip install --upgrade pip requests && \
-    python3.7 -m pip install --upgrade pip requests && \
-    python3.6 -m pip install --upgrade pip requests
+RUN python3.8 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.7 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.6 -m pip install --upgrade pip==21.1.1 requests
 
 RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
     tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
diff --git a/tools/Dockerfile.cuda11.2-cudnn8.devel b/tools/Dockerfile.cuda11-cudnn8.devel
similarity index 95%
rename from tools/Dockerfile.cuda11.2-cudnn8.devel
rename to tools/Dockerfile.cuda11-cudnn8.devel
index 1903da2e9af27d9058cdd2f6c0e8f33f89662c96..2b4e75c7b112e7616ec91d03e12774386c974448 100644
--- a/tools/Dockerfile.cuda11.2-cudnn8.devel
+++ b/tools/Dockerfile.cuda11-cudnn8.devel
@@ -1,7 +1,7 @@
 # A image for building paddle binaries
 # Use cuda devel base image for both cpu and gpu environment
 # When you modify it, please be aware of cudnn-runtime version
-FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu16.04
+FROM nvidia/cuda:11.0.3-cudnn8-devel-ubuntu16.04
 MAINTAINER PaddlePaddle Authors
 
 # ENV variables
@@ -104,7 +104,7 @@ ENV PATH=usr/local/go/bin:/root/go/bin:${PATH}
 
 # Downgrade TensorRT
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh
+RUN bash /build_scripts/install_trt.sh cuda11
 RUN rm -rf /build_scripts
 
 # git credential to skip password typing
@@ -132,9 +132,9 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
    make -j8 && make install && \
    ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
 
-RUN python3.8 -m pip install --upgrade pip requests && \
-    python3.7 -m pip install --upgrade pip requests && \
-    python3.6 -m pip install --upgrade pip requests
+RUN python3.8 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.7 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.6 -m pip install --upgrade pip==21.1.1 requests
 
 RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
     tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel
index b83d611ab03c1aba80e7504399c646d140e1bc04..be31b2e9abd90f644eb0f94a6d672639e4b7f6c5 100644
--- a/tools/Dockerfile.devel
+++ b/tools/Dockerfile.devel
@@ -132,9 +132,9 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
     make -j8 && make install && \
     ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache
 
-RUN python3.8 -m pip install --upgrade pip requests && \
-    python3.7 -m pip install --upgrade pip requests && \
-    python3.6 -m pip install --upgrade pip requests
+RUN python3.8 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.7 -m pip install --upgrade pip==21.1.1 requests && \
+    python3.6 -m pip install --upgrade pip==21.1.1 requests
 
 RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
     tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
diff --git a/tools/Dockerfile.runtime_template b/tools/Dockerfile.runtime_template
index 9b78b101eadbec6ce48212155d2a43efa31bb488..4df0496445791c3210ee8e3f32602e7d1a29dafa 100644
--- a/tools/Dockerfile.runtime_template
+++ b/tools/Dockerfile.runtime_template
@@ -28,12 +28,12 @@ WORKDIR /home
 # install whl and bin
 WORKDIR /home
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_whl.sh 0.5.0 2.0.0 <> <> && rm -rf /build_scripts
+RUN bash /build_scripts/install_whl.sh <> <> <> <> && rm -rf /build_scripts
 
 # install tensorrt
 WORKDIR /home
 COPY tools/dockerfiles/build_scripts /build_scripts
-RUN bash /build_scripts/install_trt.sh && rm -rf /build_scripts
+RUN bash /build_scripts/install_trt.sh <> && rm -rf /build_scripts
 
 # install go
 RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
diff --git a/tools/dockerfiles/build_scripts/install_trt.sh b/tools/dockerfiles/build_scripts/install_trt.sh
index e5ec70d2f378d180a08a86d705f3e662a211dc91..559a5552464a4ffb8891446548aa16537b06e059 100644
--- a/tools/dockerfiles/build_scripts/install_trt.sh
+++ b/tools/dockerfiles/build_scripts/install_trt.sh
@@ -14,31 +14,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //")
-
-if [[ "$VERSION" == "10.1" ]];then
+VERSION=$1
+if [[ "$VERSION" == "cuda10.1" ]];then
     wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.1-cudnn7.tar.gz --no-check-certificate
     tar -zxf TensorRT6-cuda10.1-cudnn7.tar.gz -C /usr/local
     cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda10.1-cudnn7/lib/* /usr/lib/
+    echo "cuda10.1 trt install ==============>>>>>>>>>>>>"
     rm TensorRT6-cuda10.1-cudnn7.tar.gz
-elif [[ "$VERSION" == "11.0" ]];then
+elif [[ "$VERSION" == "cuda11" ]];then
     wget -q https://paddle-ci.cdn.bcebos.com/TRT/TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz --no-check-certificate
     tar -zxf TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz -C /usr/local
     cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
     rm TensorRT-7.1.3.4.Ubuntu-16.04.x86_64-gnu.cuda-11.0.cudnn8.0.tar.gz
-elif [[ "$VERSION" == "10.2" ]];then
+elif [[ "$VERSION" == "cuda10.2" ]];then
     wget https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda10.2-cudnn8.tar.gz --no-check-certificate
     tar -zxf TensorRT7-cuda10.2-cudnn8.tar.gz -C /usr/local
     cp -rf /usr/local/TensorRT-7.1.3.4/include/* /usr/include/ && cp -rf /usr/local/TensorRT-7.1.3.4/lib/* /usr/lib/
     rm TensorRT7-cuda10.2-cudnn8.tar.gz
-elif [[ "$VERSION" == "10.0" ]];then
-    wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda10.0-cudnn7.tar.gz --no-check-certificate
-    tar -zxf TensorRT6-cuda10.0-cudnn7.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT6-cuda10.0-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda10.0-cudnn7/lib/* /usr/lib/
-    rm TensorRT6-cuda10.0-cudnn7.tar.gz
-elif [[ "$VERSION" == "9.0" ]];then
-    wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT6-cuda9.0-cudnn7.tar.gz --no-check-certificate
-    tar -zxf TensorRT6-cuda9.0-cudnn7.tar.gz -C /usr/local
-    cp -rf /usr/local/TensorRT6-cuda9.0-cudnn7/include/* /usr/include/ && cp -rf /usr/local/TensorRT6-cuda9.0-cudnn7/lib/* /usr/lib/
-    rm TensorRT6-cuda9.0-cudnn7.tar.gz
 fi
diff --git a/tools/dockerfiles/build_scripts/install_whl.sh b/tools/dockerfiles/build_scripts/install_whl.sh
index a1ce0143ce92224e30bcb7eb9cc37db537e9a241..46d8d226a63432d004a5ed3f53a6df41428ebd2a 100644
--- a/tools/dockerfiles/build_scripts/install_whl.sh
+++ b/tools/dockerfiles/build_scripts/install_whl.sh
@@ -40,6 +40,9 @@ if [[ $SERVING_VERSION == "0.5.0" ]]; then
     elif [[ "$RUN_ENV" == "cuda10.2" ]];then
         server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102"
         serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz"
+    elif [[ "$RUN_ENV" == "cuda11" ]];then
+        server_release="paddle-serving-server-gpu==$SERVING_VERSION.post11"
+        serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-11-${SERVING_VERSION}.tar.gz"
     fi
     client_release="paddle-serving-client==$SERVING_VERSION"
     app_release="paddle-serving-app==0.3.1"
@@ -53,6 +56,9 @@ elif [[ $SERVING_VERSION == "0.6.0" ]]; then
     elif [[ "$RUN_ENV" == "cuda10.2" ]];then
         server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post102-py3-none-any.whl"
         serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz"
+    elif [[ "$RUN_ENV" == "cuda11" ]];then
+        server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post11-py3-none-any.whl"
+        serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-11-$SERVING_VERSION.tar.gz"
     fi
     client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
     app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
@@ -88,6 +94,16 @@ elif [[ "$RUN_ENV" == "cuda10.2" ]];then
     echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
     rm -rf serving-gpu-102-${SERVING_VERSION}.tar.gz
     cd -
+elif [[ "$RUN_ENV" == "cuda11" ]];then
+    python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
+    python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
+    cd /usr/local/
+    wget $serving_bin
+    tar xf serving-gpu-11-${SERVING_VERSION}.tar.gz
+    mv $PWD/serving-gpu-11-${SERVING_VERSION} $PWD/serving_bin
+    echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
+    rm -rf serving-gpu-11-${SERVING_VERSION}.tar.gz
+    cd -
 fi
diff --git a/tools/generate_k8s_yamls.sh b/tools/generate_k8s_yamls.sh
index 6a3f8783f3564125f16eb917b4198c580691978d..c1b542ef5146739fde75d2e80e7898a2b832512e 100644
--- a/tools/generate_k8s_yamls.sh
+++ b/tools/generate_k8s_yamls.sh
@@ -60,8 +60,8 @@ function run
     echo "named arg: command: $start_command"
     echo "named arg: port: $port"
 
-    sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$start_command\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_serving.yaml_template > k8s_serving.yaml
-    sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$start_command\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_ingress.yaml_template > k8s_ingress.yaml
+    sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_serving.yaml_template > k8s_serving.yaml
+    sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_ingress.yaml_template > k8s_ingress.yaml
 
     echo "check k8s_serving.yaml and k8s_ingress.yaml please."
 }
diff --git a/tools/generate_runtime_docker.sh b/tools/generate_runtime_docker.sh
index fd5f7ab132a13ba5dcbc667a9e1ae33a53e00815..dcf7251dbd07f9d91d9896210f6eb09e3eb4565a 100644
--- a/tools/generate_runtime_docker.sh
+++ b/tools/generate_runtime_docker.sh
@@ -66,6 +66,8 @@ function run
         base_image="nvidia\/cuda:10.1-cudnn7-runtime-ubuntu16.04"
     elif [ $env == "cuda10.2" ]; then
         base_image="nvidia\/cuda:10.2-cudnn8-runtime-ubuntu16.04"
+    elif [ $env == "cuda11" ]; then
+        base_image="nvidia\/cuda:11.0.3-cudnn8-runtime-ubuntu16.04"
     fi
     echo "base image: $base_image"
     echo "named arg: python: $python"
diff --git a/tools/k8s_serving.yaml_template b/tools/k8s_serving.yaml_template
index 17d103c8798ef778458663fd6c5aa4f38a1649fd..b66d929bf5e3856c50ba4871cb02a5192a26b6ff 100644
--- a/tools/k8s_serving.yaml_template
+++ b/tools/k8s_serving.yaml_template
@@ -34,6 +34,7 @@ spec:
       containers:
       - image: << IMAGE_NAME >>
        name: << APP_NAME >>
+        imagePullPolicy: Always
        ports:
        - containerPort: << PORT >>
        workingDir: << WORKDIR >>
@@ -41,6 +42,8 @@ spec:
        command: ['/bin/bash', '-c']
        args: [<< COMMAND >>]
        env:
+        - name: SERVING_BIN
+          value: "/usr/local/serving_bin/serving"
        - name: NODE_NAME
          valueFrom:
            fieldRef: