diff --git a/doc/LOW_PRECISION_DEPLOYMENT.md b/doc/LOW_PRECISION_DEPLOYMENT.md index cb08a88f2f3b2435f3b270575652217b1d956fbf..fb3bd208f2f52399afff1f96228543685f3cf389 100644 --- a/doc/LOW_PRECISION_DEPLOYMENT.md +++ b/doc/LOW_PRECISION_DEPLOYMENT.md @@ -17,7 +17,7 @@ python -m paddle_serving_client.convert --dirname ResNet50_quant ``` Start RPC service, specify the GPU id and precision mode ``` -python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_gpu --use_trt --precision int8 +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 ``` Request the serving service with Client ``` @@ -27,7 +27,7 @@ from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize client = Client() client.load_client_config( - "resnet_v2_50_imagenet_client/serving_client_conf.prototxt") + "serving_client/serving_client_conf.prototxt") client.connect(["127.0.0.1:9393"]) seq = Sequential([ @@ -37,11 +37,11 @@ seq = Sequential([ image_file = "daisy.jpg" img = seq(image_file) -fetch_map = client.predict(feed={"image": img}, fetch=["score"]) -print(fetch_map["score"].reshape(-1)) +fetch_map = client.predict(feed={"image": img}, fetch=["save_infer_model/scale_0.tmp_0"]) +print(fetch_map["save_infer_model/scale_0.tmp_0"].reshape(-1)) ``` ## Reference * [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) * [Deploy the quantized model Using Paddle Inference on Intel CPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html) -* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) \ No newline at end of file +* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) diff --git a/doc/LOW_PRECISION_DEPLOYMENT_CN.md b/doc/LOW_PRECISION_DEPLOYMENT_CN.md index 
e543db94396eecbe64a61d7a9362369d02ab42de..f77f4e241f3f4b95574d22b9ca55788b5abc968e 100644 --- a/doc/LOW_PRECISION_DEPLOYMENT_CN.md +++ b/doc/LOW_PRECISION_DEPLOYMENT_CN.md @@ -16,7 +16,7 @@ python -m paddle_serving_client.convert --dirname ResNet50_quant ``` 启动rpc服务, 设定所选GPU id、部署模型精度 ``` -python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_gpu --use_trt --precision int8 +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 ``` 使用client进行请求 ``` @@ -43,4 +43,4 @@ print(fetch_map["score"].reshape(-1)) ## 参考文档 * [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) * PaddleInference Intel CPU部署量化模型[文档](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html) -* PaddleInference NV GPU部署量化模型[文档](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) \ No newline at end of file +* PaddleInference NV GPU部署量化模型[文档](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) diff --git a/python/examples/low_precision/resnet50/README.md b/python/examples/low_precision/resnet50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9e1ff16c676b067437183e6e19446e8a526feed5 --- /dev/null +++ b/python/examples/low_precision/resnet50/README.md @@ -0,0 +1,28 @@ +# resnet50 int8 example +(English|[简体中文](./README_CN.md)) + +## Obtain the quantized model through PaddleSlim tool +To train low-precision models, please refer to [PaddleSlim](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html).
+ +## Deploy the quantized model from PaddleSlim using Paddle Serving with Nvidia TensorRT int8 mode + +Firstly, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz) and convert it to Paddle Serving's saved model. +``` +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz +tar zxvf ResNet50_quant.tar.gz + +python -m paddle_serving_client.convert --dirname ResNet50_quant +``` +Start RPC service, specify the GPU id and precision mode +``` +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 +``` +Request the serving service with Client +``` +python resnet50_client.py +``` + +## Reference +* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) +* [Deploy the quantized model Using Paddle Inference on Intel CPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html) +* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) diff --git a/python/examples/low_precision/resnet50/README_CN.md b/python/examples/low_precision/resnet50/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..1c1a3be1de1690e9736d994016ac05cfba12bcab --- /dev/null +++ b/python/examples/low_precision/resnet50/README_CN.md @@ -0,0 +1,27 @@ +# resnet50 int8示例 +(简体中文|[English](./README.md)) + +## 通过PaddleSlim量化生成低精度模型 +详细见[PaddleSlim量化](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html) + +## 使用TensorRT int8加载PaddleSlim Int8量化模型进行部署 +首先下载Resnet50 [PaddleSlim量化模型](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz),并转换为Paddle Serving支持的部署模型格式。 +``` +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz +tar zxvf ResNet50_quant.tar.gz + +python -m
paddle_serving_client.convert --dirname ResNet50_quant +``` +启动rpc服务, 设定所选GPU id、部署模型精度 +``` +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 +``` +使用client进行请求 +``` +python resnet50_client.py +``` + +## 参考文档 +* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) +* PaddleInference Intel CPU部署量化模型[文档](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html) +* PaddleInference NV GPU部署量化模型[文档](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) diff --git a/python/examples/low_precision/resnet50/daisy.jpg b/python/examples/low_precision/resnet50/daisy.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7edeca63e5f32e68550ef720d81f59df58a8eabc Binary files /dev/null and b/python/examples/low_precision/resnet50/daisy.jpg differ diff --git a/python/examples/low_precision/resnet50/resnet50_client.py b/python/examples/low_precision/resnet50/resnet50_client.py new file mode 100644 index 0000000000000000000000000000000000000000..999b143c8a9aaf42784cbe225a8417b86a054c64 --- /dev/null +++ b/python/examples/low_precision/resnet50/resnet50_client.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from paddle_serving_client import Client +from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop +from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize + +client = Client() +client.load_client_config( + "serving_client/serving_client_conf.prototxt") +client.connect(["127.0.0.1:9393"]) + +seq = Sequential([ + File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True) +]) + +image_file = "daisy.jpg" +img = seq(image_file) +fetch_map = client.predict(feed={"image": img}, fetch=["save_infer_model/scale_0.tmp_0"]) +print(fetch_map["save_infer_model/scale_0.tmp_0"].reshape(-1)) diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index 079ccde87b9bfcdea7ac94781cc90284b0faf4ae..34bf66f9ba73709dd5dfe9c34158ac0fd9a2d4b9 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -386,8 +386,6 @@ class Server(object): return if not os.path.exists(self.server_path): - os.system("touch {}/{}.is_download".format(self.module_path, - folder_name)) print('Frist time run, downloading PaddleServing components ...') r = os.system('wget ' + bin_url + ' --no-check-certificate') @@ -403,9 +401,10 @@ class Server(object): tar = tarfile.open(tar_name) tar.extractall() tar.close() + open(download_flag, "a").close() except: - if os.path.exists(exe_path): - os.remove(exe_path) + if os.path.exists(self.server_path): + os.remove(self.server_path) raise SystemExit( 'Decompressing failed, please check your permission of {} or disk space left.' .format(self.module_path))