Merge branch 'fixdoc' of https://github.com/zhangjun/Serving into fixdoc

96ce8dc7 · zhangjun · d7d23fed · da5605a1 · 96ce8dc7 · 96ce8dc7
29 changed file
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/000000570688.jpg
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/000000570688.jpg
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/README.md
+# Faster RCNN model on Pipeline Paddle Serving
+### Get The Faster RCNN Model
+```
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_r50_fpn_1x_coco.tar
+```
+### Start the service
+```
+tar xf faster_rcnn_r50_fpn_1x_coco.tar
+python web_service.py
+```
+### Perform prediction
+```
+python pipeline_http_client.py
+```
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.py
+import sys
+import os
+import yaml
+import requests
+import time
+import json
+import cv2
+import base64
+try:
+    from paddle_serving_server_gpu.pipeline import PipelineClient
+except ImportError:
+    from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+def parse_benchmark(filein, fileout):
+    with open(filein, "r") as fin:
+        res = yaml.load(fin)
+        del_list = []
+        for key in res["DAG"].keys():
+            if "call" in key:
+                del_list.append(key)
+        for key in del_list:
+            del res["DAG"][key]
+    with open(fileout, "w") as fout:
+        yaml.dump(res, fout, default_flow_style=False)
+def gen_yml(device, gpu_id):
+    fin = open("config.yml", "r")
+    config = yaml.load(fin)
+    fin.close()
+    config["dag"]["tracer"] = {"interval_s": 30}
+    if device == "gpu":
+        config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1
+        config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id        
+    with open("config2.yml", "w") as fout: 
+        yaml.dump(config, fout, default_flow_style=False)
+def run_http(idx, batch_size):
+    print("start thread ({})".format(idx))
+    url = "http://127.0.0.1:18082/faster_rcnn/prediction"
+    with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+        image_data1 = file.read()
+    image = cv2_to_base64(image_data1)
+    start = time.time()
+    while True:
+        data = {"key": [], "value": []}
+        for j in range(batch_size):
+            data["key"].append("image_" + str(j))
+            data["value"].append(image)
+        r = requests.post(url=url, data=json.dumps(data))
+        end = time.time()
+        if end - start > 70:
+            print("70s end")
+            break
+    return [[end - start]]
+def multithread_http(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_http , thread, batch_size)
+def run_rpc(thread, batch_size):
+    pass
+def multithread_rpc(thraed, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_rpc , thread, batch_size)
+if __name__ == "__main__":
+    if sys.argv[1] == "yaml":
+        mode = sys.argv[2] # brpc/  local predictor
+        thread = int(sys.argv[3])
+        device = sys.argv[4]
+        gpu_id = sys.argv[5]
+        gen_yml(device, gpu_id)
+    elif sys.argv[1] == "run":
+        mode = sys.argv[2] # http/ rpc
+        thread = int(sys.argv[3])
+        batch_size = int(sys.argv[4])
+        if mode == "http":
+            multithread_http(thread, batch_size)
+        elif mode == "rpc":
+            multithread_rpc(thread, batch_size)
+    elif sys.argv[1] == "dump":
+        filein = sys.argv[2]
+        fileout = sys.argv[3]
+        parse_benchmark(filein, fileout)
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.sh
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark.sh
+export FLAGS_profile_pipeline=1
+alias python3="python3.7"
+modelname="faster_rcnn_r50_fpn_1x_coco"
+gpu_id="0"
+benchmark_config_filename="benchmark_config.yaml"
+# HTTP
+ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
+sleep 3
+python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
+rm -rf profile_log_$modelname
+for thread_num in 1 
+do
+  for batch_size in 1 
+  do
+    echo "#----FasterRCNN thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
+    rm -rf PipelineServingLogs
+    rm -rf cpu_utilization.py
+    python3 web_service.py >web.log 2>&1 &
+    sleep 3
+    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+    python3 benchmark.py run http $thread_num $batch_size
+    python3 cpu_utilization.py >>profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
+    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
+    python3 benchmark.py dump benchmark.log benchmark.tmp
+    mv benchmark.tmp benchmark.log
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
+    cat benchmark.log >> profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname 
+    #rm -rf gpu_use.log gpu_utilization.log
+  done
+done
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/benchmark_config.yaml
+cuda_version: "10.1"
+cudnn_version: "7.6"
+trt_version: "6.0"
+python_version: "3.7"
+gcc_version: "8.2"
+paddle_version: "2.0.2"
+cpu: "Xeon 6148"
+gpu: "P4"
+xpu: "None"
+api: ""
+owner: "wangjiawei04"
+model_name: "faster_rcnn"
+model_type: "static"
+model_source: "paddledetection"
+model_url: ""
+batch_size: 1
+num_of_samples: 1000
+input_shape: "3, 480, 640"
+runtime_device: "gpu"
+ir_optim: true
+enable_memory_optim: true
+enable_tensorrt: false
+precision: "fp32"
+enable_mkldnn: true
+cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/config.yml
+dag:
+  is_thread_op: false
+  tracer:
+    interval_s: 30
+http_port: 18082
+op:
+  faster_rcnn:
+    local_service_conf:
+      client_type: local_predictor
+      concurrency: 2
+      device_type: 1
+      devices: '2'
+      fetch_list:
+      - save_infer_model/scale_0.tmp_1
+      model_config: serving_server/
+rpc_port: 9998
+worker_num: 20
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/label_list.txt
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/label_list.txt
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/pipeline_http_client.py
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/pipeline_http_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+import requests
+import json
+import cv2
+import base64
+import os
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+url = "http://127.0.0.1:18082/faster_rcnn/prediction"
+with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+    image_data1 = file.read()
+image = cv2_to_base64(image_data1)
+for i in range(1):
+    data = {"key": ["image"], "value": [image]}
+    r = requests.post(url=url, data=json.dumps(data))
+    print(r.json())
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/web_service.py
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/web_service.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_server.web_service import WebService, Op
+import logging
+import numpy as np
+import sys
+import cv2
+from paddle_serving_app.reader import *
+import base64
+class FasterRCNNOp(Op):
+    def init_op(self):
+        self.img_preprocess = Sequential([
+            BGR2RGB(), Div(255.0),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+            Resize((640, 640)), Transpose((2, 0, 1))
+        ])
+        self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        imgs = []
+        #print("keys", input_dict.keys())
+        for key in input_dict.keys():
+            data = base64.b64decode(input_dict[key].encode('utf8'))
+            data = np.fromstring(data, np.uint8)
+            im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+            im = self.img_preprocess(im)
+            imgs.append({
+              "image": im[np.newaxis,:],
+              "im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
+              "scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
+            })
+        feed_dict = {
+            "image": np.concatenate([x["image"] for x in imgs], axis=0),
+            "im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
+            "scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
+        }
+        #for key in feed_dict.keys():
+        #    print(key, feed_dict[key].shape)
+        return feed_dict, False, None, ""
+    def postprocess(self, input_dicts, fetch_dict, log_id):
+        #print(fetch_dict)
+        res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
+        return res_dict, None, ""
+class FasterRCNNService(WebService):
+    def get_pipeline_response(self, read_op):
+        faster_rcnn_op = FasterRCNNOp(name="faster_rcnn", input_ops=[read_op])
+        return faster_rcnn_op
+fasterrcnn_service = FasterRCNNService(name="faster_rcnn")
+fasterrcnn_service.prepare_pipeline_config("config2.yml")
+fasterrcnn_service.run_service()
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/000000570688.jpg
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/000000570688.jpg
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/README.md
+# PPYOLO model on Pipeline Paddle Serving
+([简体中文](./README_CN.md)|English)
+### Get Model
+```
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ppyolo_mbv3_large_coco.tar
+```
+### Start the service
+```
+tar xf ppyolo_mbv3_large_coco.tar
+python web_service.py
+```
+### Perform prediction
+```
+python pipeline_http_client.py
+```
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.py
+import sys
+import os
+import yaml
+import requests
+import time
+import json
+import cv2
+import base64
+try:
+    from paddle_serving_server_gpu.pipeline import PipelineClient
+except ImportError:
+    from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+def parse_benchmark(filein, fileout):
+    with open(filein, "r") as fin:
+        res = yaml.load(fin)
+        del_list = []
+        for key in res["DAG"].keys():
+            if "call" in key:
+                del_list.append(key)
+        for key in del_list:
+            del res["DAG"][key]
+    with open(fileout, "w") as fout:
+        yaml.dump(res, fout, default_flow_style=False)
+def gen_yml(device, gpu_id):
+    fin = open("config.yml", "r")
+    config = yaml.load(fin)
+    fin.close()
+    config["dag"]["tracer"] = {"interval_s": 30}
+    if device == "gpu":
+        config["op"]["ppyolo_mbv3"]["local_service_conf"]["device_type"] = 1
+        config["op"]["ppyolo_mbv3"]["local_service_conf"]["devices"] = gpu_id        
+    with open("config2.yml", "w") as fout: 
+        yaml.dump(config, fout, default_flow_style=False)
+def run_http(idx, batch_size):
+    print("start thread ({})".format(idx))
+    url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction"
+    with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+        image_data1 = file.read()
+    image = cv2_to_base64(image_data1)
+    start = time.time()
+    while True:
+        data = {"key": [], "value": []}
+        for j in range(batch_size):
+            data["key"].append("image_" + str(j))
+            data["value"].append(image)
+        r = requests.post(url=url, data=json.dumps(data))
+        end = time.time()
+        if end - start > 70:
+            print("70s end")
+            break
+    return [[end - start]]
+def multithread_http(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_http , thread, batch_size)
+def run_rpc(thread, batch_size):
+    pass
+def multithread_rpc(thraed, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_rpc , thread, batch_size)
+if __name__ == "__main__":
+    if sys.argv[1] == "yaml":
+        mode = sys.argv[2] # brpc/  local predictor
+        thread = int(sys.argv[3])
+        device = sys.argv[4]
+        gpu_id = sys.argv[5]
+        gen_yml(device, gpu_id)
+    elif sys.argv[1] == "run":
+        mode = sys.argv[2] # http/ rpc
+        thread = int(sys.argv[3])
+        batch_size = int(sys.argv[4])
+        if mode == "http":
+            multithread_http(thread, batch_size)
+        elif mode == "rpc":
+            multithread_rpc(thread, batch_size)
+    elif sys.argv[1] == "dump":
+        filein = sys.argv[2]
+        fileout = sys.argv[3]
+        parse_benchmark(filein, fileout)
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.sh
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark.sh
+export FLAGS_profile_pipeline=1
+alias python3="python3.7"
+modelname="ppyolo_mbv3_large"
+gpu_id="0"
+benchmark_config_filename="benchmark_config.yaml"
+# HTTP
+ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
+sleep 3
+python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
+rm -rf profile_log_$modelname
+for thread_num in 1 
+do
+  for batch_size in 1 
+  do
+    echo "#----PPyolo thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
+    rm -rf PipelineServingLogs
+    rm -rf cpu_utilization.py
+    python3 web_service.py >web.log 2>&1 &
+    sleep 3
+    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+    python3 benchmark.py run http $thread_num $batch_size
+    python3 cpu_utilization.py >>profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
+    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+    python3 benchmark.py dump benchmark.log benchmark.tmp
+    mv benchmark.tmp benchmark.log
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
+    cat benchmark.log >> profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
+    #rm -rf gpu_use.log gpu_utilization.log
+  done
+done
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/benchmark_config.yaml
+cuda_version: "10.1"
+cudnn_version: "7.6"
+trt_version: "6.0"
+python_version: "3.7"
+gcc_version: "8.2"
+paddle_version: "2.0.2"
+cpu: "Xeon 6148"
+gpu: "P4"
+xpu: "None"
+api: ""
+owner: "wangjiawei04"
+model_name: "ppyolo"
+model_type: "static"
+model_source: "paddledetection"
+model_url: ""
+batch_size: 1
+num_of_samples: 1000
+input_shape: "3, 480, 640"
+runtime_device: "gpu"
+ir_optim: true
+enable_memory_optim: true
+enable_tensorrt: false
+precision: "fp32"
+enable_mkldnn: true
+cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/config.yml
+dag:
+  is_thread_op: false
+  tracer:
+    interval_s: 30
+http_port: 18082
+op:
+  ppyolo_mbv3:
+    local_service_conf:
+      client_type: local_predictor
+      concurrency: 10
+      device_type: 1
+      devices: '2'
+      fetch_list:
+      - save_infer_model/scale_0.tmp_1
+      model_config: serving_server/
+rpc_port: 9998
+worker_num: 20
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/label_list.txt
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/label_list.txt
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/pipeline_http_client.py
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/pipeline_http_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+import requests
+import json
+import cv2
+import base64
+import os
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction"
+with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+    image_data1 = file.read()
+image = cv2_to_base64(image_data1)
+for i in range(1):
+    data = {"key": ["image"], "value": [image]}
+    r = requests.post(url=url, data=json.dumps(data))
+    print(r.json())
--- a/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/web_service.py
+++ b/python/examples/pipeline/PaddleDetection/ppyolo_mbv3/web_service.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_server.web_service import WebService, Op
+import logging
+import numpy as np
+import sys
+import cv2
+from paddle_serving_app.reader import *
+import base64
+class PPYoloMbvOp(Op):
+    def init_op(self):
+        self.img_preprocess = Sequential([
+            BGR2RGB(), Div(255.0),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+            Resize((320, 320)), Transpose((2, 0, 1))
+        ])
+        self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        imgs = []
+        #print("keys", input_dict.keys())
+        for key in input_dict.keys():
+            data = base64.b64decode(input_dict[key].encode('utf8'))
+            data = np.fromstring(data, np.uint8)
+            im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+            im = self.img_preprocess(im)
+            imgs.append({
+              "image": im[np.newaxis,:],
+              "im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
+              "scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
+            })
+        feed_dict = {
+            "image": np.concatenate([x["image"] for x in imgs], axis=0),
+            "im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
+            "scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
+        }
+        for key in feed_dict.keys():
+            print(key, feed_dict[key].shape)
+        return feed_dict, False, None, ""
+    def postprocess(self, input_dicts, fetch_dict, log_id):
+        #print(fetch_dict)
+        res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
+        return res_dict, None, ""
+class PPYoloMbv(WebService):
+    def get_pipeline_response(self, read_op):
+        ppyolo_mbv3_op = PPYoloMbvOp(name="ppyolo_mbv3", input_ops=[read_op])
+        return ppyolo_mbv3_op
+ppyolo_mbv3_service = PPYoloMbv(name="ppyolo_mbv3")
+ppyolo_mbv3_service.prepare_pipeline_config("config2.yml")
+ppyolo_mbv3_service.run_service()
--- a/python/examples/pipeline/PaddleDetection/yolov3/000000570688.jpg
+++ b/python/examples/pipeline/PaddleDetection/yolov3/000000570688.jpg
--- a/python/examples/pipeline/PaddleDetection/yolov3/README.md
+++ b/python/examples/pipeline/PaddleDetection/yolov3/README.md
+# YOLOv3 model on Pipeline Paddle Serving
+([简体中文](./README_CN.md)|English)
+### Get Model
+```
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/yolov3_darknet53_270e_coco.tar
+```
+### Start the service
+```
+tar xf yolov3_darknet53_270e_coco.tar
+python web_service.py
+```
+### Perform prediction
+```
+python pipeline_http_client.py
+```
--- a/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py
+++ b/python/examples/pipeline/PaddleDetection/yolov3/benchmark.py
+import sys
+import os
+import yaml
+import requests
+import time
+import json
+import cv2
+import base64
+try:
+    from paddle_serving_server_gpu.pipeline import PipelineClient
+except ImportError:
+    from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+def parse_benchmark(filein, fileout):
+    with open(filein, "r") as fin:
+        res = yaml.load(fin)
+        del_list = []
+        for key in res["DAG"].keys():
+            if "call" in key:
+                del_list.append(key)
+        for key in del_list:
+            del res["DAG"][key]
+    with open(fileout, "w") as fout:
+        yaml.dump(res, fout, default_flow_style=False)
+def gen_yml(device, gpu_id):
+    fin = open("config.yml", "r")
+    config = yaml.load(fin)
+    fin.close()
+    config["dag"]["tracer"] = {"interval_s": 30}
+    if device == "gpu":
+        config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1
+        config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id        
+    with open("config2.yml", "w") as fout: 
+        yaml.dump(config, fout, default_flow_style=False)
+def run_http(idx, batch_size):
+    print("start thread ({})".format(idx))
+    url = "http://127.0.0.1:18082/yolov3/prediction"
+    with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+        image_data1 = file.read()
+    image = cv2_to_base64(image_data1)
+    start = time.time()
+    while True:
+        data = {"key": [], "value": []}
+        for j in range(batch_size):
+            data["key"].append("image_" + str(j))
+            data["value"].append(image)
+        r = requests.post(url=url, data=json.dumps(data))
+        end = time.time()
+        if end - start > 70:
+            print("70s end")
+            break
+    return [[end - start]]
+def multithread_http(thread, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_http , thread, batch_size)
+def run_rpc(thread, batch_size):
+    pass
+def multithread_rpc(thraed, batch_size):
+    multi_thread_runner = MultiThreadRunner()
+    result = multi_thread_runner.run(run_rpc , thread, batch_size)
+if __name__ == "__main__":
+    if sys.argv[1] == "yaml":
+        mode = sys.argv[2] # brpc/  local predictor
+        thread = int(sys.argv[3])
+        device = sys.argv[4]
+        gpu_id = sys.argv[5]
+        gen_yml(device, gpu_id)
+    elif sys.argv[1] == "run":
+        mode = sys.argv[2] # http/ rpc
+        thread = int(sys.argv[3])
+        batch_size = int(sys.argv[4])
+        if mode == "http":
+            multithread_http(thread, batch_size)
+        elif mode == "rpc":
+            multithread_rpc(thread, batch_size)
+    elif sys.argv[1] == "dump":
+        filein = sys.argv[2]
+        fileout = sys.argv[3]
+        parse_benchmark(filein, fileout)
--- a/python/examples/pipeline/PaddleDetection/yolov3/benchmark.sh
+++ b/python/examples/pipeline/PaddleDetection/yolov3/benchmark.sh
+export FLAGS_profile_pipeline=1
+alias python3="python3.7"
+modelname="yolov3_darknet53_270e_coco"
+gpu_id="0"
+benchmark_config_filename="benchmark_config.yaml"
+# HTTP
+ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
+sleep 3
+python3 benchmark.py yaml local_predictor 1 cpu 
+rm -rf profile_log_$modelname
+for thread_num in 1 8 16
+do
+  for batch_size in 1 
+  do
+    echo "#----Yolov3 thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
+    rm -rf PipelineServingLogs
+    rm -rf cpu_utilization.py
+    python3 web_service.py >web.log 2>&1 &
+    sleep 3
+    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+    python3 benchmark.py run http $thread_num $batch_size
+    python3 cpu_utilization.py >>profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
+    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
+    python3 benchmark.py dump benchmark.log benchmark.tmp
+    mv benchmark.tmp benchmark.log
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
+    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
+    cat benchmark.log >> profile_log_$modelname
+    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
+    #rm -rf gpu_use.log gpu_utilization.log
+  done
+done
--- a/python/examples/pipeline/PaddleDetection/yolov3/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleDetection/yolov3/benchmark_config.yaml
+cuda_version: "10.1"
+cudnn_version: "7.6"
+trt_version: "6.0"
+python_version: "3.7"
+gcc_version: "8.2"
+paddle_version: "2.0.2"
+cpu: "Xeon 6148"
+gpu: "P4"
+xpu: "None"
+api: ""
+owner: "wangjiawei04"
+model_name: "yolov3"
+model_type: "static"
+model_source: "paddledetection"
+model_url: ""
+batch_size: 1
+num_of_samples: 1000
+input_shape: "3, 480, 640"
+runtime_device: "gpu"
+ir_optim: true
+enable_memory_optim: true
+enable_tensorrt: false
+precision: "fp32"
+enable_mkldnn: true
+cpu_math_library_num_threads: ""
--- a/python/examples/pipeline/PaddleDetection/yolov3/config.yml
+++ b/python/examples/pipeline/PaddleDetection/yolov3/config.yml
+dag:
+  is_thread_op: false
+  tracer:
+    interval_s: 30
+http_port: 18082
+op:
+  yolov3:
+    local_service_conf:
+      client_type: local_predictor
+      concurrency: 10
+      device_type: 1
+      devices: '2'
+      fetch_list:
+      - save_infer_model/scale_0.tmp_1
+      model_config: serving_server/
+rpc_port: 9998
+worker_num: 20
--- a/python/examples/pipeline/PaddleDetection/yolov3/label_list.txt
+++ b/python/examples/pipeline/PaddleDetection/yolov3/label_list.txt
+person
+bicycle
+car
+motorcycle
+airplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+couch
+potted plant
+bed
+dining table
+toilet
+tv
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
--- a/python/examples/pipeline/PaddleDetection/yolov3/pipeline_http_client.py
+++ b/python/examples/pipeline/PaddleDetection/yolov3/pipeline_http_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+import requests
+import json
+import cv2
+import base64
+import os
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+url = "http://127.0.0.1:18082/yolov3/prediction"
+with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
+    image_data1 = file.read()
+image = cv2_to_base64(image_data1)
+for i in range(1):
+    data = {"key": ["image"], "value": [image]}
+    r = requests.post(url=url, data=json.dumps(data))
+    print(r.json())
--- a/python/examples/pipeline/PaddleDetection/yolov3/web_service.py
+++ b/python/examples/pipeline/PaddleDetection/yolov3/web_service.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_server.web_service import WebService, Op
+import logging
+import numpy as np
+import sys
+import cv2
+from paddle_serving_app.reader import *
+import base64
+class Yolov3Op(Op):
+    def init_op(self):
+        self.img_preprocess = Sequential([
+            BGR2RGB(), Div(255.0),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+            Resize((640, 640)), Transpose((2, 0, 1))
+        ])
+        self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
+    def preprocess(self, input_dicts, data_id, log_id):
+        (_, input_dict), = input_dicts.items()
+        imgs = []
+        #print("keys", input_dict.keys())
+        for key in input_dict.keys():
+            data = base64.b64decode(input_dict[key].encode('utf8'))
+            data = np.fromstring(data, np.uint8)
+            im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+            im = self.img_preprocess(im)
+            imgs.append({
+              "image": im[np.newaxis,:],
+              "im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
+              "scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
+            })
+        feed_dict = {
+            "image": np.concatenate([x["image"] for x in imgs], axis=0),
+            "im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
+            "scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
+        }
+        #for key in feed_dict.keys():
+        #    print(key, feed_dict[key].shape)
+        return feed_dict, False, None, ""
+    def postprocess(self, input_dicts, fetch_dict, log_id):
+        #print(fetch_dict)
+        res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
+        return res_dict, None, ""
+class Yolov3Service(WebService):
+    def get_pipeline_response(self, read_op):
+        yolov3_op = Yolov3Op(name="yolov3", input_ops=[read_op])
+        return yolov3_op
+yolov3_service = Yolov3Service(name="yolov3")
+yolov3_service.prepare_pipeline_config("config2.yml")
+yolov3_service.run_service()
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -415,7 +415,7 @@ class RCNNPostprocess(object):
        out_path = os.path.join(self.output_dir, image_path)
        image.save(out_path, quality=95)
-    def __call__(self, image_with_bbox):
+    def __call__(self, image_with_bbox, visualize=True):
        fetch_name = ""
        for key in image_with_bbox:
            if key == "image":
@@ -427,6 +427,8 @@ class RCNNPostprocess(object):
                                            self.clsid2catid)
        if os.path.isdir(self.output_dir) is False:
            os.mkdir(self.output_dir)
+        if visualize is False:
+            return bbox_result
        self.visualize(image_with_bbox["image"], bbox_result, self.catid2name,
                       len(self.label_list))
        if os.path.isdir(self.output_dir) is False:
@@ -434,6 +436,7 @@ class RCNNPostprocess(object):
        bbox_file = os.path.join(self.output_dir, 'bbox.json')
        with open(bbox_file, 'w') as f:
            json.dump(bbox_result, f, indent=4)
+        return bbox_result
    def __repr__(self):
        return self.__class__.__name__ + "label_file: {1}, output_dir: {2}".format(

--- a/tools/scripts/ipipe_py3.sh
+++ b/tools/scripts/ipipe_py3.sh
@@ -39,7 +39,8 @@ go get -u google.golang.org/grpc@v1.33.0
 build_whl_list=(build_cpu_server build_gpu_server build_client build_app)
 rpc_model_list=(grpc_fit_a_line grpc_yolov4 pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc \
 lac_rpc cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
-criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt)
+criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt \
+low_precision_resnet50_int8)
 http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http\
 pipeline_ocr_cpu_http)
@@ -148,7 +149,7 @@ function before_hook() {
    setproxy
    unsetproxy
    cd ${build_path}/python
-    python3.6 -m pip install --upgrade pip==20.0.1
+    python3.6 -m pip install --upgrade pip
    python3.6 -m pip install requests
    python3.6 -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
    python3.6 -m pip install numpy==1.16.4
@@ -260,6 +261,22 @@ function build_app() {
    python3.6 -m pip install ${build_path}/build/python/dist/*
 }
+# Test case listed in rpc_model_list: runs the low-precision (int8) ResNet50
+# RPC example end to end. check_dir, check_result and kill_server_process are
+# helpers that are not visible in this hunk.
+function low_precision_resnet50_int8 () {
+    # Per-case log directory; server_log.txt and client_log.txt are written here.
+    dir=${log_dir}rpc_model/low_precision/resnet50/
+    cd ${build_path}/python/examples/low_precision/resnet50/
+    check_dir ${dir}
+    # Download and unpack the quantized model, then run the client-side
+    # converter on the extracted ResNet50_quant directory.
+    wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
+    tar zxvf ResNet50_quant.tar.gz
+    python3.6 -m paddle_serving_client.convert --dirname ResNet50_quant
+    echo -e "${GREEN_COLOR}low_precision_resnet50_int8_GPU_RPC server started${RES}" | tee -a ${log_dir}server_total.txt
+    # Start the server in the background on port 9393 (GPU 0, TensorRT, int8),
+    # sending stdout and stderr to server_log.txt.
+    # NOTE(review): "serving_server" is presumably the directory produced by
+    # the convert step above -- confirm.
+    python3.6 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 > ${dir}server_log.txt 2>&1 &
+    check_result server 10
+    echo -e "${GREEN_COLOR}low_precision_resnet50_int8_GPU_RPC client started${RES}" | tee -a ${log_dir}client_total.txt
+    # Run the client in the foreground; its output goes to client_log.txt.
+    python3.6 resnet50_client.py > ${dir}client_log.txt 2>&1
+    check_result client "low_precision_resnet50_int8_GPU_RPC server test completed"
+    kill_server_process
+}
 function faster_rcnn_hrnetv2p_w18_1x_encrypt() {
    dir=${log_dir}rpc_model/faster_rcnn_hrnetv2p_w18_1x/
    cd ${build_path}/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x