Merge branch 'develop' into doc_0.6

dcb83b6b · Jiawei Wang · GitHub · 222c5e43 · e80b5e70 · dcb83b6b
39 changed files
--- a/doc/C++DESIGN.md
+++ b/doc/C++DESIGN.md
-# Paddle Serving Design
+# C++ Serving Design

 ([简体中文](./C++DESIGN_CN.md)|English)


--- a/doc/C++DESIGN_CN.md
+++ b/doc/C++DESIGN_CN.md
-# Paddle Serving设计方案
+# C++ Serving设计方案

 (简体中文|[English](./C++DESIGN.md))


--- a/doc/PADDLE_SERVING_ON_KUBERNETES.md
+++ b/doc/PADDLE_SERVING_ON_KUBERNETES.md
@@ -25,7 +25,7 @@ kubectl apply -f https://bit.ly/kong-ingress-dbless
 在`tools/generate_runtime_docker.sh`文件下，它的使用方式如下

 ```bash
-bash tool/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
+bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
 ```

 会生成 cuda10.1，python 3.6，serving版本0.6.0 还有 paddle版本2.0.1的运行镜像。如果有其他疑问，可以执行下列语句得到帮助信息。

--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "DarkNet53"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/DarkNet53/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "HRNet_W18_C"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/HRNet_W18_C/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "MobileNetV1"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV1/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "MobileNetV2"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV2/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-     ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "MobileNetV3_large_x1_0"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/MobileNetV3_large_x1_0/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ResNeXt101_vd_64x4d"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNeXt101_vd_64x4d/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ResNet50_vd"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ResNet50_vd_FPGM"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_FPGM_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_FPGM/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ResNet50_vd_KL"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_KL_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_KL/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ResNet50_vd_PACT"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_PACT_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ResNet50_vd_PACT/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_http_client.py
+++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_http_client.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import requests
 import json
@@ -5,11 +19,13 @@ import cv2
 import base64
 import os

+
 def cv2_to_base64(image):
    return base64.b64encode(image).decode('utf8')

+
 if __name__ == "__main__":
-    url = "http://127.0.0.1:18000/imagenet/prediction"
+    url = "http://127.0.0.1:18080/imagenet/prediction"
    with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
        image_data1 = file.read()
    image = cv2_to_base64(image_data1)

--- a/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py
+++ b/python/examples/pipeline/PaddleClas/ResNet_V2_50/pipeline_rpc_client.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+try:
+    from paddle_serving_server_gpu.pipeline import PipelineClient
+except ImportError:
+    from paddle_serving_server.pipeline import PipelineClient
+import numpy as np
+import requests
+import json
+import cv2
+import base64
+import os
+
+client = PipelineClient()
+client.connect(['127.0.0.1:9993'])
+
+
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+
+
+with open("daisy.jpg", 'rb') as file:
+    image_data = file.read()
+image = cv2_to_base64(image_data)
+
+for i in range(1):
+    ret = client.predict(feed_dict={"image": image}, fetch=["label", "prob"])
+    print(ret)
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "ShuffleNetV2_x1_0"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_0_pretrained.tar"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "gpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml.template
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_config.yaml.template
-
-cuda_version: "10.1"
-cudnn_version: "7.6"
-trt_version: "6.0"
-python_version: "3.7"
-gcc_version: "8.2"
-paddle_version: "2.0.1"
-
-cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
-gpu: "T4"
-xpu: "None"
-api: ""
-owner: "cuicheng01"
-
-model_name: "imagenet"
-model_type: "static"
-model_source: "PaddleClas"
-model_url: "model_url_path"
-
-batch_size: 1
-num_of_samples: 1000
-input_shape: "3,224,224"
-
-runtime_device: "cpu"
-ir_optim: true
-enable_memory_optim: true
-enable_tensorrt: false
-precision: "fp32"
-enable_mkldnn: false
-cpu_math_library_num_threads: ""
-
-
--- a/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_gpu.sh
+++ b/python/examples/pipeline/PaddleClas/ShuffleNetV2_x1_0/benchmark_gpu.sh
-export FLAGS_profile_pipeline=1
-alias python3="python3.7"
-modelname="imagenet"
-use_gpu=1
-gpu_id="0"
-benchmark_config_filename="benchmark_config.yaml"
-
-# HTTP
-ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 
-sleep 3
-if [ $use_gpu -eq 1 ]; then
-  python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
-else
-  python3 benchmark.py yaml local_predictor 1 cpu
-fi
-rm -rf profile_log_$modelname
-for thread_num in 1
-do
-  for batch_size in 1
-  do
-    echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
-    rm -rf PipelineServingLogs
-    rm -rf cpu_utilization.py
-    python3 resnet50_web_service.py >web.log 2>&1 &
-    sleep 3
-    nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
-    nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
-    echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
-    python3 benchmark.py run http $thread_num $batch_size
-    python3 cpu_utilization.py >>profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
-    ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
-    ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
-    python3 benchmark.py dump benchmark.log benchmark.tmp
-    mv benchmark.tmp benchmark.log
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
-    awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
-    cat benchmark.log >> profile_log_$modelname
-    python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
-    #rm -rf gpu_use.log gpu_utilization.log
-  done
-done
--- a/python/paddle_serving_app/reader/image_reader.py
+++ b/python/paddle_serving_app/reader/image_reader.py
@@ -142,10 +142,10 @@ class DBPostProcess(object):
    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
-        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
-        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
-        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
-        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
+        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
+        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
+        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
+        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin