Merge pull request #1231 from HexToString/develop-p

add resnet50_benchmark

Merge pull request #1231 from HexToString/develop-p
add resnet50_benchmark
f92e6a52 · TeslaZhao · GitHub · 93feff0f · 95d382a9 · f92e6a52
3 changed files
--- a/python/examples/resnet_v2_50/benchmark.py
+++ b/python/examples/resnet_v2_50/benchmark.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+import json
+import requests
+import numpy as np
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
+from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
+
+# Benchmark options shared by every worker; this script reads args.request,
+# args.model, args.batch_size and args.thread from it.
+# NOTE(review): presumably parsed from the command line by benchmark_args()
+# (see the --thread/--batch_size/--model/--request flags used by
+# benchmark.sh) -- confirm against paddle_serving_client.utils.
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+    total_number = 0
+    profile_flags = False
+    latency_flags = False
+    if os.getenv("FLAGS_profile_client"):
+        profile_flags = True
+    if os.getenv("FLAGS_serving_latency"):
+        latency_flags = True
+        latency_list = []
+
+    if args.request == "rpc":
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+        start = time.time()
+        for i in range(turns):
+            if args.batch_size >= 1:
+                l_start = time.time()
+                seq = Sequential([
+                    File2Image(), Resize(256), CenterCrop(224), RGB2BGR(),
+                    Transpose((2, 0, 1)), Div(255), Normalize(
+                        [0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
+                ])
+                image_file = "daisy.jpg"
+                img = seq(image_file)
+                feed_data = np.array(img)
+                feed_data = np.expand_dims(feed_data, 0).repeat(
+                    args.batch_size, axis=0)
+                result = client.predict(
+                    feed={"image": feed_data},
+                    fetch=["save_infer_model/scale_0.tmp_0"],
+                    batch=True)
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
+                total_number = total_number + 1
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
+    else:
+        raise ValueError("not implemented {} request".format(args.request))
+    end = time.time()
+    if latency_flags:
+        return [[end - start], latency_list, [total_number]]
+    else:
+        return [[end - start]]
+
+
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = ["127.0.0.1:9393"]
+    turns = 1
+    start = time.time()
+    result = multi_thread_runner.run(
+        single_func, args.thread, {"endpoint": endpoint_list,
+                                   "turns": turns})
+    end = time.time()
+    total_cost = end - start
+    total_number = 0
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+        total_number += result[2][i]
+    avg_cost = avg_cost / args.thread
+
+    print("total cost-include init: {}s".format(total_cost))
+    print("each thread cost: {}s. ".format(avg_cost))
+    print("qps: {}samples/s".format(args.batch_size * total_number / (
+        avg_cost * args.thread)))
+    print("qps(request): {}samples/s".format(total_number / (avg_cost *
+                                                             args.thread)))
+    print("total count: {} ".format(total_number))
+    if os.getenv("FLAGS_serving_latency"):
+        show_latency(result[1])
--- a/python/examples/resnet_v2_50/benchmark.sh
+++ b/python/examples/resnet_v2_50/benchmark.sh
+rm profile_log*
+rm -rf resnet_log*
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1 
+gpu_id=3
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+    rm -rf utilization
+else
+    mkdir utilization
+fi
+#start server
+python3.6 -m paddle_serving_server.serve --model $1 --port 9393 --thread 10 --gpu_ids $gpu_id  --use_trt --ir_optim >  elog  2>&1 &
+sleep 15
+
+#warm up
+python3.6 benchmark.py --thread 1 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
+for thread_num in 1 2 4 8 16
+do
+for batch_size in 1 4 8 16 32
+do
+    job_bt=`date '+%Y%m%d%H%M%S'`
+    nvidia-smi --id=$gpu_id --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
+    nvidia-smi --id=$gpu_id --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    rm -rf cpu.txt
+    python3.6 cpu.py &
+    gpu_memory_pid=$!
+    python3.6 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
+    echo "model_name:" $1
+    echo "thread_num:" $thread_num
+    echo "batch_size:" $batch_size
+    echo "=================Done===================="
+    echo "model_name:$1" >> profile_log_$1
+    echo "batch_size:$batch_size" >> profile_log_$1
+    job_et=`date '+%Y%m%d%H%M%S'`
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
+    #awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
+    #awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+    grep -av '^0 %' gpu_utilization.log > gpu_utilization.log.tmp
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
+    awk -F" " '{sum+=$1} END {print "GPU_UTILIZATION:", sum/NR, sum, NR }' gpu_utilization.log.tmp >> profile_log_$1
+    rm -rf gpu_memory_use.log gpu_utilization.log gpu_utilization.log.tmp
+    python3.6 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 10 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
+done
+
+#Divided log
+awk 'BEGIN{RS="\n\n"}{i++}{print > "resnet_log_"i}' profile_log_$1
+mkdir resnet_log && mv resnet_log_* resnet_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
--- a/python/examples/resnet_v2_50/run_benchmark.sh
+++ b/python/examples/resnet_v2_50/run_benchmark.sh
+if [ ! -x "ResNet50.tar.gz"]; then
+  wget https://paddle-inference-dist.bj.bcebos.com/AI-Rank/models/Paddle/ResNet50.tar.gz
+fi
+tar -xzvf ResNet50.tar.gz
+python3.6 -m paddle_serving_client.convert --dirname ./ResNet50 --model_filename model --params_filename params
+bash benchmark.sh serving_server serving_client