benchmark.py 4.4 KB
Newer Older
L
LDOUBLEV 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
import sys
import os
import base64
import yaml
import requests
import time
import json
try:
    from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
    from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def parse_benchmark(filein, fileout):
    """Copy a YAML file, dropping every "DAG" entry whose key contains "call".

    Args:
        filein: Path of the YAML file to read. The loaded document must have
            a top-level "DAG" mapping.
        fileout: Path the filtered document is written to (opened with "w").

    Raises:
        KeyError: If the loaded document has no "DAG" key.
    """
    with open(filein, "r") as fin:
        # safe_load instead of yaml.load(fin): calling yaml.load without an
        # explicit Loader fails on PyYAML >= 6 and can construct arbitrary
        # objects on older releases.
        res = yaml.safe_load(fin)

    dag = res["DAG"]
    # Collect the keys first so the dict is not mutated while iterating it.
    for key in [k for k in dag if "call" in k]:
        del dag[key]

    with open(fileout, "w") as fout:
        yaml.dump(res, fout, default_flow_style=False)

def gen_yml(device, gpu_id):
    """Write config2.yml, a copy of config.yml adjusted for benchmarking.

    The copy sets dag.tracer to {"interval_s": 10} and rewrites the
    "imagenet" op's local_service_conf: device_type is 1 (and devices is set
    to gpu_id) when device == "gpu", otherwise device_type is 0.

    Args:
        device: Target device name; only the exact string "gpu" selects the
            GPU branch.
        gpu_id: Value stored under "devices" in the GPU branch; ignored
            otherwise.

    Raises:
        KeyError: If config.yml lacks the "dag" or
            op.imagenet.local_service_conf sections.
    """
    # Context manager closes the file even if parsing raises; safe_load is
    # used because yaml.load without a Loader fails on PyYAML >= 6.
    with open("config.yml", "r") as fin:
        config = yaml.safe_load(fin)

    config["dag"]["tracer"] = {"interval_s": 10}
    service_conf = config["op"]["imagenet"]["local_service_conf"]
    if device == "gpu":
        service_conf["device_type"] = 1
        service_conf["devices"] = gpu_id
    else:
        service_conf["device_type"] = 0

    with open("config2.yml", "w") as fout:
        yaml.dump(config, fout, default_flow_style=False)

def cv2_to_base64(image):
    """Return the base64 encoding of the bytes-like *image* as a text string."""
    encoded = base64.b64encode(image)
    return encoded.decode('utf8')

def run_http(idx, batch_size, duration_s=20):
    """Send the same HTTP prediction request in a loop and time each call.

    Reads ./daisy.jpg once, base64-encodes it, and posts a JSON body holding
    batch_size copies of the image to the local "imagenet" prediction URL
    until more than duration_s seconds have elapsed. At least one request is
    always sent.

    Args:
        idx: Worker index, used only in the start-up message.
        batch_size: Number of image copies per request.
        duration_s: How long to keep sending requests, in seconds.

    Returns:
        A list of three lists: [elapsed seconds for the whole call],
        the per-request latencies in milliseconds, and [request count].
    """
    print("start thread ({})".format(idx))
    url = "http://127.0.0.1:18080/imagenet/prediction"
    start = time.time()

    with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
        image_data1 = file.read()
    image = cv2_to_base64(image_data1)
    keys = ["image_{}".format(i) for i in range(batch_size)]
    values = [image] * batch_size
    # The request body never changes, so serialize it once instead of on
    # every iteration of the timed loop.
    payload = json.dumps({"key": keys, "value": values})

    latency_list = []
    start_time = time.time()
    total_num = 0
    while True:
        l_start = time.time()
        r = requests.post(url=url, data=payload)
        # Stop the clock before printing so console output is not counted as
        # request latency.
        l_end = time.time()
        print(r.json())
        latency_list.append((l_end - l_start) * 1000)
        total_num += 1
        if time.time() - start_time > duration_s:
            break
    end = time.time()
    return [[end - start], latency_list, [total_num]]
L
LDOUBLEV 已提交
70 71 72

def multithread_http(thread, batch_size):
    """Run run_http on several workers and print aggregate statistics.

    Prints the wall-clock time of the whole run, the mean per-worker time,
    the total request count, and the throughput in samples per second, then
    passes the collected latencies to show_latency.

    Args:
        thread: Number of workers handed to MultiThreadRunner.run; also the
            number of entries read from result[0] and result[2].
        batch_size: Forwarded to run_http and used to scale the QPS figure.

    NOTE(review): assumes MultiThreadRunner.run merges the workers' return
    values so that result[0], result[1] and result[2] hold, respectively,
    every worker's elapsed time, latencies and request count -- confirm
    against paddle_serving_client.utils.
    """
    multi_thread_runner = MultiThreadRunner()
    start = time.time()
    result = multi_thread_runner.run(run_http, thread, batch_size)
    end = time.time()
    total_cost = end - start

    # Index explicitly (rather than slice) so a short result list still
    # raises IndexError instead of silently under-counting.
    avg_cost = sum(result[0][i] for i in range(thread)) / thread
    total_number = sum(result[2][i] for i in range(thread))

    print("Total cost: {}s".format(total_cost))
    print("Each thread cost: {}s. ".format(avg_cost))
    print("Total count: {}. ".format(total_number))
    print("AVG QPS: {} samples/s".format(batch_size * total_number /
                                         total_cost))
    show_latency(result[1])
L
LDOUBLEV 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134

def run_rpc(thread, batch_size):
    """Issue RPC predictions for every file in imgs/ and time the whole run.

    For each file in the "imgs/" directory the file is read, base64-encoded,
    and sent repeatedly through a PipelineClient until more than 10 seconds
    have passed for that file (at least one request per file).

    Args:
        thread: Not used by this function.
        batch_size: Not used by this function.

    Returns:
        [[elapsed]] where elapsed is the time in seconds from just after the
        client connects until all files have been processed.
    """
    rpc_client = PipelineClient()
    rpc_client.connect(['127.0.0.1:18080'])
    began = time.time()
    img_root = "imgs/"
    for name in os.listdir(img_root):
        img_path = os.path.join(img_root, name)
        with open(img_path, 'rb') as handle:
            raw_bytes = handle.read()
        encoded = cv2_to_base64(raw_bytes)
        loop_began = time.time()
        keep_going = True
        while keep_going:
            # The prediction result itself is not inspected.
            rpc_client.predict(feed_dict={"image": encoded}, fetch=["res"])
            keep_going = time.time() - loop_began <= 10
    finished = time.time()
    return [[finished - began]]


def multithread_rpc(thraed, batch_size):
    """Run run_rpc on several workers via MultiThreadRunner.

    Args:
        thraed: Number of workers passed to MultiThreadRunner.run. The
            misspelled name is kept so existing callers are unaffected.
        batch_size: Forwarded to run_rpc.

    Returns:
        Whatever MultiThreadRunner.run returns for the run_rpc workers.
    """
    multi_thread_runner = MultiThreadRunner()
    # Use the parameter itself; reading a name `thread` here would only work
    # if a module-level global of that name happened to exist.
    return multi_thread_runner.run(run_rpc, thraed, batch_size)

if __name__ == "__main__":
    # Command-line entry point. Three sub-commands are supported:
    #   yaml <mode> <thread> <device> [gpu_id]  -> gen_yml(device, gpu_id)
    #   run  <http|rpc> <thread> <batch_size>   -> multithread_http / _rpc
    #   dump <filein> <fileout>                 -> parse_benchmark
    # NOTE: the names assigned below are module-level globals; this block is
    # intentionally not wrapped in a function so they stay visible as such.
    usage = ("usage:\n"
             "  benchmark.py yaml <mode> <thread> <device> [gpu_id]\n"
             "  benchmark.py run <http|rpc> <thread> <batch_size>\n"
             "  benchmark.py dump <filein> <fileout>")
    argc = len(sys.argv)
    if argc < 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(usage)

    if sys.argv[1] == "yaml":
        # gpu_id is only required when the device is "gpu".
        if argc < 5 or (sys.argv[4] == "gpu" and argc < 6):
            sys.exit(usage)
        mode = sys.argv[2] # brpc/  local predictor
        thread = int(sys.argv[3])
        device = sys.argv[4]
        if device == "gpu":
            gpu_id = sys.argv[5]
        else:
            gpu_id = None
        gen_yml(device, gpu_id)
    elif sys.argv[1] == "run":
        if argc < 5:
            sys.exit(usage)
        mode = sys.argv[2] # http/ rpc
        thread = int(sys.argv[3])
        batch_size = int(sys.argv[4])
        if mode == "http":
            multithread_http(thread, batch_size)
        elif mode == "rpc":
            multithread_rpc(thread, batch_size)
        else:
            # Report an unrecognized mode instead of silently doing nothing.
            sys.exit(usage)
    elif sys.argv[1] == "dump":
        if argc < 4:
            sys.exit(usage)
        filein = sys.argv[2]
        fileout = sys.argv[3]
        parse_benchmark(filein, fileout)
    else:
        # Report an unrecognized sub-command instead of silently doing nothing.
        sys.exit(usage)