Commit 09ab2fba, authored by guru4elephant

refine benchmark_batch.py

Parent 22dd55e4
@@ -41,13 +41,13 @@ def single_func(idx, resource):
         client = Client()
         client.load_client_config(args.model)
         client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
-        feed_batch = []
-        for bi in range(args.batch_size):
-            feed_batch.append(reader.process(dataset[bi]))
         start = time.time()
         for i in range(1000):
             if args.batch_size >= 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_batch.append(reader.process(dataset[i]))
                 result = client.batch_predict(
                     feed_batch=feed_batch, fetch=fetch)
             else:
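For context, this refinement moves the `feed_batch` construction inside the timing loop, so each of the 1000 requests rebuilds its batch from `dataset[i]` rather than reusing one batch built once from `dataset[bi]`. A minimal sketch of the resulting batching behavior, with hypothetical stand-ins for `reader.process` and `dataset` (the real objects are set up earlier in `single_func`):

# Illustrative only: each iteration now sends batch_size copies of the i-th sample.
batch_size = 4                    # stands in for args.batch_size
dataset = ["s0", "s1", "s2"]      # stands in for the real dataset
process = lambda s: s.upper()     # stands in for reader.process

for i in range(len(dataset)):     # the real loop runs 1000 iterations
    feed_batch = [process(dataset[i]) for _ in range(batch_size)]
    print(i, feed_batch)          # e.g. 0 ['S0', 'S0', 'S0', 'S0']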
@@ -61,7 +61,9 @@ def single_func(idx, resource):
 if __name__ == '__main__':
     multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
+    endpoint_list = [
+        "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"
+    ]
     result = multi_thread_runner.run(single_func, args.thread,
                                      {"endpoint": endpoint_list})
     avg_cost = 0
...
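The endpoint list is expanded from one port to four, and the `idx % len(resource["endpoint"])` indexing in `single_func` (see the first hunk) round-robins client threads across them. A quick sketch of that assignment:

endpoints = ["127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298"]
for idx in range(8):  # e.g. --thread 8
    # threads 0..3 hit ports 9295..9298, then the cycle repeats for 4..7
    print("thread", idx, "->", endpoints[idx % len(endpoints)])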
 rm profile_log
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+sleep 5
 for thread_num in 1 2 4 8 16
 do
 for batch_size in 1 2 4 8 16 32 64 128 256 512
 do
     $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
     echo "========================================"
+    echo "thread num: ", $thread_num
+    echo "batch size: ", $batch_size
     echo "batch size : $batch_size" >> profile_log
     $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
     tail -n 1 profile >> profile_log
...
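The benchmark script now also launches the GPU server itself before sweeping the grid of thread counts and batch sizes. For reference, the size of the sweep it drives, derived directly from the two loops above (each run appends one block to profile_log):

thread_nums = [1, 2, 4, 8, 16]
batch_sizes = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
runs = [(t, b) for t in thread_nums for b in batch_sizes]
print(len(runs), "benchmark runs")  # 50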
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog &
+export FLAGS_profile_client=1
+export FLAGS_profile_server=1
+sleep 5
+thread_num=4
+python benchmark_batch.py --thread ${thread_num} --batch_size 64 --model serving_client_conf/serving_client_conf.prototxt 2> profile
+python show_profile.py profile ${thread_num}
+python timeline_trace.py profile trace
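This profiling script enables client- and server-side timing via `FLAGS_profile_client` / `FLAGS_profile_server`, then converts the captured profile into a trace file. `timeline_trace.py` itself is not shown in this diff; purely as an illustration of the kind of output such a step can target, here is a minimal, hypothetical emitter of Chrome trace-event JSON (viewable in chrome://tracing):

import json

# Hypothetical sketch, NOT the actual timeline_trace.py: pair B(egin)/E(nd)
# events with microsecond timestamps into a single JSON list per trace file.
events = []
for name, start_us, end_us in [("prepro", 0, 120), ("infer", 120, 900)]:
    events.append({"name": name, "ph": "B", "ts": start_us, "pid": 0, "tid": 0})
    events.append({"name": name, "ph": "E", "ts": end_us, "pid": 0, "tid": 0})

with open("trace", "w") as f:
    json.dump(events, f)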