diff --git a/core/cube/cube-api/src/cube_cli.cpp b/core/cube/cube-api/src/cube_cli.cpp index eee4b0c31ad83ca69d242e81bae3ce4ecfb5bf1a..f43f3deacf21db2f777012539fcd83a8fb70002b 100644 --- a/core/cube/cube-api/src/cube_cli.cpp +++ b/core/cube/cube-api/src/cube_cli.cpp @@ -31,8 +31,9 @@ DEFINE_bool(print_output, false, "print output flag"); DEFINE_int32(thread_num, 1, "thread num"); std::atomic<int> g_concurrency(0); -std::vector<uint64_t> time_list; +std::vector<std::vector<uint64_t>> time_list; std::vector<uint64_t> request_list; +int turns = 1000000 / FLAGS_batch; namespace { inline uint64_t time_diff(const struct timeval& start_time, @@ -97,7 +98,7 @@ int run(int argc, char** argv, int thread_id) { while (g_concurrency.load() >= FLAGS_thread_num) { } g_concurrency++; - + time_list[thread_id].resize(turns); while (index < file_size) { // uint64_t key = strtoul(buffer, NULL, 10); @@ -121,47 +122,12 @@ int run(int argc, char** argv, int thread_id) { } ++seek_counter; uint64_t seek_cost = time_diff(seek_start, seek_end); - seek_cost_total += seek_cost; - if (seek_cost > seek_cost_max) { - seek_cost_max = seek_cost; - } - if (seek_cost < seek_cost_min) { - seek_cost_min = seek_cost; - } + time_list[thread_id][request - 1] = seek_cost; keys.clear(); values.clear(); } } - /* - if (keys.size() > 0) { - int ret = 0; - values.resize(keys.size()); - TIME_FLAG(seek_start); - ret = cube->seek(FLAGS_dict, keys, &values); - TIME_FLAG(seek_end); - if (ret != 0) { - LOG(WARNING) << "cube seek failed"; - } else if (FLAGS_print_output) { - for (size_t i = 0; i < keys.size(); ++i) { - fprintf(stdout, - "key:%lu value:%s\n", - keys[i], - string_to_hex(values[i].buff).c_str()); - } - } - - ++seek_counter; - uint64_t seek_cost = time_diff(seek_start, seek_end); - seek_cost_total += seek_cost; - if (seek_cost > seek_cost_max) { - seek_cost_max = seek_cost; - } - if (seek_cost < seek_cost_min) { - seek_cost_min = seek_cost; - } - } - */ g_concurrency--; // fclose(key_file); @@ -171,12 +137,6 @@ int run(int argc, char** argv, int thread_id) { LOG(WARNING) << "destroy cube api failed err=" << ret; } - uint64_t seek_cost_avg = seek_cost_total / seek_counter; - LOG(INFO) << "seek cost avg = " << seek_cost_avg; - LOG(INFO) << "seek cost max = " << seek_cost_max; - LOG(INFO) << "seek cost min = " << seek_cost_min; - - time_list[thread_id] = seek_cost_avg; request_list[thread_id] = request; return 0; @@ -188,6 +148,7 @@ int run_m(int argc, char** argv) { request_list.resize(thread_num); time_list.resize(thread_num); std::vector<std::thread*> thread_pool; + TIME_FLAG(main_start); for (int i = 0; i < thread_num; i++) { thread_pool.push_back(new std::thread(run, argc, argv, i)); } @@ -195,27 +156,33 @@ thread_pool[i]->join(); delete thread_pool[i]; } + TIME_FLAG(main_end); uint64_t sum_time = 0; uint64_t max_time = 0; uint64_t min_time = 1000000; uint64_t request_num = 0; for (int i = 0; i < thread_num; i++) { - sum_time += time_list[i]; - if (time_list[i] > max_time) { - max_time = time_list[i]; - } - if (time_list[i] < min_time) { - min_time = time_list[i]; + for (int j = 0; j < request_list[i]; j++) { + sum_time += time_list[i][j]; + if (time_list[i][j] > max_time) { + max_time = time_list[i][j]; + } + if (time_list[i][j] < min_time) { + min_time = time_list[i][j]; + } } request_num += request_list[i]; } - uint64_t mean_time = sum_time / thread_num; - LOG(INFO) << thread_num << " thread seek cost" - << " avg = " << std::to_string(mean_time) - << " max = " << std::to_string(max_time) - << " min = " << std::to_string(min_time); - LOG(INFO) << "
total_request = " << std::to_string(request_num) << " speed = " - << std::to_string(1000000 * thread_num / mean_time) // mean_time us + uint64_t mean_time = sum_time / (thread_num * turns); + uint64_t main_time = time_diff(main_start, main_end); + LOG(INFO) << "\n" + << thread_num << " thread seek cost" + << "\navg = " << std::to_string(mean_time) + << "\nmax = " << std::to_string(max_time) + << "\nmin = " << std::to_string(min_time); + LOG(INFO) << "\ntotal_request = " << std::to_string(request_num) + << "\nspeed = " << std::to_string(request_num * 1000000 / + main_time) // mean_time us << " query per second"; return 0; } diff --git a/core/general-server/op/general_dist_kv_infer_op.cpp b/core/general-server/op/general_dist_kv_infer_op.cpp index 9c6c70352b5387fab95acd16cdf79aa2b46f6122..adaa6cbc1818fc5300faf662d98ad47c9af4c468 100644 --- a/core/general-server/op/general_dist_kv_infer_op.cpp +++ b/core/general-server/op/general_dist_kv_infer_op.cpp @@ -90,6 +90,9 @@ int GeneralDistKVInferOp::inference() { keys.begin() + key_idx); key_idx += dataptr_size_pairs[i].second; } + Timer timeline; + int64_t cube_start = timeline.TimeStampUS(); + timeline.Start(); rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance(); std::vector<std::string> table_names = cube->get_table_names(); if (table_names.size() == 0) { @@ -97,7 +100,7 @@ int GeneralDistKVInferOp::inference() { return -1; } int ret = cube->seek(table_names[0], keys, &values); - + int64_t cube_end = timeline.TimeStampUS(); if (values.size() != keys.size() || values[0].buff.size() == 0) { LOG(ERROR) << "cube value return null"; } @@ -153,9 +156,7 @@ int GeneralDistKVInferOp::inference() { VLOG(2) << "infer batch size: " << batch_size; - Timer timeline; int64_t start = timeline.TimeStampUS(); - timeline.Start(); if (InferManager::instance().infer( engine_name().c_str(), &infer_in, out, batch_size)) { @@ -165,6 +166,8 @@ int64_t end = timeline.TimeStampUS(); CopyBlobInfo(input_blob, output_blob); + AddBlobInfo(output_blob, cube_start); + AddBlobInfo(output_blob, cube_end); AddBlobInfo(output_blob, start); AddBlobInfo(output_blob, end); return 0; diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh index 96c9cd97d15bf1133210feb0bcabf95fdcde9d37..09e9e1bc23b81f118a22a14ffc51fa2fd5a951d4 100644 --- a/python/examples/bert/benchmark.sh +++ b/python/examples/bert/benchmark.sh @@ -1,41 +1,52 @@ -rm profile_log +rm profile_log* export CUDA_VISIBLE_DEVICES=0,1,2,3 export FLAGS_profile_server=1 export FLAGS_profile_client=1 export FLAGS_serving_latency=1 -python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim False --ir_optim True 2> elog > stdlog & -hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'` -sleep 5 + gpu_id=0 +#save cpu and gpu utilization log +if [ -d utilization ];then + rm -rf utilization +else + mkdir utilization +fi +#start server +$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim > elog 2>&1 & +sleep 5 #warm up -python3 benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 - -for thread_num in 4 8 16 +$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 +echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py +for thread_num
in 1 4 8 16 do -for batch_size in 1 4 16 64 256 +for batch_size in 1 4 16 64 do job_bt=`date '+%Y%m%d%H%M%S'` - nvidia-smi --id=$gpu_id --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 & + nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 & + nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 & gpu_memory_pid=$! - python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 + $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 kill ${gpu_memory_pid} + kill `ps -ef|grep used_memory|awk '{print $2}'` echo "model_name:" $1 echo "thread_num:" $thread_num echo "batch_size:" $batch_size echo "=================Done====================" echo "model_name:$1" >> profile_log_$1 echo "batch_size:$batch_size" >> profile_log_$1 + $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1 job_et=`date '+%Y%m%d%H%M%S'` - awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY_USE:", max}' gpu_use.log >> profile_log_$1 - monquery -n ${hostname} -i GPU_AVERAGE_UTILIZATION -s $job_bt -e $job_et -d 10 > gpu_log_file_${job_bt} - monquery -n ${hostname} -i CPU_USER -s $job_bt -e $job_et -d 10 > cpu_log_file_${job_bt} - cpu_num=$(cat /proc/cpuinfo | grep processor | wc -l) - gpu_num=$(nvidia-smi -L|wc -l) - python ../util/show_profile.py profile $thread_num >> profile_log_$1 + awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1 + awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1 + rm -rf gpu_use.log gpu_utilization.log + $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1 tail -n 8 profile >> profile_log_$1 echo "" >> profile_log_$1 done done +#Divided log +awk 'BEGIN{RS="\n\n"}{i++}{print > "bert_log_"i}' profile_log_$1 +mkdir bert_log && mv bert_log_* bert_log ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/examples/criteo_ctr_with_cube/benchmark.py b/python/examples/criteo_ctr_with_cube/benchmark.py index a850d244b0a5a1a01e98a6207fa9674b6ea0af1a..324eb18e214237cdb0d228fc6b57c8efd3665cc9 100755 --- a/python/examples/criteo_ctr_with_cube/benchmark.py +++ b/python/examples/criteo_ctr_with_cube/benchmark.py @@ -24,6 +24,7 @@ from paddle_serving_client.utils import MultiThreadRunner from paddle_serving_client.utils import benchmark_args from paddle_serving_client.metric import auc +py_version = sys.version_info[0] args = benchmark_args() @@ -49,7 +50,10 @@ def single_func(idx, resource): if args.batch_size > 0: feed_batch = [] for bi in range(args.batch_size): - data = reader().next() + if py_version == 2: + data = reader().next() + else: + data = reader().__next__() feed_dict = {} feed_dict['dense_input'] = data[0][0] for i in range(1, 27): @@ -71,14 +75,17 @@ if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() endpoint_list = ["127.0.0.1:9292"] #result = single_func(0, {"endpoint": endpoint_list}) + start = time.time() result = multi_thread_runner.run(single_func, args.thread, {"endpoint": endpoint_list}) - print(result) + end = time.time() + total_cost = end - start avg_cost = 0 qps = 0 for i in range(args.thread): avg_cost += result[0][i * 2 + 0] qps += result[0][i * 2 + 1] 
avg_cost = avg_cost / args.thread + print("total cost: {}".format(total_cost)) print("average total cost {} s.".format(avg_cost)) print("qps {} ins/s".format(qps)) diff --git a/python/examples/criteo_ctr_with_cube/benchmark.sh b/python/examples/criteo_ctr_with_cube/benchmark.sh index 35b19b637d9e8dec10fd3b59224c5c17e3ba5f53..21daf9331ec4a7ba98ac73fc4570b024681aa06a 100755 --- a/python/examples/criteo_ctr_with_cube/benchmark.sh +++ b/python/examples/criteo_ctr_with_cube/benchmark.sh @@ -1,9 +1,23 @@ rm profile_log export FLAGS_profile_client=1 export FLAGS_profile_server=1 -for thread_num in 1 2 4 8 16 + +wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz --no-check-certificate +tar xf ctr_cube_unittest.tar.gz +mv models/ctr_client_conf ./ +mv models/ctr_serving_model_kv ./ +mv models/data ./cube/ + +wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz --no-check-certificate +tar xf cube_app.tar.gz +mv cube_app/cube* ./cube/ +sh cube_prepare.sh & + +python test_server.py ctr_serving_model_kv > serving_log 2>&1 & + +for thread_num in 1 4 16 do -for batch_size in 1 4 16 64 256 +for batch_size in 1 4 16 64 do $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 echo "batch size : $batch_size" @@ -11,6 +25,8 @@ do echo "========================================" echo "batch size : $batch_size" >> profile_log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log - tail -n 2 profile >> profile_log + tail -n 3 profile >> profile_log done done + +ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/examples/criteo_ctr_with_cube/benchmark_cube.sh b/python/examples/criteo_ctr_with_cube/benchmark_cube.sh new file mode 100755 index 0000000000000000000000000000000000000000..e4a2cb3681642312f9c7fe8199cc4d7e68f5d9bf --- /dev/null +++ b/python/examples/criteo_ctr_with_cube/benchmark_cube.sh @@ -0,0 +1,33 @@ +rm profile_log + +wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz --no-check-certificate +tar xf ctr_cube_unittest.tar.gz +mv models/ctr_client_conf ./ +mv models/ctr_serving_model_kv ./ +mv models/data ./cube/ + +wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz --no-check-certificate +tar xf cube_app.tar.gz +mv cube_app/cube* ./cube/ +sh cube_prepare.sh & + +cp ../../../build_server/core/cube/cube-api/cube-cli . +python gen_key.py + +for thread_num in 1 4 16 32 +do +for batch_size in 1000 +do + ./cube-cli -config_file ./cube/conf/cube.conf -keys key -dict test_dict -thread_num $thread_num --batch $batch_size > profile 2>&1 + echo "batch size : $batch_size" + echo "thread num : $thread_num" + echo "========================================" + echo "batch size : $batch_size" >> profile_log + echo "thread num : $thread_num" >> profile_log + tail -n 7 profile | head -n 4 >> profile_log + tail -n 2 profile >> profile_log + +done +done + +ps -ef|grep 'cube'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/examples/criteo_ctr_with_cube/gen_key.py b/python/examples/criteo_ctr_with_cube/gen_key.py new file mode 100644 index 0000000000000000000000000000000000000000..115d81701fb2c8b78085c4c88a685dda992f2c27 --- /dev/null +++ b/python/examples/criteo_ctr_with_cube/gen_key.py @@ -0,0 +1,20 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import random + +with open("key", "w") as f: + for i in range(1000000): + f.write("{}\n".format(random.randint(0, 999999))) diff --git a/python/examples/criteo_ctr_with_cube/test_server.py b/python/examples/criteo_ctr_with_cube/test_server.py index 5399ace839a00071c0ed9ce384e5523b68db27fc..479c602910b5afa52b35a66d00316f54905c0741 100755 --- a/python/examples/criteo_ctr_with_cube/test_server.py +++ b/python/examples/criteo_ctr_with_cube/test_server.py @@ -33,5 +33,9 @@ server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(4) server.load_model_config(sys.argv[1]) -server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.prepare_server( + workdir="work_dir1", + port=9292, + device="cpu", + cube_conf="./cube/conf/cube.conf") server.run_server() diff --git a/python/examples/criteo_ctr_with_cube/test_server_gpu.py b/python/examples/criteo_ctr_with_cube/test_server_gpu.py index 382be99bd37a52630d78bb84ef7e53047b018c95..33f74f91c13fca489db70a4d4171ae756355c787 100755 --- a/python/examples/criteo_ctr_with_cube/test_server_gpu.py +++ b/python/examples/criteo_ctr_with_cube/test_server_gpu.py @@ -33,5 +33,9 @@ server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(4) server.load_model_config(sys.argv[1]) -server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.prepare_server( + workdir="work_dir1", + port=9292, + device="cpu", + cube_conf="./cube/conf/cube.conf") server.run_server() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py index 361d5a59becb7c110907f66d8b651e05e7eb418e..8a3bee4e628ddd0896c1d2facbccbf2ef493df2b 100755 --- a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py @@ -33,5 +33,9 @@ server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(4) server.load_model_config(sys.argv[1], sys.argv[2]) -server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.prepare_server( + workdir="work_dir1", + port=9292, + device="cpu", + cube_conf="./cube/conf/cube.conf") server.run_server() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py index 38e1bf82118f6af7cfe7b467003332a5328b2979..343ded248e2ead554cd0235f890ebefc0b09c071 100755 --- a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py @@ -33,5 +33,9 @@ server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(4) server.load_model_config(sys.argv[1], sys.argv[2]) -server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.prepare_server( + 
workdir="work_dir1", + port=9292, + device="cpu", + cube_conf="./cube/conf/cube.conf") server.run_server() diff --git a/python/examples/imagenet/benchmark.py b/python/examples/imagenet/benchmark.py index f4a7b083300be727ba81e880c41791bf36bfd6f7..0181b873a36c0e65beff1d03f750b5d78c89aa06 100644 --- a/python/examples/imagenet/benchmark.py +++ b/python/examples/imagenet/benchmark.py @@ -24,7 +24,7 @@ import json import base64 from paddle_serving_client import Client from paddle_serving_client.utils import MultiThreadRunner -from paddle_serving_client.utils import benchmark_args +from paddle_serving_client.utils import benchmark_args, show_latency from paddle_serving_app.reader import Sequential, File2Image, Resize from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize @@ -38,7 +38,11 @@ seq_preprocess = Sequential([ def single_func(idx, resource): file_list = [] - turns = 10 + turns = resource["turns"] + latency_flags = False + if os.getenv("FLAGS_serving_latency"): + latency_flags = True + latency_list = [] for file_name in os.listdir("./image_data/n01440764"): file_list.append(file_name) img_list = [] @@ -56,6 +60,7 @@ def single_func(idx, resource): start = time.time() for i in range(turns): if args.batch_size >= 1: + l_start = time.time() feed_batch = [] i_start = time.time() for bi in range(args.batch_size): @@ -69,6 +74,9 @@ def single_func(idx, resource): int(round(i_end * 1000000)))) result = client.predict(feed=feed_batch, fetch=fetch) + l_end = time.time() + if latency_flags: + latency_list.append(l_end * 1000 - l_start * 1000) else: print("unsupport batch size {}".format(args.batch_size)) @@ -88,6 +96,8 @@ def single_func(idx, resource): r = requests.post( server, data=req, headers={"Content-Type": "application/json"}) end = time.time() + if latency_flags: + return [[end - start], latency_list] return [[end - start]] @@ -96,11 +106,21 @@ if __name__ == '__main__': endpoint_list = [ "127.0.0.1:9292", "127.0.0.1:9293", "127.0.0.1:9294", "127.0.0.1:9295" ] - result = multi_thread_runner.run(single_func, args.thread, - {"endpoint": endpoint_list}) + turns = 100 + start = time.time() + result = multi_thread_runner.run( + single_func, args.thread, {"endpoint": endpoint_list, + "turns": turns}) #result = single_func(0, {"endpoint": endpoint_list}) + end = time.time() + total_cost = end - start avg_cost = 0 for i in range(args.thread): avg_cost += result[0][i] avg_cost = avg_cost / args.thread - print("average total cost {} s.".format(avg_cost)) + print("total cost: {}s".format(end - start)) + print("each thread cost: {}s.".format(avg_cost)) + print("qps: {}samples/s".format(args.batch_size * args.thread * turns / + total_cost)) + if os.getenv("FLAGS_serving_latency"): + show_latency(result[1]) diff --git a/python/examples/imagenet/benchmark.sh b/python/examples/imagenet/benchmark.sh index d7eb89fa9b0b68e5e442d15bdf16f431c91ba94d..620cf2a3d9fe6c292cedecd84dfda0bce42c15d4 100644 --- a/python/examples/imagenet/benchmark.sh +++ b/python/examples/imagenet/benchmark.sh @@ -1,28 +1,50 @@ -rm profile_log +rm profile_log* export CUDA_VISIBLE_DEVICES=0,1,2,3 export FLAGS_profile_server=1 export FLAGS_profile_client=1 -python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & +python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog & sleep 5 +gpu_id=0 +#save cpu and gpu utilization log +if [ -d utilization ];then + rm -rf utilization +else 
+ mkdir utilization +fi #warm up -$PYTHONROOT/bin/python benchmark.py --thread 8 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 +$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 +echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py -for thread_num in 4 8 16 +for thread_num in 1 4 8 16 do for batch_size in 1 4 16 64 do + job_bt=`date '+%Y%m%d%H%M%S'` + nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 & + nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 & + gpu_memory_pid=$! $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 + kill ${gpu_memory_pid} + kill `ps -ef|grep used_memory|awk '{print $2}'` echo "model name :" $1 echo "thread num :" $thread_num echo "batch size :" $batch_size echo "=================Done====================" echo "model name :$1" >> profile_log echo "batch size :$batch_size" >> profile_log + job_et=`date '+%Y%m%d%H%M%S'` + awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1 + awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1 + rm -rf gpu_use.log gpu_utilization.log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log tail -n 8 profile >> profile_log + echo "" >> profile_log_$1 done done +#Divided log +awk 'BEGIN{RS="\n\n"}{i++}{print > "ResNet_log_"i}' profile_log_$1 +mkdir $1_log && mv ResNet_log_* $1_log ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/examples/imdb/benchmark.sh b/python/examples/imdb/benchmark.sh index 2b2d91c8192e66f6b3eee19b59fc7f5dc9339aa6..7db9a1086314047930bee32fe8c695c2b71753bf 100644 --- a/python/examples/imdb/benchmark.sh +++ b/python/examples/imdb/benchmark.sh @@ -1,19 +1,28 @@ -rm profile_log -export CUDA_VISIBLE_DEVICES=0,1,2,3 +rm profile_log* export FLAGS_profile_server=1 export FLAGS_profile_client=1 export FLAGS_serving_latency=1 -python -m paddle_serving_server_gpu.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1,2,3 --mem_optim --ir_optim 2> elog > stdlog & +$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --mem_optim --ir_optim 2> elog > stdlog & hostname=`echo $(hostname)|awk -F '.baidu.com' '{print $1}'` - +#save cpu and gpu utilization log +if [ -d utilization ];then + rm -rf utilization +else + mkdir utilization +fi sleep 5 -for thread_num in 4 8 16 + +#warm up +$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 +echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py + +for thread_num in 1 4 8 16 do -for batch_size in 1 4 16 64 256 +for batch_size in 1 4 16 64 do job_bt=`date '+%Y%m%d%H%M%S'` - python benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 + $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1 echo "model_name:" $1 echo "thread_num:" $thread_num 
echo "batch_size:" $batch_size @@ -21,15 +30,14 @@ do echo "model_name:$1" >> profile_log_$1 echo "batch_size:$batch_size" >> profile_log_$1 job_et=`date '+%Y%m%d%H%M%S'` - awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY_USE:", max}' gpu_use.log >> profile_log_$1 - monquery -n ${hostname} -i GPU_AVERAGE_UTILIZATION -s $job_bt -e $job_et -d 10 > gpu_log_file_${job_bt} - monquery -n ${hostname} -i CPU_USER -s $job_bt -e $job_et -d 10 > cpu_log_file_${job_bt} - cpu_num=$(cat /proc/cpuinfo | grep processor | wc -l) - gpu_num=$(nvidia-smi -L|wc -l) - python ../util/show_profile.py profile $thread_num >> profile_log_$1 + $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1 + $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1 tail -n 8 profile >> profile_log_$1 echo "" >> profile_log_$1 done done +#Divided log +awk 'BEGIN{RS="\n\n"}{i++}{print > "imdb_log_"i}' profile_log_$1 +mkdir $1_log && mv imdb_log_* $1_log ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9 diff --git a/python/paddle_serving_app/reader/image_reader.py b/python/paddle_serving_app/reader/image_reader.py index 096f46549af137cb04a87e26a3b28c8d42e33daa..cfd50e2514ae796af06d3301fc728bf0f5a88ad0 100644 --- a/python/paddle_serving_app/reader/image_reader.py +++ b/python/paddle_serving_app/reader/image_reader.py @@ -677,7 +677,7 @@ class Resize(object): Args: size (sequence or int): Desired output size. If size is a sequence like - (h, w), output size will be matched to this. If size is an int, + (w, h), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size) diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 875e275c759d9fb1a9ccb6632816418a75a93aec..1e5fd16ed6c153a28cd72422ca3ef7b9177cb079 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -25,6 +25,7 @@ from contextlib import closing import collections import fcntl +import shutil import numpy as np import grpc from .proto import multi_lang_general_model_service_pb2 @@ -230,7 +231,7 @@ class Server(object): infer_service.workflows.extend(["workflow1"]) self.infer_service_conf.services.extend([infer_service]) - def _prepare_resource(self, workdir): + def _prepare_resource(self, workdir, cube_conf): self.workdir = workdir if self.resource_conf == None: with open("{}/{}".format(workdir, self.general_model_config_fn), @@ -242,6 +243,11 @@ class Server(object): if "dist_kv" in node.name: self.resource_conf.cube_config_path = workdir self.resource_conf.cube_config_file = self.cube_config_fn + if cube_conf == None: + raise ValueError( + "Please set the path of cube.conf while use dist_kv op." 
+ ) + shutil.copy(cube_conf, workdir) if "quant" in node.name: self.resource_conf.cube_quant_bits = 8 self.resource_conf.model_toolkit_path = workdir @@ -366,7 +372,11 @@ class Server(object): os.chdir(self.cur_path) self.bin_path = self.server_path + "/serving" - def prepare_server(self, workdir=None, port=9292, device="cpu"): + def prepare_server(self, + workdir=None, + port=9292, + device="cpu", + cube_conf=None): if workdir == None: workdir = "./tmp" os.system("mkdir {}".format(workdir)) @@ -377,7 +387,7 @@ class Server(object): if not self.port_is_available(port): raise SystemExit("Port {} is already used".format(port)) self.set_port(port) - self._prepare_resource(workdir) + self._prepare_resource(workdir, cube_conf) self._prepare_engine(self.model_config_paths, device) self._prepare_infer_service(port) self.workdir = workdir @@ -645,7 +655,11 @@ class MultiLangServer(object): server_config_paths) self.bclient_config_path_ = client_config_path - def prepare_server(self, workdir=None, port=9292, device="cpu"): + def prepare_server(self, + workdir=None, + port=9292, + device="cpu", + cube_conf=None): if not self._port_is_available(port): raise SystemExit("Prot {} is already used".format(port)) default_port = 12000 @@ -656,7 +670,10 @@ class MultiLangServer(object): self.port_list_.append(default_port + i) break self.bserver_.prepare_server( - workdir=workdir, port=self.port_list_[0], device=device) + workdir=workdir, + port=self.port_list_[0], + device=device, + cube_conf=cube_conf) self.set_port(port) def _launch_brpc_service(self, bserver): diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 26288f6ae65ce823a57ee201130d40ff6510c4a5..df04cb7840bbacd90ccb7e3c66147a6856b23e02 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -26,7 +26,7 @@ from contextlib import closing import argparse import collections import fcntl - +import shutil import numpy as np import grpc from .proto import multi_lang_general_model_service_pb2 @@ -285,7 +285,7 @@ class Server(object): infer_service.workflows.extend(["workflow1"]) self.infer_service_conf.services.extend([infer_service]) - def _prepare_resource(self, workdir): + def _prepare_resource(self, workdir, cube_conf): self.workdir = workdir if self.resource_conf == None: with open("{}/{}".format(workdir, self.general_model_config_fn), @@ -297,6 +297,11 @@ class Server(object): if "dist_kv" in node.name: self.resource_conf.cube_config_path = workdir self.resource_conf.cube_config_file = self.cube_config_fn + if cube_conf == None: + raise ValueError( + "Please set the path of cube.conf while use dist_kv op." 
+ ) + shutil.copy(cube_conf, workdir) self.resource_conf.model_toolkit_path = workdir self.resource_conf.model_toolkit_file = self.model_toolkit_fn self.resource_conf.general_model_path = workdir @@ -406,7 +411,11 @@ class Server(object): os.chdir(self.cur_path) self.bin_path = self.server_path + "/serving" - def prepare_server(self, workdir=None, port=9292, device="cpu"): + def prepare_server(self, + workdir=None, + port=9292, + device="cpu", + cube_conf=None): if workdir == None: workdir = "./tmp" os.system("mkdir {}".format(workdir)) @@ -418,7 +427,7 @@ class Server(object): raise SystemExit("Port {} is already used".format(port)) self.set_port(port) - self._prepare_resource(workdir) + self._prepare_resource(workdir, cube_conf) self._prepare_engine(self.model_config_paths, device) self._prepare_infer_service(port) self.workdir = workdir @@ -690,7 +699,11 @@ class MultiLangServer(object): server_config_paths) self.bclient_config_path_ = client_config_path - def prepare_server(self, workdir=None, port=9292, device="cpu"): + def prepare_server(self, + workdir=None, + port=9292, + device="cpu", + cube_conf=None): if not self._port_is_available(port): raise SystemExit("Prot {} is already used".format(port)) default_port = 12000 @@ -701,7 +714,10 @@ class MultiLangServer(object): self.port_list_.append(default_port + i) break self.bserver_.prepare_server( - workdir=workdir, port=self.port_list_[0], device=device) + workdir=workdir, + port=self.port_list_[0], + device=device, + cube_conf=cube_conf) self.set_port(port) def _launch_brpc_service(self, bserver): diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 097123165988fb266f7c4a3a0da603ade6d98be1..498969b9c886a56e298d397808c2da14a38bac85 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -229,10 +229,7 @@ function python_run_criteo_ctr_with_cube() { check_cmd "mv models/data ./cube/" check_cmd "mv models/ut_data ./" cp ../../../build-server-$TYPE/output/bin/cube* ./cube/ - mkdir -p $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ - yes | cp ../../../build-server-$TYPE/output/demo/serving/bin/serving $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server/serving-cpu-avx-openblas-0.1.3/ sh cube_prepare.sh & - check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" python test_server.py ctr_serving_model_kv & sleep 5 check_cmd "python test_client.py ctr_client_conf/serving_client_conf.prototxt ./ut_data >score" @@ -257,10 +254,7 @@ function python_run_criteo_ctr_with_cube() { check_cmd "mv models/data ./cube/" check_cmd "mv models/ut_data ./" cp ../../../build-server-$TYPE/output/bin/cube* ./cube/ - mkdir -p $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server_gpu/serving-gpu-0.1.3/ - yes | cp ../../../build-server-$TYPE/output/demo/serving/bin/serving $PYTHONROOT/lib/python2.7/site-packages/paddle_serving_server_gpu/serving-gpu-0.1.3/ sh cube_prepare.sh & - check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" python test_server_gpu.py ctr_serving_model_kv & sleep 5 # for warm up