diff --git a/core/configure/proto/multi_lang_general_model_service.proto b/core/configure/proto/multi_lang_general_model_service.proto
index 2a4764a041d7f817aba1d516427a241498d4c2e0..18fbcf760647e1694e738c0832fe45f4f7d9934f 100755
--- a/core/configure/proto/multi_lang_general_model_service.proto
+++ b/core/configure/proto/multi_lang_general_model_service.proto
@@ -59,7 +59,7 @@ message SimpleResponse { required int32 err_code = 1; }
 
 message GetClientConfigRequest {}
 
-message GetClientConfigResponse { repeated string client_config_str_list = 1; }
+message GetClientConfigResponse { required string client_config_str = 1; }
 
 service MultiLangGeneralModelService {
   rpc Inference(InferenceRequest) returns (InferenceResponse) {}
diff --git a/doc/COMPILE.md b/doc/COMPILE.md
index ef161d141f410c991f9a4bbc0c2605d08827dd31..79a26f857c5b06dd3da39988879b297ce35db167 100755
--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -153,7 +153,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
     -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DOPENCV_DIR=${OPENCV_DIR} \
-    -DWITH_OPENCV=ON
+    -DWITH_OPENCV=ON \
     -DSERVER=ON ..
 make -j10
 ```
diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md
index 53c3548a46abdccedb53c629bb2c0bd0dd3bad17..3d04f4c873cb6b6c004c1377cbedaf30c89d9d5e 100755
--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -152,7 +152,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
     -DPYTHON_LIBRARIES=$PYTHON_LIBRARIES \
     -DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
     -DOPENCV_DIR=${OPENCV_DIR} \
-    -DWITH_OPENCV=ON
+    -DWITH_OPENCV=ON \
     -DSERVER=ON ..
 make -j10
 ```
diff --git a/python/examples/bert/README_CN.md b/python/examples/bert/README_CN.md
index ef28089b559b7281613ae2fb78b8039978db2510..a03b577493fc763c43d1ce96766d4e9eb260565e 100644
--- a/python/examples/bert/README_CN.md
+++ b/python/examples/bert/README_CN.md
@@ -94,4 +94,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"words": "hello"}]
 
 bash benchmark.sh bert_seq128_model bert_seq128_client
 ```
 The benchmark log is written to profile_log_bert_seq128_model
+To change the benchmark parameters, edit the settings in benchmark.sh.
+
+Note: do not append a trailing '/' to the bert_seq128_model and bert_seq128_client paths; this example must run on a GPU machine.
diff --git a/python/examples/bert/benchmark.sh b/python/examples/bert/benchmark.sh
index 1a8263556d345f367f503460274f1cb0165df2c5..7e374db3ee5a5bdccdc75dc2884b9dbbfcb60eca 100755
--- a/python/examples/bert/benchmark.sh
+++ b/python/examples/bert/benchmark.sh
@@ -17,27 +17,30 @@ sleep 5
 
 #warm up
 $PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
-echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
 for thread_num in 1 4 8 16
 do
 for batch_size in 1 4 16 64
 do
     job_bt=`date '+%Y%m%d%H%M%S'`
-    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
     nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    rm -rf cpu.txt
+    $PYTHONROOT/bin/python3 cpu.py &
     gpu_memory_pid=$!
     $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
-    kill ${gpu_memory_pid}
-    kill `ps -ef|grep used_memory|awk '{print $2}'`
+    kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
     echo "model_name:" $1
     echo "thread_num:" $thread_num
     echo "batch_size:" $batch_size
     echo "=================Done===================="
     echo "model_name:$1" >> profile_log_$1
     echo "batch_size:$batch_size" >> profile_log_$1
-    $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1
     job_et=`date '+%Y%m%d%H%M%S'`
-    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
     awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
     rm -rf gpu_use.log gpu_utilization.log
     $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
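
The echo -e line above writes a small polling monitor to cpu.py. Reformatted for readability, the generated script is:

# cpu.py, as emitted by the echo -e line above (reformatted for readability).
import psutil
import time

while True:
    cpu_res = psutil.cpu_percent()          # system-wide CPU utilization since the last call
    with open('cpu.txt', 'a+') as f:
        f.write(f'{cpu_res}\n')             # one sample per line; an awk pass later takes the max
    time.sleep(0.1)

The harness launches it in the background before each run and kills the cpu.py process once benchmark.py finishes.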
diff --git a/python/examples/fit_a_line/README_CN.md b/python/examples/fit_a_line/README_CN.md
index e115b6debb330adbd4c81f94338a67305caa6d37..d58eb4fbf15045ef2e9d873b2c8517f86cbca0de 100644
--- a/python/examples/fit_a_line/README_CN.md
+++ b/python/examples/fit_a_line/README_CN.md
@@ -49,4 +49,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"x": [0.0137, -0.1
 
 bash benchmark.sh uci_housing_model uci_housing_client
 ```
 The benchmark log is written to profile_log_uci_housing_model
+To change the benchmark parameters, edit the settings in benchmark.sh.
+
+Note: do not append a trailing '/' to the uci_housing_model and uci_housing_client paths; this example must run on a GPU machine.
diff --git a/python/examples/fit_a_line/benchmark.sh b/python/examples/fit_a_line/benchmark.sh
new file mode 100755
index 0000000000000000000000000000000000000000..7e374db3ee5a5bdccdc75dc2884b9dbbfcb60eca
--- /dev/null
+++ b/python/examples/fit_a_line/benchmark.sh
@@ -0,0 +1,55 @@
+rm profile_log*
+export CUDA_VISIBLE_DEVICES=0,1
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+
+gpu_id=0
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+    rm -rf utilization
+else
+    mkdir utilization
+fi
+#start server
+$PYTHONROOT/bin/python3 -m paddle_serving_server.serve --model $1 --port 9292 --thread 4 --gpu_ids 0,1 --mem_optim --ir_optim > elog 2>&1 &
+sleep 5
+
+#warm up
+$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\nimport time\nwhile True:\n\tcpu_res = psutil.cpu_percent()\n\twith open('cpu.txt', 'a+') as f:\n\t\tf.write(f'{cpu_res}\\\n')\n\ttime.sleep(0.1)" > cpu.py
+for thread_num in 1 4 8 16
+do
+for batch_size in 1 4 16 64
+do
+    job_bt=`date '+%Y%m%d%H%M%S'`
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_memory_use.log 2>&1 &
+    nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    rm -rf cpu.txt
+    $PYTHONROOT/bin/python3 cpu.py &
+    gpu_memory_pid=$!
+    $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    kill `ps -ef|grep used_memory|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep utilization.gpu|awk '{print $2}'` > /dev/null
+    kill `ps -ef|grep cpu.py|awk '{print $2}'` > /dev/null
+    echo "model_name:" $1
+    echo "thread_num:" $thread_num
+    echo "batch_size:" $batch_size
+    echo "=================Done===================="
+    echo "model_name:$1" >> profile_log_$1
+    echo "batch_size:$batch_size" >> profile_log_$1
+    job_et=`date '+%Y%m%d%H%M%S'`
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "CPU_UTILIZATION:", max}' cpu.txt >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_memory_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+    rm -rf gpu_use.log gpu_utilization.log
+    $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 8 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
+done
+
+#Divided log
+awk 'BEGIN{RS="\n\n"}{i++}{print > "bert_log_"i}' profile_log_$1
+mkdir bert_log && mv bert_log_* bert_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
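
Each awk one-liner above reduces one sampling log to its maximum, and the NR>1 guard skips the first line (the csv header in the nvidia-smi logs). A rough Python equivalent of that reduction, for illustration only:

# Illustration only: mirrors the awk max-reduction used in the script above.
def max_after_first_line(path):
    with open(path) as f:
        samples = f.readlines()[1:]    # NR>1: drop the header line
    values = [float(line.split()[0]) for line in samples if line.strip()]
    return max(values, default=0)

print("MAX_GPU_MEMORY:", max_after_first_line("gpu_memory_use.log"))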
diff --git a/python/paddle_serving_client/client.py b/python/paddle_serving_client/client.py
index 48ad112ab015242b85753489f84422c4187f6ec1..8b1fc38032133230f450f83b9139d5f347b2ae1b 100755
--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -554,15 +554,8 @@ class MultiLangClient(object):
         get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest(
         )
         resp = self.stub_.GetClientConfig(get_client_config_req)
-        model_config_path_list = resp.client_config_str_list
-        file_path_list = []
-        for single_model_config in model_config_path_list:
-            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_server_conf.prototxt".format(
-                    single_model_config))
-            elif os.path.isfile(single_model_config):
-                file_path_list.append(single_model_config)
-        self._parse_model_config(file_path_list)
+        model_config_str = resp.client_config_str
+        self._parse_model_config(model_config_str)
 
     def _flatten_list(self, nested_list):
         for item in nested_list:
@@ -572,23 +565,10 @@ class MultiLangClient(object):
             else:
                 yield item
 
-    def _parse_model_config(self, model_config_path_list):
-        if isinstance(model_config_path_list, str):
-            model_config_path_list = [model_config_path_list]
-        elif isinstance(model_config_path_list, list):
-            pass
-
-        file_path_list = []
-        for single_model_config in model_config_path_list:
-            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_client_conf.prototxt".format(
-                    single_model_config))
-            elif os.path.isfile(single_model_config):
-                file_path_list.append(single_model_config)
+    def _parse_model_config(self, model_config_str):
         model_conf = m_config.GeneralModelConfig()
-        f = open(file_path_list[0], 'r')
-        model_conf = google.protobuf.text_format.Merge(
-            str(f.read()), model_conf)
+        model_conf = google.protobuf.text_format.Merge(model_config_str,
+                                                       model_conf)
         self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
         self.feed_types_ = {}
         self.feed_shapes_ = {}
@@ -598,11 +578,6 @@ class MultiLangClient(object):
             self.feed_shapes_[var.alias_name] = var.shape
             if var.is_lod_tensor:
                 self.lod_tensor_set_.add(var.alias_name)
-        if len(file_path_list) > 1:
-            model_conf = m_config.GeneralModelConfig()
-            f = open(file_path_list[-1], 'r')
-            model_conf = google.protobuf.text_format.Merge(
-                str(f.read()), model_conf)
         self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
         self.fetch_types_ = {}
         for i, var in enumerate(model_conf.fetch_var):
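
The reworked client no longer opens a prototxt file itself; the server ships the config text and the client hands it straight to google.protobuf.text_format.Merge. A minimal standalone sketch of that parse step, using a hypothetical single-model config (the import path and field values are illustrative assumptions):

# Sketch of the parse step above; the config below is hypothetical.
import google.protobuf.text_format
from paddle_serving_client.proto import general_model_config_pb2 as m_config

config_str = '''
feed_var {
  name: "x"
  alias_name: "x"
  is_lod_tensor: false
  feed_type: 1
  shape: 13
}
fetch_var {
  name: "fc_0.tmp_1"
  alias_name: "price"
  is_lod_tensor: false
  fetch_type: 1
  shape: 1
}
'''

model_conf = m_config.GeneralModelConfig()
model_conf = google.protobuf.text_format.Merge(config_str, model_conf)
print([var.alias_name for var in model_conf.feed_var])   # ['x']
print([var.alias_name for var in model_conf.fetch_var])  # ['price']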
diff --git a/python/paddle_serving_server/rpc_service.py b/python/paddle_serving_server/rpc_service.py
index d9d302831fd2e3148547e24772005efb38cb8f32..f2503a5d86b032499543f5f4fc78b8b824218a44 100755
--- a/python/paddle_serving_server/rpc_service.py
+++ b/python/paddle_serving_server/rpc_service.py
@@ -198,5 +198,14 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
         #model_config_path_list is list right now.
         #dict should be added when graphMaker is used.
         resp = multi_lang_general_model_service_pb2.GetClientConfigResponse()
-        resp.client_config_str_list[:] = self.model_config_path_list
+        model_config_str = []
+        for single_model_config in self.model_config_path_list:
+            if os.path.isdir(single_model_config):
+                with open("{}/serving_server_conf.prototxt".format(
+                        single_model_config)) as f:
+                    model_config_str.append(str(f.read()))
+            elif os.path.isfile(single_model_config):
+                with open(single_model_config) as f:
+                    model_config_str.append(str(f.read()))
+        resp.client_config_str = model_config_str[0]
         return resp
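
Note that the servicer above collapses a multi-model configuration to its first entry (model_config_str[0]), which matches the new required-string proto field. A sketch of the corresponding client-side call, assuming the gRPC stubs generated from this repository's proto and an illustrative endpoint:

# Sketch of the new GetClientConfig round trip; the endpoint is illustrative.
import grpc
from paddle_serving_client.proto import multi_lang_general_model_service_pb2 as pb2
from paddle_serving_client.proto import multi_lang_general_model_service_pb2_grpc as pb2_grpc

channel = grpc.insecure_channel("127.0.0.1:9393")
stub = pb2_grpc.MultiLangGeneralModelServiceStub(channel)
resp = stub.GetClientConfig(pb2.GetClientConfigRequest())
print(resp.client_config_str[:200])   # one prototxt string now, not a repeated list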