未验证 提交 dcb83b6b 编写于 作者: J Jiawei Wang 提交者: GitHub

Merge branch 'develop' into doc_0.6

# Paddle Serving Design
# C++ Serving Design
([简体中文](./C++DESIGN_CN.md)|English)
......
# Paddle Serving设计方案
# C++ Serving设计方案
(简体中文|[English](./C++DESIGN.md))
......
......@@ -25,7 +25,7 @@ kubectl apply -f https://bit.ly/kong-ingress-dbless
`tools/generate_runtime_docker.sh`文件下,它的使用方式如下
```bash
bash tool/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
```
会生成 cuda10.1,python 3.6,serving版本0.6.0 还有 paddle版本2.0.1的运行镜像。如果有其他疑问,可以执行下列语句得到帮助信息。
......
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "DarkNet53"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "HRNet_W18_C"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/HRNet_W18_C_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "MobileNetV1"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "MobileNetV2"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "MobileNetV3_large_x1_0"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ResNeXt101_vd_64x4d"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ResNet50_vd"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ResNet50_vd_FPGM"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_FPGM_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ResNet50_vd_KL"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_KL_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ResNet50_vd_PACT"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_PACT_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import requests
import json
......@@ -5,11 +19,13 @@ import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
if __name__ == "__main__":
url = "http://127.0.0.1:18000/imagenet/prediction"
url = "http://127.0.0.1:18080/imagenet/prediction"
with open(os.path.join(".", "daisy.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:9993'])
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
with open("daisy.jpg", 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(1):
ret = client.predict(feed_dict={"image": image}, fetch=["label", "prob"])
print(ret)
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "ShuffleNetV2_x1_0"
model_type: "static"
model_source: "PaddleClas"
model_url: "https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_0_pretrained.tar"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.1"
cpu: "Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz X12"
gpu: "T4"
xpu: "None"
api: ""
owner: "cuicheng01"
model_name: "imagenet"
model_type: "static"
model_source: "PaddleClas"
model_url: "model_url_path"
batch_size: 1
num_of_samples: 1000
input_shape: "3,224,224"
runtime_device: "cpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: false
cpu_math_library_num_threads: ""
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="imagenet"
use_gpu=1
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
if [ $use_gpu -eq 1 ]; then
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
else
python3 benchmark.py yaml local_predictor 1 cpu
fi
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----imagenet thread num: $thread_num batch size: $batch_size mode:http use_gpu:$use_gpu----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 resnet50_web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
......@@ -142,10 +142,10 @@ class DBPostProcess(object):
def box_score_fast(self, bitmap, _box):
h, w = bitmap.shape[:2]
box = _box.copy()
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
box[:, 0] = box[:, 0] - xmin
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册