diff --git a/benchmark/run_benchmark_det.sh b/benchmark/run_benchmark_det.sh
new file mode 100644
index 0000000000000000000000000000000000000000..36228adcf47566dbf24bcb4bd6dd0f4d1565451b
--- /dev/null
+++ b/benchmark/run_benchmark_det.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+set -xe
+# Benchmark one detection model: run tools/train.py under a 15-minute timeout and capture its log.
+# Example: CUDA_VISIBLE_DEVICES=0 bash run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
+
+# Read the positional arguments into the globals consumed by _train.
+function _set_params(){
+    run_mode=${1:-"sp"}          # sp: single GPU | mp: multiple GPUs
+    batch_size=${2:-"64"}
+    fp_item=${3:-"fp32"}         # fp32|fp16
+    max_iter=${4:-"500"}         # optional; passed to training as Global.epoch_num to stop early
+    model_name=${5:-"model_name"}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR is expected to be set by QA later
+
+    # No need to modify anything below.
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})              # intentionally unquoted: split the device list on spaces
+    num_gpu_devices=${#arr[*]}
+    log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+}
+
+# Build the training command for the selected run_mode, run it, and report SUCCESS/FAIL.
+function _train(){
+    echo "Train on ${num_gpu_devices} GPUs"
+    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
+
+    train_cmd="-c configs/det/${model_name}.yml
+               -o Train.loader.batch_size_per_card=${batch_size}
+               -o Global.epoch_num=${max_iter} "
+    case ${run_mode} in
+    sp)
+        train_cmd="python3.7 tools/train.py ${train_cmd}"
+        ;;
+    mp)
+        train_cmd="python3.7 -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
+        ;;
+    *)  echo "choose run_mode(sp or mp)"; exit 1;
+    esac
+    # No need to modify anything below.
+    # Test the command directly in the `if`: under `set -e` a separate `$?` check would
+    # never run on failure because the shell exits first. train_cmd is left unquoted on
+    # purpose so it splits into the command and its arguments.
+    if timeout 15m ${train_cmd} > "${log_file}" 2>&1; then
+        echo -e "${model_name}, SUCCESS"
+        export job_fail_flag=0
+    else
+        echo -e "${model_name}, FAIL"
+        export job_fail_flag=1
+    fi
+    # Best-effort cleanup of leftover python3.7 processes; pkill exits non-zero when nothing
+    # matches, which must not abort the script under `set -e`.
+    pkill -9 -f 'python3.7' || true
+
+    if [ "$run_mode" = "mp" ] && [ -d mylog ]; then
+        rm "${log_file}"
+        cp mylog/workerlog.0 "${log_file}"
+    fi
+}
+
+_set_params "$@"
+_train
diff --git a/benchmark/run_det.sh b/benchmark/run_det.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c94af85c365d66b2e0f0a143f14f0340d2f56a73
--- /dev/null
+++ b/benchmark/run_det.sh
@@ -0,0 +1,27 @@
+# Script for stably reproducing the benchmark. By default it runs in the standard docker environment with py37: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  py=37
+# Working directory: to be specified
+cd PaddleOCR
+# 1 Install the dependencies this model needs (note it here if any optimization strategy is enabled)
+python3.7 -m pip install -r requirements.txt
+# 2 Fetch the data and pretrained model this model needs (-P sets the download directory)
+wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
+# 3 Batch run (if batching is inconvenient, steps 1 and 2 must go into each single-model script)
+
+model_mode_list=(det_mv3_db det_r50_vd_east)
+fp_item_list=(fp32)
+bs_list=(256 128)
+for model_mode in "${model_mode_list[@]}"; do
+    for fp_item in "${fp_item_list[@]}"; do
+        for bs_item in "${bs_list[@]}"; do
+            echo "index is speed, 1gpus, begin, ${model_mode}"
+            run_mode=sp
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh "${run_mode}" "${bs_item}" "${fp_item}" 10 "${model_mode}"     #  (5min)
+            sleep 60
+            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
+            run_mode=mp
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh "${run_mode}" "${bs_item}" "${fp_item}" 10 "${model_mode}"
+            sleep 60
+        done
+    done
+done
diff --git a/configs/det/det_r50_vd_east.yml b/configs/det/det_r50_vd_east.yml
index 0253c5bd9940fa6c0ec7da2c6639c1bc060842ca..e84a5fa7a7af34bde5e0abc6fed2e01f6ce42e6b 100644
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
@@ -8,7 +8,7 @@ Global:
   # evaluation is run every 5000 iterations after the 4000th iteration
   eval_batch_step: [4000, 5000]
   cal_metric_during_train: False
-  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
+  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained
   checkpoints:
   save_inference_dir:
   use_visualdl: False