diff --git a/test_tipc/static/README.MD b/test_tipc/static/README.MD deleted file mode 100644 index 94fb8f4e8d4e2dfcd9661c0d09cbfeccfd500884..0000000000000000000000000000000000000000 --- a/test_tipc/static/README.MD +++ /dev/null @@ -1,19 +0,0 @@ -# PaddleDetection 下静态图benchmark模型执行说明 -静态图benchmark测试脚本说明 -# 目录说明 -# Docker 运行环境 -docker image: registry.baidubce.com/paddlepaddle/paddle:latest-dev-cuda11.2-cudnn8-gcc82 -paddle = 2.2.2 -python = 3.7 -# 运行benchmark测试步骤 -git clone https://github.com/PaddlePaddle/PaddleDetection.git -cd PaddleDetection -# 准备数据 -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# 运行模型 -## 单卡(自动运行打开Profiling) -export CUDA_VISIBLE_DEVICES=0 -bash test_tipc/static/${model_item}/N1C1/yolov3_darknet53_270e_coco_bs8_fp32_SingleP_DP.sh -## 多卡 -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -bash test_tipc/static/${model_item}/N1C8/yolov3_darknet53_270e_coco_bs8_fp32_MultiP_DP.sh diff --git a/test_tipc/static/mask_rcnn_r50_1x_coco/N1C1/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh b/test_tipc/static/mask_rcnn_r50_1x_coco/N1C1/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh deleted file mode 100644 index 626555aca6b08e79a053fe99048c92a743af32f2..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_1x_coco/N1C1/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh +++ /dev/null @@ -1,16 +0,0 @@ -model_item=mask_rcnn_r50_1x_coco -bs_item=2 -fp_item=fp32 -run_mode=DP -device_num=N1C1 -max_iter=100 -num_workers=8 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; -# run profiling -sleep 10; -export PROFILING=true -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} 11 ${num_workers} 2>&1; diff --git a/test_tipc/static/mask_rcnn_r50_1x_coco/N1C8/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh b/test_tipc/static/mask_rcnn_r50_1x_coco/N1C8/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh deleted file mode 100644 index 54f8c485ad621917b16475502a29d2343f539ff2..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_1x_coco/N1C8/mask_rcnn_r50_1x_coco_bs2_fp32_DP.sh +++ /dev/null @@ -1,12 +0,0 @@ -model_item=mask_rcnn_r50_1x_coco -bs_item=2 -fp_item=fp32 -run_mode=DP -device_num=N1C8 -max_iter=100 -num_workers=8 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/prepare.sh b/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/prepare.sh deleted file mode 100644 index f2325a10799319ff3293aeb97d5c00c18fd1a8e0..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/prepare.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -# 执行路径在模型库的根目录下 -################################# 安装框架 如: -echo "*******prepare benchmark start ***********" -pip install -U pip -echo `pip --version` -pip install Cython -pip install -r requirements.txt - -################################# 准备训练数据 如: -wget -nc -P static/dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar -cd ./static/dataset/coco/ && tar -xf coco_benchmark.tar && mv -u coco_benchmark/* . -rm -rf coco_benchmark/ && cd ../../../ -echo "*******prepare benchmark end***********" diff --git a/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/run_benchmark.sh b/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/run_benchmark.sh deleted file mode 100644 index 355fd4734e422790523b3454a258b69370af7f21..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_1x_coco/benchmark_common/run_benchmark.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env bash -# Test training benchmark for a model. -# Usage:bash run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} -function _set_params(){ - model_item=${1:-"model_item"} # (必选) 模型 item - base_batch_size=${2:-"2"} # (必选) 如果是静态图单进程,则表示每张卡上的BS,需在训练时*卡数 - fp_item=${3:-"fp32"} # (必选) fp32|fp16 - run_mode=${4:-"DP"} # (必选) MP模型并行|DP数据并行|PP流水线并行|混合并行DP1-MP1-PP1|DP1-MP4-PP1 - device_num=${5:-"N1C1"} # (必选) 使用的卡数量,N1C1|N1C8|N4C32 (4机32卡) - profiling=${PROFILING:-"false"} # (必选) Profiling 开关,默认关闭,通过全局变量传递 - model_repo="PaddleDetection" # (必选) 模型套件的名字 - speed_unit="samples/sec" # (必选)速度指标单位 - skip_steps=10 # (必选)解析日志,跳过模型前几个性能不稳定的step - keyword="ips:" # (必选)解析日志,筛选出性能数据所在行的关键字 - convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:" - max_iter=${6:-"100"} # (可选)需保证模型执行时间在5分钟内,需要修改代码提前中断的直接提PR 合入套件;或使用max_epoch参数 - num_workers=${7:-"8"} # (可选) -# 以下为通用执行命令,无特殊可不用修改 - model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode} # (必填) 且格式不要改动,与竞品名称对齐 - device=${CUDA_VISIBLE_DEVICES//,/ } - arr=(${device}) - num_gpu_devices=${#arr[*]} - run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (必填) TRAIN_LOG_DIR benchmark框架设置该参数为全局变量 - profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (必填) PROFILING_LOG_DIR benchmark框架设置该参数为全局变量 - speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} - - train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log - profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling - speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed -} -function _train(){ - export FLAGS_eager_delete_tensor_gb=0.0 - export FLAGS_fraction_of_gpu_memory_to_use=0.98 - export FLAGS_memory_fraction_of_eager_deletion=1.0 - export FLAGS_conv_workspace_size_limit=500 - cd ./static - batch_size=${base_batch_size} # 如果模型跑多卡单进程时,请在_train函数中计算出多卡需要的bs - echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}" - - if [ ${profiling} = "true" ];then - log_file=${profiling_log_file} - is_profiler=1 - else - log_file=${train_log_file} - is_profiler=0 - fi - if [ ${fp_item} = "fp16" ]; then - use_fp16_cmd="--fp16" - else - use_fp16_cmd="" - fi - - train_cmd="-c configs/mask_rcnn_r50_1x.yml \ - -o TrainReader.batch_size=${batch_size} \ - max_iters=${max_iter} log_iter=1 \ - TrainReader.worker_num=${num_workers} ${use_fp16_cmd} \ - --is_profiler=${is_profiler} " -# 以下为通用执行命令,无特殊可不用修改 - case ${run_mode} in - DP) if [[ ${device_num} = "N1C1" ]];then - echo "run ${run_mode} ${device_num}" - train_cmd="python -u tools/train.py ${train_cmd}" - else - rm -rf ./mylog - train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \ - tools/train.py ${train_cmd}" - fi - ;; - DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1" ;; - *) echo "choose run_mode "; exit 1; - esac - - echo "train_cmd: ${train_cmd} log_file: ${log_file}" - timeout 15m ${train_cmd} > ${log_file} 2>&1 - if [ $? -ne 0 ];then - echo -e "${model_name}, FAIL" - else - echo -e "${model_name}, SUCCESS" - fi - # kill -9 `ps -ef|grep 'python'|awk '{print $2}'` - if [ ${device_num} != "N1C1" -a -d mylog ]; then - rm ${log_file} - cp mylog/workerlog.0 ${log_file} - fi - cd ../ -} -source ${BENCHMARK_ROOT}/scripts/run_model.sh # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;如果不联调只想要产出训练log可以注掉本行,提交时需打开 -_set_params $@ -#_train # 如果只产出训练log,不解析,可取消注释 -_run # 该函数在run_model.sh中,执行时会调用_train; 如果不联调只产出训练log可以注掉本行,提交时需打开 diff --git a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C1/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh b/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C1/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh deleted file mode 100644 index a862a2279e4c08c7e6065276d040b88438b2fd2c..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C1/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh +++ /dev/null @@ -1,16 +0,0 @@ -model_item=mask_rcnn_r50_fpn_1x_coco -bs_item=2 -fp_item=fp32 -run_mode=DP -device_num=N1C1 -max_iter=100 -num_workers=2 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; -# run profiling -sleep 10; -export PROFILING=true -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} 11 ${num_workers} 2>&1; diff --git a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C8/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh b/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C8/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh deleted file mode 100644 index a9a41f2aed2182a59b8f365bbadc63994c53f27d..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/N1C8/mask_rcnn_r50_fpn_1x_coco_bs2_fp32_DP.sh +++ /dev/null @@ -1,12 +0,0 @@ -model_item=mask_rcnn_r50_fpn_1x_coco -bs_item=2 -fp_item=fp32 -run_mode=DP -device_num=N1C8 -max_iter=100 -num_workers=2 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/prepare.sh b/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/prepare.sh deleted file mode 100644 index f2325a10799319ff3293aeb97d5c00c18fd1a8e0..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/prepare.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -# 执行路径在模型库的根目录下 -################################# 安装框架 如: -echo "*******prepare benchmark start ***********" -pip install -U pip -echo `pip --version` -pip install Cython -pip install -r requirements.txt - -################################# 准备训练数据 如: -wget -nc -P static/dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar -cd ./static/dataset/coco/ && tar -xf coco_benchmark.tar && mv -u coco_benchmark/* . -rm -rf coco_benchmark/ && cd ../../../ -echo "*******prepare benchmark end***********" diff --git a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/run_benchmark.sh b/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/run_benchmark.sh deleted file mode 100644 index 740464a624db1511a52ecd24a5b7178d2287b1a6..0000000000000000000000000000000000000000 --- a/test_tipc/static/mask_rcnn_r50_fpn_1x_coco/benchmark_common/run_benchmark.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Test training benchmark for a model. -# Usage:bash run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} -function _set_params(){ - model_item=${1:-"model_item"} # (必选) 模型 item - base_batch_size=${2:-"2"} # (必选) 如果是静态图单进程,则表示每张卡上的BS,需在训练时*卡数 - fp_item=${3:-"fp32"} # (必选) fp32|fp16 - run_mode=${4:-"DP"} # (必选) MP模型并行|DP数据并行|PP流水线并行|混合并行DP1-MP1-PP1|DP1-MP4-PP1 - device_num=${5:-"N1C1"} # (必选) 使用的卡数量,N1C1|N1C8|N4C32 (4机32卡) - profiling=${PROFILING:-"false"} # (必选) Profiling 开关,默认关闭,通过全局变量传递 - model_repo="PaddleDetection" # (必选) 模型套件的名字 - speed_unit="samples/sec" # (必选)速度指标单位 - skip_steps=10 # (必选)解析日志,跳过模型前几个性能不稳定的step - keyword="ips:" # (必选)解析日志,筛选出性能数据所在行的关键字 - convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:" - max_iter=${6:-"100"} # (可选)需保证模型执行时间在5分钟内,需要修改代码提前中断的直接提PR 合入套件;或使用max_epoch参数 - num_workers=${7:-"8"} # (可选) -# 以下为通用执行命令,无特殊可不用修改 - model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode} # (必填) 且格式不要改动,与竞品名称对齐 - device=${CUDA_VISIBLE_DEVICES//,/ } - arr=(${device}) - num_gpu_devices=${#arr[*]} - run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (必填) TRAIN_LOG_DIR benchmark框架设置该参数为全局变量 - profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (必填) PROFILING_LOG_DIR benchmark框架设置该参数为全局变量 - speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} - - train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log - profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling - speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed -} -function _train(){ - export FLAGS_eager_delete_tensor_gb=0.0 - export FLAGS_fraction_of_gpu_memory_to_use=0.98 - export FLAGS_memory_fraction_of_eager_deletion=1.0 - cd ./static - batch_size=${base_batch_size} # 如果模型跑多卡单进程时,请在_train函数中计算出多卡需要的bs - echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}" - - if [ ${profiling} = "true" ];then - log_file=${profiling_log_file} - is_profiler=1 - else - log_file=${train_log_file} - is_profiler=0 - fi - if [ ${fp_item} = "fp16" ]; then - use_fp16_cmd="--fp16" - else - use_fp16_cmd="" - fi - - train_cmd="-c configs/mask_rcnn_r50_fpn_1x.yml -o LearningRate.base_lr=0.001 snapshot_iter=100000 \ - TrainReader.batch_size=${batch_size} \ - max_iters=${max_iter} log_iter=1 \ - TrainReader.worker_num=${num_workers} ${use_fp16_cmd} \ - --is_profiler=${is_profiler} " -# 以下为通用执行命令,无特殊可不用修改 - case ${run_mode} in - DP) if [[ ${device_num} = "N1C1" ]];then - echo "run ${run_mode} ${device_num}" - train_cmd="python -u tools/train.py ${train_cmd}" - else - rm -rf ./mylog - train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \ - tools/train.py ${train_cmd}" - fi - ;; - DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1" ;; - *) echo "choose run_mode "; exit 1; - esac - - echo "train_cmd: ${train_cmd} log_file: ${log_file}" - timeout 15m ${train_cmd} > ${log_file} 2>&1 - if [ $? -ne 0 ];then - echo -e "${model_name}, FAIL" - else - echo -e "${model_name}, SUCCESS" - fi - # kill -9 `ps -ef|grep 'python'|awk '{print $2}'` - if [ ${device_num} != "N1C1" -a -d mylog ]; then - rm ${log_file} - cp mylog/workerlog.0 ${log_file} - fi - cd ../ -} -source ${BENCHMARK_ROOT}/scripts/run_model.sh # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;如果不联调只想要产出训练log可以注掉本行,提交时需打开 -_set_params $@ -#_train # 如果只产出训练log,不解析,可取消注释 -_run # 该函数在run_model.sh中,执行时会调用_train; 如果不联调只产出训练log可以注掉本行,提交时需打开 diff --git a/test_tipc/static/yolov3_darknet53_270e_coco/N1C1/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh b/test_tipc/static/yolov3_darknet53_270e_coco/N1C1/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh deleted file mode 100644 index 3d3db1be54e782d3a2a7b9d3920d51299430f1ef..0000000000000000000000000000000000000000 --- a/test_tipc/static/yolov3_darknet53_270e_coco/N1C1/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh +++ /dev/null @@ -1,16 +0,0 @@ -model_item=yolov3_darknet53_270e_coco -bs_item=8 -fp_item=fp32 -run_mode=DP -device_num=N1C1 -max_iter=100 -num_workers=8 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; -# run profiling -sleep 10; -export PROFILING=true -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} 11 ${num_workers} 2>&1; diff --git a/test_tipc/static/yolov3_darknet53_270e_coco/N1C8/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh b/test_tipc/static/yolov3_darknet53_270e_coco/N1C8/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh deleted file mode 100644 index 5cc30f26745c7058319275c2d2dd56a0274a91d6..0000000000000000000000000000000000000000 --- a/test_tipc/static/yolov3_darknet53_270e_coco/N1C8/yolov3_darknet53_270e_coco_bs8_fp32_DP.sh +++ /dev/null @@ -1,12 +0,0 @@ -model_item=yolov3_darknet53_270e_coco -bs_item=8 -fp_item=fp32 -run_mode=DP -device_num=N1C8 -max_iter=100 -num_workers=8 - -# get data -bash test_tipc/static/${model_item}/benchmark_common/prepare.sh -# run -bash test_tipc/static/${model_item}/benchmark_common/run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} ${max_iter} ${num_workers} 2>&1; diff --git a/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/prepare.sh b/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/prepare.sh deleted file mode 100644 index f2325a10799319ff3293aeb97d5c00c18fd1a8e0..0000000000000000000000000000000000000000 --- a/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/prepare.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env bash -# 执行路径在模型库的根目录下 -################################# 安装框架 如: -echo "*******prepare benchmark start ***********" -pip install -U pip -echo `pip --version` -pip install Cython -pip install -r requirements.txt - -################################# 准备训练数据 如: -wget -nc -P static/dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar -cd ./static/dataset/coco/ && tar -xf coco_benchmark.tar && mv -u coco_benchmark/* . -rm -rf coco_benchmark/ && cd ../../../ -echo "*******prepare benchmark end***********" diff --git a/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/run_benchmark.sh b/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/run_benchmark.sh deleted file mode 100644 index 3af4d15712454dad44bdffa0524a641893e9f12d..0000000000000000000000000000000000000000 --- a/test_tipc/static/yolov3_darknet53_270e_coco/benchmark_common/run_benchmark.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# Test training benchmark for a model. -# Usage:bash run_benchmark.sh ${model_item} ${bs_item} ${fp_item} ${run_mode} ${device_num} -function _set_params(){ - model_item=${1:-"model_item"} # (必选) 模型 item - base_batch_size=${2:-"2"} # (必选) 如果是静态图单进程,则表示每张卡上的BS,需在训练时*卡数 - fp_item=${3:-"fp32"} # (必选) fp32|fp16 - run_mode=${4:-"DP"} # (必选) MP模型并行|DP数据并行|PP流水线并行|混合并行DP1-MP1-PP1|DP1-MP4-PP1 - device_num=${5:-"N1C1"} # (必选) 使用的卡数量,N1C1|N1C8|N4C32 (4机32卡) - profiling=${PROFILING:-"false"} # (必选) Profiling 开关,默认关闭,通过全局变量传递 - model_repo="PaddleDetection" # (必选) 模型套件的名字 - speed_unit="samples/sec" # (必选)速度指标单位 - skip_steps=10 # (必选)解析日志,跳过模型前几个性能不稳定的step - keyword="ips:" # (必选)解析日志,筛选出性能数据所在行的关键字 - convergence_key="loss:" # (可选)解析日志,筛选出收敛数据所在行的关键字 如:convergence_key="loss:" - max_iter=${6:-"100"} # (可选)需保证模型执行时间在5分钟内,需要修改代码提前中断的直接提PR 合入套件;或使用max_epoch参数 - num_workers=${7:-"8"} # (可选) -# 以下为通用执行命令,无特殊可不用修改 - model_name=${model_item}_bs${base_batch_size}_${fp_item}_${run_mode} # (必填) 且格式不要改动,与竞品名称对齐 - device=${CUDA_VISIBLE_DEVICES//,/ } - arr=(${device}) - num_gpu_devices=${#arr[*]} - run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # (必填) TRAIN_LOG_DIR benchmark框架设置该参数为全局变量 - profiling_log_path=${PROFILING_LOG_DIR:-$(pwd)} # (必填) PROFILING_LOG_DIR benchmark框架设置该参数为全局变量 - speed_log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} - - train_log_file=${run_log_path}/${model_repo}_${model_name}_${device_num}_log - profiling_log_file=${profiling_log_path}/${model_repo}_${model_name}_${device_num}_profiling - speed_log_file=${speed_log_path}/${model_repo}_${model_name}_${device_num}_speed -} -function _train(){ - export FLAGS_eager_delete_tensor_gb=0.0 - export FLAGS_fraction_of_gpu_memory_to_use=0.98 - export FLAGS_memory_fraction_of_eager_deletion=1.0 - cd ./static - batch_size=${base_batch_size} # 如果模型跑多卡单进程时,请在_train函数中计算出多卡需要的bs - echo "current CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, model_name=${model_name}, device_num=${device_num}, is profiling=${profiling}" - - if [ ${profiling} = "true" ];then - log_file=${profiling_log_file} - is_profiler=1 - else - log_file=${train_log_file} - is_profiler=0 - fi - if [ ${fp_item} = "fp16" ]; then - use_fp16_cmd="--fp16" - else - use_fp16_cmd="" - fi - - train_cmd="-c configs/yolov3_darknet.yml -o LearningRate.base_lr=0.002 snapshot_iter=100000 \ - TrainReader.batch_size=${batch_size} \ - max_iters=${max_iter} log_iter=1 \ - TrainReader.worker_num=${num_workers} ${use_fp16_cmd} \ - --is_profiler=${is_profiler} " -# 以下为通用执行命令,无特殊可不用修改 - case ${run_mode} in - DP) if [[ ${device_num} = "N1C1" ]];then - echo "run ${run_mode} ${device_num}" - train_cmd="python -u tools/train.py ${train_cmd}" - else - rm -rf ./mylog - train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES \ - tools/train.py ${train_cmd}" - fi - ;; - DP1-MP1-PP1) echo "run run_mode: DP1-MP1-PP1" ;; - *) echo "choose run_mode "; exit 1; - esac - - echo "train_cmd: ${train_cmd} log_file: ${log_file}" - timeout 15m ${train_cmd} > ${log_file} 2>&1 - if [ $? -ne 0 ];then - echo -e "${model_name}, FAIL" - else - echo -e "${model_name}, SUCCESS" - fi - # kill -9 `ps -ef|grep 'python'|awk '{print $2}'` - if [ ${device_num} != "N1C1" -a -d mylog ]; then - rm ${log_file} - cp mylog/workerlog.0 ${log_file} - fi - cd ../ -} -source ${BENCHMARK_ROOT}/scripts/run_model.sh # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;如果不联调只想要产出训练log可以注掉本行,提交时需打开 -_set_params $@ -#_train # 如果只产出训练log,不解析,可取消注释 -_run # 该函数在run_model.sh中,执行时会调用_train; 如果不联调只产出训练log可以注掉本行,提交时需打开