未验证 提交 0d8999fe 编写于 作者: L lzzyzlbb 提交者: GitHub

Benchmark (#435)

* Add benchmark for GAN

* Add benchmark for GAN
上级 181d9f80
# Benchmark matrix read by the benchmark driver through parse_yaml.
# Each top-level key names a model; the keys nested under it must be indented
# by exactly two spaces, because the parser derives the nesting depth from
# the indent width and exposes every nested key as <model>_<key>.
# Use full-line comments only: the parser keeps everything after "key:" as
# the value, so a trailing comment would become part of it.
#
#   dataset_web   archive that is downloaded and unpacked into data/
#   config        config file handed to tools/main.py via --config-file
#   fp_item       space-separated precision labels, one run per label
#   bs_item       space-separated batch sizes, one run per size
#   total_iters   run length; when present it is used instead of epochs
#   epochs        run length used when total_iters is absent
#   log_interval  value passed as log_config.interval
StyleGANv2:
  dataset_web: https://paddlegan.bj.bcebos.com/datasets/ffhq.tar
  config: configs/stylegan_v2_256_ffhq.yaml
  fp_item: fp32
  bs_item: 3 8
  total_iters: 300
  log_interval: 10
FOMM:
  dataset_web: https://paddlegan.bj.bcebos.com/datasets/fom_test_data.tar
  config: configs/firstorder_vox_256.yaml
  fp_item: fp32
  bs_item: 8 16
  epochs: 30
  log_interval: 10
#!/usr/bin/env bash
#######################################
# Flatten a simple, two-space-indented YAML file into shell assignments.
#
# Every "key: value" line becomes  <parent>_<key>="<value>"  (parents joined
# with "_"). Every top-level key is additionally recorded as
#   model_mode_list[<n>]=<key>
#   model_num=<n+1>
# so the caller can iterate over the top-level sections after eval-ing the
# output. Values wrapped in single or double quotes have the quotes removed.
#
# Arguments: $1 - path to the YAML file
# Outputs:   shell assignments on stdout, one per line
# Returns:   1 (with a message on stderr) if the file cannot be read
#
# NOTE(review): values are emitted inside double quotes without escaping and
# the caller eval-s them, so the YAML file must come from a trusted source.
#######################################
parse_yaml() {
  local yaml_file=$1
  # s: optional whitespace, w: a key name, fs: ASCII FS used as field separator
  local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$'\034'

  if [[ ! -r "${yaml_file}" ]]; then
    printf 'parse_yaml: cannot read "%s"\n' "${yaml_file}" >&2
    return 1
  fi

  # sed splits each line into <indent><fs><key><fs><value>; awk turns the
  # indent width into a nesting depth (two spaces per level).
  sed -ne "s|^\($s\):|\1|" \
    -e "s|^\($s\)\($w\)$s:$s[\"']\(.*\)[\"']$s\$|\1$fs\2$fs\3|p" \
    -e "s|^\($s\)\($w\)$s:$s\(.*\)$s\$|\1$fs\2$fs\3|p" "${yaml_file}" |
    awk -F"${fs}" '{
      indent = length($1) / 2;
      vname[indent] = $2;
      if (indent == 0) {
        # A top-level key starts a new model section.
        model_mode_list[model_num] = $2;
        printf("model_mode_list[%d]=%s\n", (model_num), $2);
        printf("model_num=%d\n", (model_num + 1));
        model_num = (model_num + 1);
      }
      # Forget parents that are deeper than the current line.
      for (i in vname) { if (i > indent) { delete vname[i] } }
      # Emitted for every key, including ones with an empty value.
      vn = ""; for (i = 0; i < indent; i++) { vn = (vn)(vname[i])("_") }
      printf("%s%s=\"%s\"\n", vn, $2, $3);
    }'
}
# Driver: load benchmark/benchmark.yaml, then for every model listed there
# download its dataset and run the single-GPU and 8-GPU benchmark for each
# precision / batch-size combination.
eval "$(parse_yaml "benchmark/benchmark.yaml")"

# wget -O cannot create the target directory on its own.
mkdir -p data

for model_mode in "${model_mode_list[@]}"; do
  # parse_yaml exposes each setting as <model>_<key>; read them through
  # indirect expansion instead of eval. A missing key expands to "".
  fp_item_key="${model_mode}_fp_item"
  bs_item_key="${model_mode}_bs_item"
  config_key="${model_mode}_config"
  total_iters_key="${model_mode}_total_iters"
  epochs_key="${model_mode}_epochs"
  dataset_web_key="${model_mode}_dataset_web"
  log_interval_key="${model_mode}_log_interval"

  # fp_item / bs_item hold space-separated lists; split them into arrays.
  read -r -a fp_item_list <<< "${!fp_item_key}"
  read -r -a bs_list <<< "${!bs_item_key}"
  config=${!config_key}
  total_iters=${!total_iters_key}
  epochs=${!epochs_key}
  dataset_web=${!dataset_web_key}
  log_interval=${!log_interval_key}

  # Fetch and unpack the dataset. A failure is reported but does not stop the
  # run, matching the previous best-effort behaviour.
  if ! { wget "${dataset_web}" -O "data/${model_mode}.tar" &&
    tar -vxf "data/${model_mode}.tar" -C data/; }; then
    echo "warning: could not prepare dataset for ${model_mode}" >&2
  fi

  # total_iters wins over epochs when both are configured.
  if [[ -n "${total_iters}" ]]; then
    mode="total_iters"
    max_iter=${total_iters}
  else
    mode="epochs"
    max_iter=${epochs}
  fi
  echo "${epochs}"

  for fp_item in "${fp_item_list[@]}"; do
    for bs_item in "${bs_list[@]}"; do
      echo "index is speed, 1gpus, begin, ${model_mode}"
      run_mode=sp
      CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh "${run_mode}" "${bs_item}" "${fp_item}" "${mode}" "${max_iter}" "${model_mode}" "${config}" "${log_interval}" # (5min)
      sleep 60
      echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
      run_mode=mp
      CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh "${run_mode}" "${bs_item}" "${fp_item}" "${mode}" "${max_iter}" "${model_mode}" "${config}" "${log_interval}"
      sleep 60
    done
  done
done
#!/usr/bin/env bash
set -xe
# Usage example: CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval}
# Parameter description
#######################################
# Read the positional arguments into the global settings used by the rest of
# the script, falling back to a default for every argument that is missing
# or empty.
# Globals written: run_mode, batch_size, fp_item, mode, max_iter, model_name,
#                  config, log_interval, run_log_path, device, arr,
#                  num_gpu_devices, log_file
# Globals read:    TRAIN_LOG_DIR, CUDA_VISIBLE_DEVICES
#######################################
_set_params() {
  run_mode="${1:-sp}"           # sp = single GPU | mp = multiple GPUs
  batch_size="${2:-64}"
  fp_item="${3:-fp32}"          # fp32 | fp16
  mode="${4:-epochs}"
  max_iter="${5:-500}"          # optional; used to stop the run early
  model_name="${6:-model_name}"
  config="${7:-config}"
  log_interval="${8:-1}"

  # TRAIN_LOG_DIR is expected to be provided by QA later; default to the cwd.
  run_log_path="${TRAIN_LOG_DIR:-$(pwd)}"

  # Nothing below needs to be edited.
  # Count the GPUs by turning the comma-separated id list into words.
  device="${CUDA_VISIBLE_DEVICES//,/ }"
  arr=(${device})
  num_gpu_devices="${#arr[@]}"

  printf -v log_file '%s/%s_%s_bs%s_%s_%s' \
    "${run_log_path}" "${model_name}" "${run_mode}" \
    "${batch_size}" "${fp_item}" "${num_gpu_devices}"
}
#######################################
# Build the training command for the selected run mode, run it under a
# 15-minute timeout with all output redirected to ${log_file}, and report
# the outcome.
# Globals read:    run_mode, batch_size, config, log_interval, mode, max_iter,
#                  model_name, num_gpu_devices, log_file, CUDA_VISIBLE_DEVICES
# Globals written: train_cmd, log_parse_file (mp only), job_fail_flag (exported)
# Outputs:         progress lines and "<model_name>, SUCCESS|FAIL" on stdout
# Returns:         the exit status of the training command (124 on timeout);
#                  exits 1 when run_mode is neither sp nor mp
#######################################
_train() {
  echo "Train on ${num_gpu_devices} GPUs"
  echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

  # Keep the command as an array so that every argument stays one word.
  local -a train_args=(
    "--config-file=${config}"
    -o "dataset.train.batch_size=${batch_size}"
    "log_config.interval=${log_interval}"
    "${mode}=${max_iter}"
  )
  case "${run_mode}" in
    sp)
      train_cmd=(python -u tools/main.py "${train_args[@]}")
      ;;
    mp)
      train_cmd=(python -m paddle.distributed.launch --log_dir=./mylog
        "--gpus=${CUDA_VISIBLE_DEVICES}" tools/main.py "${train_args[@]}")
      log_parse_file="mylog/workerlog.0"
      ;;
    *)
      echo "choose run_mode(sp or mp)"
      exit 1
      ;;
  esac

  # Nothing below needs to be edited.
  # Capture the status with "|| rc=$?": the script runs under "set -e", which
  # would otherwise abort right here on a failed or timed-out run and skip
  # the FAIL report and the log collection below.
  local rc=0
  timeout 15m "${train_cmd[@]}" > "${log_file}" 2>&1 || rc=$?
  if ((rc != 0)); then
    echo -e "${model_name}, FAIL"
    export job_fail_flag=1
  else
    echo -e "${model_name}, SUCCESS"
    export job_fail_flag=0
  fi

  # NOTE(review): this handler is installed only after training has finished,
  # so it cannot act on signals received during the run — confirm intent.
  trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM

  # In multi-GPU mode the launcher writes per-worker logs into mylog/;
  # worker 0's log replaces the captured launcher output.
  if [[ "${run_mode}" == "mp" && -d mylog ]]; then
    rm -f -- "${log_file}"
    cp -- "${log_parse_file}" "${log_file}"
  fi

  return "${rc}"
}
# Entry point: parse the positional arguments, then run the benchmark.
# "$@" is quoted so that each argument reaches _set_params as a single word.
_set_params "$@"
_train
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册