From 233d05d9b61d2baaeb3dd646dee6ce5783175903 Mon Sep 17 00:00:00 2001 From: hysunflower <52739577+hysunflower@users.noreply.github.com> Date: Mon, 29 Nov 2021 19:19:22 +0800 Subject: [PATCH] update_gan scripts for benchmark (#500) * update_gan scripts for benchmark * update_gan scripts for benchmark --- benchmark/benchmark.yaml | 8 ++++---- benchmark/prepare.sh | 30 ++++++++++++++++++++++++++++ benchmark/run_all.sh | 40 ++++++-------------------------------- benchmark/run_benchmark.sh | 17 ++++++++++------ 4 files changed, 51 insertions(+), 44 deletions(-) create mode 100644 benchmark/prepare.sh diff --git a/benchmark/benchmark.yaml b/benchmark/benchmark.yaml index 6df2140..844b776 100644 --- a/benchmark/benchmark.yaml +++ b/benchmark/benchmark.yaml @@ -2,7 +2,7 @@ StyleGANv2: dataset_web: https://paddlegan.bj.bcebos.com/datasets/ffhq.tar config: configs/stylegan_v2_256_ffhq.yaml fp_item: fp32 - bs_item: 3 8 + bs_item: 8 total_iters: 100 log_interval: 5 @@ -10,7 +10,7 @@ FOMM: dataset_web: https://paddlegan.bj.bcebos.com/datasets/fom_test_data.tar config: configs/firstorder_vox_256.yaml fp_item: fp32 - bs_item: 8 16 + bs_item: 16 epochs: 1 log_interval: 1 @@ -18,7 +18,7 @@ esrgan: dataset_web: https://paddlegan.bj.bcebos.com/datasets/DIV2KandSet14.tar config: configs/esrgan_psnr_x4_div2k.yaml fp_item: fp32 - bs_item: 32 64 + bs_item: 32 total_iters: 300 log_interval: 10 @@ -26,7 +26,7 @@ edvr: dataset: data/REDS config: configs/edvr_m_wo_tsa.yaml fp_item: fp32 - bs_item: 4 64 + bs_item: 4 total_iters: 300 log_interval: 10 diff --git a/benchmark/prepare.sh b/benchmark/prepare.sh new file mode 100644 index 0000000..7831b31 --- /dev/null +++ b/benchmark/prepare.sh @@ -0,0 +1,30 @@ + +#!usr/bin/env bash + +export BENCHMARK_ROOT=/workspace +run_env=$BENCHMARK_ROOT/run_env +log_date=`date "+%Y.%m%d.%H%M%S"` +frame=paddle2.1.3 +cuda_version=10.2 +save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/ + +if [[ -d ${save_log_dir} ]]; then + rm -rf ${save_log_dir} +fi + +# this for update the log_path coding mat +export TRAIN_LOG_DIR=${save_log_dir}/train_log +mkdir -p ${TRAIN_LOG_DIR} +log_path=${TRAIN_LOG_DIR} + +################################# 配置python, 如: +rm -rf $run_env +mkdir $run_env +echo `which python3.7` +ln -s $(which python3.7)m-config $run_env/python3-config +ln -s $(which python3.7) $run_env/python +ln -s $(which pip3.7) $run_env/pip + +export PATH=$run_env:${PATH} +cd $BENCHMARK_ROOT +pip install -v -e . diff --git a/benchmark/run_all.sh b/benchmark/run_all.sh index de2be07..9c344c2 100755 --- a/benchmark/run_all.sh +++ b/benchmark/run_all.sh @@ -1,34 +1,6 @@ - #!usr/bin/env bash -export BENCHMARK_ROOT=/workspace -run_env=$BENCHMARK_ROOT/run_env -log_date=`date "+%Y.%m%d.%H%M%S"` -frame=paddle2.1.3 -cuda_version=10.2 -save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/ - -if [[ -d ${save_log_dir} ]]; then - rm -rf ${save_log_dir} -fi - -# this for update the log_path coding mat -export TRAIN_LOG_DIR=${save_log_dir}/train_log -mkdir -p ${TRAIN_LOG_DIR} -log_path=${TRAIN_LOG_DIR} - -################################# 配置python, 如: -rm -rf $run_env -mkdir $run_env -echo `which python3.7` -ln -s $(which python3.7)m-config $run_env/python3-config -ln -s $(which python3.7) $run_env/python -ln -s $(which pip3.7) $run_env/pip - -export PATH=$run_env:${PATH} -cd $BENCHMARK_ROOT -pip install -v -e . - +export log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} function parse_yaml { local s='[[:space:]]*' w='[a-zA-Z0-9_]*' fs=$(echo @|tr @ '\034') @@ -79,17 +51,17 @@ for model_mode in ${model_mode_list[@]}; do for fp_item in ${fp_item_list[@]}; do for bs_item in ${bs_list[@]} do - echo "index is speed, 1gpus, begin, ${model_name}" + echo "index is speed, 1gpus, begin, ${model_mode}" run_mode=sp - CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} # (5min) + CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} | tee ${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_1gpus 2>&1 # (5min) sleep 60 - echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" + echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}" run_mode=mp basicvsr_name=basicvsr if [ ${model_mode} = ${basicvsr_name} ]; then - CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} + CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} | tee ${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_4gpus4p 2>&1 else - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} | tee ${log_path}/gan_dygraph_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}_speed_8gpus8p 2>&1 fi sleep 60 done diff --git a/benchmark/run_benchmark.sh b/benchmark/run_benchmark.sh index 25d9980..78296b1 100755 --- a/benchmark/run_benchmark.sh +++ b/benchmark/run_benchmark.sh @@ -14,6 +14,15 @@ function _set_params(){ run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 后续QA设置该参数 need_profile=${9:-"off"} + index=1 + base_batch_size=${batch_size} + mission_name="图像生成" + direction_id=0 + keyword="ips:" + keyword_loss="G_idt_A_loss:" + skip_steps=5 + ips_unit="images/s" + # 以下不用修改 device=${CUDA_VISIBLE_DEVICES//,/ } arr=(${device}) @@ -23,9 +32,6 @@ function _set_params(){ log_profile=${run_log_path}/${model_name}_model.profile } -function _analysis_log(){ - python benchmark/analysis_log.py ${model_name} ${log_file} ${res_log_file} -} function _train(){ echo "Train on ${num_gpu_devices} GPUs" @@ -65,9 +71,8 @@ function _train(){ cp mylog/workerlog.0 ${log_file} fi - _analysis_log - } +source ${BENCHMARK_ROOT}/scripts/run_model.sh # 在该脚本中会对符合benchmark规范的log使用analysis.py 脚本进行性能数据解析;该脚本在连调时可从benchmark repo中下载https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh;如果不联调只想要产出训练log可以注掉本行,提交时需打开 _set_params $@ -_train +_run -- GitLab