提交 cd4931d7 编写于 作者: L LDOUBLEV

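Revert the "> train.log" change: stop redirecting training output to `${btrain_log}` (drop `> ${btrain_log} 2>&1` from the train commands) and restore the `${trainer}`-based `save_log` names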

上级 4429973f
...@@ -317,25 +317,25 @@ else ...@@ -317,25 +317,25 @@ else
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}") set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}") set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
if [ ${#ips} -le 26 ];then if [ ${#ips} -le 26 ];then
save_log="${LOG_PATH}/${model_name}_gpus_${gpu}_autocast_${autocast}_bs_${train_batch_value}_sp" save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
nodes=1 nodes=1
else else
IFS="," IFS=","
ips_array=(${ips}) ips_array=(${ips})
IFS="|" IFS="|"
nodes=${#ips_array[@]} nodes=${#ips_array[@]}
save_log="${LOG_PATH}/${model_name}_gpus_${gpu}_autocast_${autocast}_bs_${train_batch_value}_mp" save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
fi fi
btrain_log="${LOG_PATH}/benchmark_train/${model_name}_bs${train_batch_value}_${autocast}" btrain_log="${LOG_PATH}/benchmark_train/${model_name}_bs${train_batch_value}_${autocast}"
set_save_model=$(func_set_params "${save_model_key}" "${save_log}") set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} > ${btrain_log} 2>&1 " cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
elif [ ${#ips} -le 26 ];then # train with multi-gpu elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} > ${btrain_log} 2>&1 " cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
else # train with multi-machine else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} > ${btrain_log} 2>&1 " cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi fi
# run train # run train
eval $cmd eval $cmd
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册