diff --git a/configs/det/det_mv3_db_amp.yml b/configs/det/det_mv3_db_amp.yml
index 772342a2d34dfb2ed2975b72970f811c9300c473..640031752bab3a6d8b8d37f3a48c376d142f77cb 100644
--- a/configs/det/det_mv3_db_amp.yml
+++ b/configs/det/det_mv3_db_amp.yml
@@ -14,8 +14,8 @@ Global:
   use_visualdl: False
   infer_img: doc/imgs_en/img_10.jpg
   save_res_path: ./output/det_db/predicts_db.txt
-
-AMP:
+  #amp related
+  use_amp: True
   scale_loss: 1024.0
   use_dynamic_loss_scaling: True

diff --git a/tests/configs/ppocr_det_mobile_params.txt b/tests/configs/ppocr_det_mobile_params.txt
index 5edb14cdbf8eef87b5b5558cbd8d1a2ff54ae919..be14865c5d8458429eaf598307269b6b75d1febc 100644
--- a/tests/configs/ppocr_det_mobile_params.txt
+++ b/tests/configs/ppocr_det_mobile_params.txt
@@ -1,9 +1,9 @@
 ===========================train_params===========================
 model_name:ocr_det
 python:python3.7
-gpu_list:0|0,1
+gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1
 Global.use_gpu:True|True
-Global.auto_cast:null
+Global.auto_cast:fp32|amp
 Global.epoch_num:lite_train_infer=1|whole_train_infer=300
 Global.save_model_dir:./output/
 Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
diff --git a/tests/test_python.sh b/tests/test_python.sh
index 39b043b809016b245954a835d37789dcc28d7265..26045e174ed198854953e16453a8862a6984a3a9 100644
--- a/tests/test_python.sh
+++ b/tests/test_python.sh
@@ -253,6 +253,11 @@ else
             env=" "
         fi
         for autocast in ${autocast_list[*]}; do
+            if [ ${autocast} = "amp" ]; then
+                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
+            else
+                set_amp_config=" "
+            fi
             for trainer in ${trainer_list[*]}; do
                 flag_quant=False
                 if [ ${trainer} = ${pact_key} ]; then
@@ -279,7 +284,6 @@
                 if [ ${run_train} = "null" ]; then
                     continue
                 fi
-
                 set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                 set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                 set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
@@ -295,11 +299,11 @@
                 set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                 if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
-                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
+                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
                 elif [ ${#gpu} -le 15 ];then  # train with multi-gpu
-                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                 else     # train with multi-machine
-                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                 fi
                 # run train
                 eval "unset CUDA_VISIBLE_DEVICES"
diff --git a/tools/train.py b/tools/train.py
index 49e44112c2939f4cf0fde9aca773d508d2f95736..d182af2988cb29511be40a079d2b3e06605ebe28 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -103,16 +103,16 @@ def main(config, device, logger, vdl_writer):
         logger.info('valid dataloader has {} iters'.format(
             len(valid_dataloader)))

-    use_amp = True if "AMP" in config else False
+    use_amp = config["Global"].get("use_amp", False)
     if use_amp:
         AMP_RELATED_FLAGS_SETTING = {
             'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
             'FLAGS_max_inplace_grad_add': 8,
         }
         paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
-        scale_loss = config["AMP"].get("scale_loss", 1.0)
-        use_dynamic_loss_scaling = config["AMP"].get("use_dynamic_loss_scaling",
-                                                     False)
+        scale_loss = config["Global"].get("scale_loss", 1.0)
+        use_dynamic_loss_scaling = config["Global"].get(
+            "use_dynamic_loss_scaling", False)
         scaler = paddle.amp.GradScaler(
             init_loss_scaling=scale_loss,
             use_dynamic_loss_scaling=use_dynamic_loss_scaling)
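
Note: with this change, AMP is enabled through keys in the Global section of the config (use_amp, scale_loss, use_dynamic_loss_scaling) instead of a separate top-level AMP block, and the test harness appends the same keys as command-line overrides whenever the autocast mode is "amp". A minimal sketch of the resulting invocation, assuming the standard tools/train.py -c <config> -o <Key=Value ...> override interface (the config path and values below are illustrative, mirroring set_amp_config in tests/test_python.sh):

    # hypothetical manual run equivalent to what the test script builds
    python3.7 tools/train.py -c configs/det/det_mv3_db_amp.yml \
        -o Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True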