diff --git a/configs/ResNet/ResNet50_fp16.yml b/configs/ResNet/ResNet50_fp16.yml new file mode 100644 index 0000000000000000000000000000000000000000..a952833221a193cefe32c004f40181eaa409188d --- /dev/null +++ b/configs/ResNet/ResNet50_fp16.yml @@ -0,0 +1,81 @@ +mode: 'train' +ARCHITECTURE: + name: 'ResNet50' + +pretrained_model: "" +model_save_dir: "./output/" +classes_num: 1000 +total_images: 1281167 +save_interval: 1 +validate: True +valid_interval: 1 +epochs: 120 +topk: 5 +image_shape: [3, 224, 224] + +# mixed precision training +use_fp16: True +amp_scale_loss: 128.0 +use_dynamic_loss_scaling: True + +use_mix: False +ls_epsilon: -1 + +LEARNING_RATE: + function: 'Piecewise' + params: + lr: 0.1 + decay_epochs: [30, 60, 90] + gamma: 0.1 + +OPTIMIZER: + function: 'Momentum' + params: + momentum: 0.9 + regularizer: + function: 'L2' + factor: 0.000100 + +TRAIN: + batch_size: 256 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/train_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - RandCropImage: + size: 224 + - RandFlipImage: + flip_code: 1 + - NormalizeImage: + scale: 1.0/255.0
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: + +VALID: + batch_size: 64 + num_workers: 4 + file_list: "./dataset/ILSVRC2012/val_list.txt" + data_dir: "./dataset/ILSVRC2012/" + shuffle_seed: 0 + transforms: + - DecodeImage: + to_rgb: True + to_np: False + channel_first: False + - ResizeImage: + resize_short: 256 + - CropImage: + size: 224 + - NormalizeImage: + scale: 1.0/255.0 + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: '' + - ToCHWImage: diff --git a/docs/zh_CN/update_history.md b/docs/zh_CN/update_history.md index e59f707209c409ebcf05bb3139407d8c84f169fb..b2ab286109354b9a75defc3ec5b29fbf85596349 100644 --- a/docs/zh_CN/update_history.md +++ b/docs/zh_CN/update_history.md @@ -1,5 +1,8 @@ # 更新日志 +* 2020.05.17 + * 添加混合精度训练。 + * 2020.05.09 * 添加Paddle Serving使用文档。 * 添加Paddle-Lite使用文档。 diff --git a/tools/program.py b/tools/program.py index c8e9556eea751b77fdd79cf3c2009a8de26ed4eb..b73f1064284a7f1929f99d892bee43798a5d52fb 100644 --- a/tools/program.py +++ b/tools/program.py @@ -297,6 +297,19 @@ def dist_optimizer(config, optimizer): return optimizer +def mixed_precision_optimizer(config, optimizer): + use_fp16 = config.get('use_fp16', False) + amp_scale_loss = config.get('amp_scale_loss', 1.0) + use_dynamic_loss_scaling = config.get('use_dynamic_loss_scaling', False) + if use_fp16: + optimizer = fluid.contrib.mixed_precision.decorate( + optimizer, + init_loss_scaling=amp_scale_loss, + use_dynamic_loss_scaling=use_dynamic_loss_scaling) + + return optimizer + + def build(config, main_prog, startup_prog, is_train=True): """ Build a program using a model and an optimizer @@ -337,6 +350,8 @@ def build(config, main_prog, startup_prog, is_train=True): optimizer = create_optimizer(config) lr = optimizer._global_learning_rate() fetchs['lr'] = (lr, AverageMeter('lr', 'f', need_avg=False)) + + optimizer = mixed_precision_optimizer(config, optimizer) optimizer = dist_optimizer(config, optimizer) 
optimizer.minimize(fetchs['loss'][0]) @@ -396,7 +411,7 @@ def run(dataloader, exe, program, fetchs, epoch=0, mode='train'): for i, m in enumerate(metrics): metric_list[i].update(m[0], len(batch[0])) fetchs_str = ''.join([str(m.value) + ' ' - for m in metric_list] + [batch_time.value])+'s' + for m in metric_list] + [batch_time.value]) + 's' if mode == 'eval': logger.info("{:s} step:{:<4d} {:s}s".format(mode, idx, fetchs_str)) else: @@ -404,16 +419,22 @@ def run(dataloader, exe, program, fetchs, epoch=0, mode='train'): step_str = "{:s} step:{:<4d}".format(mode, idx) logger.info("{:s} {:s} {:s}".format( - logger.coloring(epoch_str, "HEADER") if idx==0 else epoch_str, logger.coloring(step_str,"PURPLE"), logger.coloring(fetchs_str,'OKGREEN'))) + logger.coloring(epoch_str, "HEADER") + if idx == 0 else epoch_str, + logger.coloring(step_str, "PURPLE"), + logger.coloring(fetchs_str, 'OKGREEN'))) end_str = ''.join([str(m.mean) + ' ' - for m in metric_list] + [batch_time.total])+'s' + for m in metric_list] + [batch_time.total]) + 's' if mode == 'eval': logger.info("END {:s} {:s}s".format(mode, end_str)) else: end_epoch_str = "END epoch:{:<3d}".format(epoch) - logger.info("{:s} {:s} {:s}".format(logger.coloring(end_epoch_str,"RED"), logger.coloring(mode,"PURPLE"), logger.coloring(end_str,"OKGREEN"))) + logger.info("{:s} {:s} {:s}".format( + logger.coloring(end_epoch_str, "RED"), + logger.coloring(mode, "PURPLE"), + logger.coloring(end_str, "OKGREEN"))) # return top1_acc in order to save the best model if mode == 'valid':