From bb1376db452be0692a5c526317dc48f586e0e3ae Mon Sep 17 00:00:00 2001 From: zhiqiu Date: Fri, 16 Jul 2021 13:26:03 +0000 Subject: [PATCH] add flags setting --- .../ImageNet/ResNet/ResNet50_fp16_dygraph.yaml | 17 +++++++++++++++-- ppcls/engine/trainer.py | 6 ++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ppcls/configs/ImageNet/ResNet/ResNet50_fp16_dygraph.yaml b/ppcls/configs/ImageNet/ResNet/ResNet50_fp16_dygraph.yaml index 7f51d334..59d4bcec 100644 --- a/ppcls/configs/ImageNet/ResNet/ResNet50_fp16_dygraph.yaml +++ b/ppcls/configs/ImageNet/ResNet/ResNet50_fp16_dygraph.yaml @@ -10,19 +10,26 @@ Global: epochs: 120 print_batch_step: 10 use_visualdl: False + image_channel: &image_channel 4 # used for static mode and model export - image_shape: [3, 224, 224] + image_shape: [*image_channel, 224, 224] save_inference_dir: ./inference # training model under @to_static to_static: False use_dali: True +# mixed precision training AMP: + scale_loss: 128.0 + use_dynamic_loss_scaling: True + use_pure_fp16: &use_pure_fp16 False # model architecture Arch: name: ResNet50 class_num: 1000 + input_image_channel: *image_channel + data_format: "NHWC" # loss function config for traing/eval process Loss: @@ -67,10 +74,12 @@ DataLoader: mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + output_fp16: *use_pure_fp16 + channel_num: *image_channel sampler: name: DistributedBatchSampler - batch_size: 64 + batch_size: 256 drop_last: False shuffle: True loader: @@ -95,6 +104,8 @@ DataLoader: mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + output_fp16: *use_pure_fp16 + channel_num: *image_channel sampler: name: DistributedBatchSampler batch_size: 64 @@ -120,6 +131,8 @@ Infer: mean: [0.485, 0.456, 0.406] std: [0.229, 0.224, 0.225] order: '' + output_fp16: *use_pure_fp16 + channel_num: *image_channel - ToCHWImage: PostProcess: name: Topk diff --git a/ppcls/engine/trainer.py b/ppcls/engine/trainer.py index af091659..451531c1 100644 --- a/ppcls/engine/trainer.py +++ b/ppcls/engine/trainer.py @@ -112,6 +112,12 @@ class Trainer(object): else: self.scale_loss = 1.0 self.use_dynamic_loss_scaling = False + if self.amp: + AMP_RELATED_FLAGS_SETTING = { + 'FLAGS_cudnn_batchnorm_spatial_persistent': 1, + 'FLAGS_max_inplace_grad_add': 8, + } + paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING) self.train_loss_func = None self.eval_loss_func = None self.train_metric_func = None -- GitLab