From d51058258beb40085dbbad04d372d4e2195f166e Mon Sep 17 00:00:00 2001
From: furnace <34057289+windstamp@users.noreply.github.com>
Date: Fri, 25 Dec 2020 13:43:33 +0800
Subject: [PATCH] add amp support for SE_ResNet50_vd (#509)

* add amp support for SE_ResNet50_vd

* updates for data_format
---
 configs/ResNet/ResNet50_vd_fp16.yaml         | 85 ++++++++++++++++++
 ppcls/modeling/architectures/se_resnet_vd.py | 91 +++++++++++++-------
 2 files changed, 145 insertions(+), 31 deletions(-)
 create mode 100644 configs/ResNet/ResNet50_vd_fp16.yaml

diff --git a/configs/ResNet/ResNet50_vd_fp16.yaml b/configs/ResNet/ResNet50_vd_fp16.yaml
new file mode 100644
index 00000000..81a75cef
--- /dev/null
+++ b/configs/ResNet/ResNet50_vd_fp16.yaml
@@ -0,0 +1,85 @@
+mode: 'train'
+ARCHITECTURE:
+    name: 'ResNet50_vd'
+
+pretrained_model: ""
+model_save_dir: "./output/"
+classes_num: 1000
+total_images: 1281167
+save_interval: 1
+validate: True
+valid_interval: 1
+epochs: 200
+topk: 5
+image_shape: [3, 224, 224]
+
+# mixed precision training
+use_amp: True
+use_pure_fp16: False
+multi_precision: False
+scale_loss: 128.0
+use_dynamic_loss_scaling: True
+data_format: "NCHW"
+
+use_mix: True
+ls_epsilon: 0.1
+
+LEARNING_RATE:
+    function: 'Cosine'
+    params:
+        lr: 0.1
+
+OPTIMIZER:
+    function: 'Momentum'
+    params:
+        momentum: 0.9
+    regularizer:
+        function: 'L2'
+        factor: 0.000070
+
+TRAIN:
+    batch_size: 256
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/train_list.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - RandCropImage:
+            size: 224
+        - RandFlipImage:
+            flip_code: 1
+        - NormalizeImage:
+            scale: 1./255.
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
+    mix:
+        - MixupOperator:
+            alpha: 0.2
+
+VALID:
+    batch_size: 64
+    num_workers: 4
+    file_list: "./dataset/ILSVRC2012/val_list.txt"
+    data_dir: "./dataset/ILSVRC2012/"
+    shuffle_seed: 0
+    transforms:
+        - DecodeImage:
+            to_rgb: True
+            to_np: False
+            channel_first: False
+        - ResizeImage:
+            resize_short: 256
+        - CropImage:
+            size: 224
+        - NormalizeImage:
+            scale: 1.0/255.0
+            mean: [0.485, 0.456, 0.406]
+            std: [0.229, 0.224, 0.225]
+            order: ''
+        - ToCHWImage:
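Note: the AMP keys above are consumed by the training harness rather than by the model itself. As orientation only, here is a minimal, self-contained sketch of how keys like scale_loss and use_dynamic_loss_scaling typically map onto paddle.amp; the stand-in model, data, and optimizer below are hypothetical and are not part of this patch.

    import paddle

    model = paddle.nn.Linear(8, 2)  # hypothetical stand-in for the real backbone
    optimizer = paddle.optimizer.Momentum(
        learning_rate=0.1, momentum=0.9, parameters=model.parameters())
    # scale_loss maps to init_loss_scaling; use_dynamic_loss_scaling keeps its name.
    scaler = paddle.amp.GradScaler(
        init_loss_scaling=128.0, use_dynamic_loss_scaling=True)

    x, y = paddle.randn([4, 8]), paddle.randn([4, 2])
    with paddle.amp.auto_cast():  # use_amp: FP16-safe ops run in half precision
        loss = paddle.nn.functional.mse_loss(model(x), y)
    scaled = scaler.scale(loss)   # scale the loss so FP16 gradients do not underflow
    scaled.backward()
    scaler.minimize(optimizer, scaled)  # unscales gradients; skips the step on inf/nan
    optimizer.clear_grad()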
diff --git a/ppcls/modeling/architectures/se_resnet_vd.py b/ppcls/modeling/architectures/se_resnet_vd.py
index 5b7a587c..cba3d768 100644
--- a/ppcls/modeling/architectures/se_resnet_vd.py
+++ b/ppcls/modeling/architectures/se_resnet_vd.py
@@ -33,16 +33,16 @@ __all__ = [
 
 
 class ConvBNLayer(nn.Layer):
-    def __init__(
-            self,
-            num_channels,
-            num_filters,
-            filter_size,
-            stride=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None, ):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None,
+                 name=None,
+                 data_format="NCHW"):
         super(ConvBNLayer, self).__init__()
 
         self.is_vd_mode = is_vd_mode
@@ -57,7 +57,8 @@ class ConvBNLayer(nn.Layer):
             padding=(filter_size - 1) // 2,
             groups=groups,
             weight_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
+            bias_attr=False,
+            data_format=data_format)
         if name == "conv1":
             bn_name = "bn_" + name
         else:
@@ -68,7 +69,8 @@ class ConvBNLayer(nn.Layer):
             param_attr=ParamAttr(name=bn_name + '_scale'),
             bias_attr=ParamAttr(bn_name + '_offset'),
             moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
+            moving_variance_name=bn_name + '_variance',
+            data_format=data_format)
 
     def forward(self, inputs):
         if self.is_vd_mode:
@@ -86,7 +88,8 @@ class BottleneckBlock(nn.Layer):
                  shortcut=True,
                  if_first=False,
                  reduction_ratio=16,
-                 name=None):
+                 name=None,
+                 data_format="NCHW"):
         super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
@@ -94,25 +97,29 @@ class BottleneckBlock(nn.Layer):
             num_filters=num_filters,
             filter_size=1,
             act='relu',
-            name=name + "_branch2a")
+            name=name + "_branch2a",
+            data_format=data_format)
         self.conv1 = ConvBNLayer(
             num_channels=num_filters,
             num_filters=num_filters,
             filter_size=3,
             stride=stride,
             act='relu',
-            name=name + "_branch2b")
+            name=name + "_branch2b",
+            data_format=data_format)
         self.conv2 = ConvBNLayer(
             num_channels=num_filters,
             num_filters=num_filters * 4,
             filter_size=1,
             act=None,
-            name=name + "_branch2c")
+            name=name + "_branch2c",
+            data_format=data_format)
         self.scale = SELayer(
             num_channels=num_filters * 4,
             num_filters=num_filters * 4,
             reduction_ratio=reduction_ratio,
-            name='fc_' + name)
+            name='fc_' + name,
+            data_format=data_format)
 
         if not shortcut:
             self.short = ConvBNLayer(
@@ -121,7 +128,8 @@ class BottleneckBlock(nn.Layer):
                 filter_size=1,
                 stride=1,
                 is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
+                name=name + "_branch1",
+                data_format=data_format)
 
         self.shortcut = shortcut
 
@@ -148,7 +156,8 @@ class BasicBlock(nn.Layer):
                  shortcut=True,
                  if_first=False,
                  reduction_ratio=16,
-                 name=None):
+                 name=None,
+                 data_format="NCHW"):
         super(BasicBlock, self).__init__()
         self.stride = stride
         self.conv0 = ConvBNLayer(
@@ -157,19 +166,22 @@ class BasicBlock(nn.Layer):
             num_channels=num_channels,
             num_filters=num_filters,
             filter_size=3,
             stride=stride,
             act='relu',
-            name=name + "_branch2a")
+            name=name + "_branch2a",
+            data_format=data_format)
         self.conv1 = ConvBNLayer(
             num_channels=num_filters,
             num_filters=num_filters,
             filter_size=3,
             act=None,
-            name=name + "_branch2b")
+            name=name + "_branch2b",
+            data_format=data_format)
         self.scale = SELayer(
             num_channels=num_filters,
             num_filters=num_filters,
             reduction_ratio=reduction_ratio,
-            name='fc_' + name)
+            name='fc_' + name,
+            data_format=data_format)
         if not shortcut:
             self.short = ConvBNLayer(
@@ -178,7 +190,8 @@ class BasicBlock(nn.Layer):
                 filter_size=1,
                 stride=1,
                 is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
+                name=name + "_branch1",
+                data_format=data_format)
 
         self.shortcut = shortcut
 
@@ -197,7 +210,12 @@ class BasicBlock(nn.Layer):
 
 
 class SELayer(nn.Layer):
-    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 reduction_ratio,
+                 name=None,
+                 data_format="NCHW"):
         super(SELayer, self).__init__()
 
         self.pool2d_gap = AdaptiveAvgPool2D(1)
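Note: SELayer's forward pass is not touched by this patch, so it does not appear in the hunks above. For context, the layer implements the standard squeeze-and-excitation gating; the sketch below restates that computation with assumed layer names and is not the file's actual code.

    import paddle
    import paddle.nn as nn
    import paddle.nn.functional as F

    class SESketch(nn.Layer):
        def __init__(self, num_channels, reduction_ratio=16):
            super(SESketch, self).__init__()
            self.pool = nn.AdaptiveAvgPool2D(1)  # squeeze: one value per channel
            mid = num_channels // reduction_ratio
            self.fc_squeeze = nn.Linear(num_channels, mid)
            self.fc_excite = nn.Linear(mid, num_channels)

        def forward(self, x):                        # x: [N, C, H, W]
            s = self.pool(x).squeeze(axis=[2, 3])    # [N, C] channel descriptors
            s = F.relu(self.fc_squeeze(s))           # bottleneck by reduction_ratio
            gates = F.sigmoid(self.fc_excite(s))     # per-channel weights in (0, 1)
            return x * gates.unsqueeze(axis=[2, 3])  # reweight the feature maps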
@@ -234,10 +252,16 @@ class SELayer(nn.Layer):
 
 
 class SE_ResNet_vd(nn.Layer):
-    def __init__(self, layers=50, class_dim=1000):
+    def __init__(self,
+                 layers=50,
+                 class_dim=1000,
+                 input_image_channel=3,
+                 data_format="NCHW"):
         super(SE_ResNet_vd, self).__init__()
 
         self.layers = layers
+        self.data_format = data_format
+        self.input_image_channel = input_image_channel
         supported_layers = [18, 34, 50, 101, 152, 200]
         assert layers in supported_layers, \
             "supported layers are {} but input layer is {}".format(
@@ -258,27 +282,31 @@ class SE_ResNet_vd(nn.Layer):
             num_filters = [64, 128, 256, 512]
 
         self.conv1_1 = ConvBNLayer(
-            num_channels=3,
+            num_channels=self.input_image_channel,
             num_filters=32,
             filter_size=3,
             stride=2,
             act='relu',
-            name="conv1_1")
+            name="conv1_1",
+            data_format=self.data_format)
         self.conv1_2 = ConvBNLayer(
             num_channels=32,
             num_filters=32,
             filter_size=3,
             stride=1,
             act='relu',
-            name="conv1_2")
+            name="conv1_2",
+            data_format=self.data_format)
         self.conv1_3 = ConvBNLayer(
             num_channels=32,
             num_filters=64,
             filter_size=3,
             stride=1,
             act='relu',
-            name="conv1_3")
-        self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
+            name="conv1_3",
+            data_format=self.data_format)
+        self.pool2d_max = MaxPool2D(
+            kernel_size=3, stride=2, padding=1, data_format=self.data_format)
 
         self.block_list = []
         if layers >= 50:
@@ -301,7 +329,8 @@ class SE_ResNet_vd(nn.Layer):
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut,
                         if_first=block == i == 0,
-                        name=conv_name))
+                        name=conv_name,
+                        data_format=self.data_format))
                 self.block_list.append(bottleneck_block)
                 shortcut = True
         else:
--
GitLab
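Note: with data_format threaded through every ConvBNLayer, BatchNorm, pooling, and block constructor, the backbone can now be built in NHWC, which is usually the faster layout for FP16 on Tensor Core GPUs; the shipped config keeps the NCHW default. The usage sketch below is an assumption rather than part of the patch, and it presumes the layers outside these hunks also honor data_format.

    import paddle
    from ppcls.modeling.architectures.se_resnet_vd import SE_ResNet_vd

    model = SE_ResNet_vd(layers=50, class_dim=1000, data_format="NHWC")
    x = paddle.randn([1, 224, 224, 3])  # NHWC: channels last
    with paddle.amp.auto_cast():        # pairs with use_amp in the config
        logits = model(x)
    print(logits.shape)                 # expected: [1, 1000]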