Unverified commit b401e10b, authored by littletomatodonkey, committed by GitHub

Revert "add amp support for SE_ResNet50_vd (#509)" (#515)

This reverts commit d5105825.
Parent commit: d5105825
# Training configuration (PaddleClas style) for ResNet50_vd on ImageNet-1k
# with mixed-precision (AMP) training enabled.
# NOTE(review): nesting/indentation restored — the original paste had every
# key flattened to column 0, which is not valid YAML for this schema.
mode: 'train'
ARCHITECTURE:
    name: 'ResNet50_vd'

pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
total_images: 1281167            # ImageNet-1k training-set size
save_interval: 1                 # save a checkpoint every epoch
validate: True
valid_interval: 1                # evaluate every epoch
epochs: 200
topk: 5
image_shape: [3, 224, 224]

# mixed precision training
use_amp: True
use_pure_fp16: False
multi_precision: False
scale_loss: 128.0                # initial loss-scaling factor
# Fixed typo: was "use_dynamic_loss_scaleing" — Paddle's AMP API only
# recognises "use_dynamic_loss_scaling", so the misspelled key would be
# silently ignored and dynamic loss scaling never enabled.
use_dynamic_loss_scaling: True
data_format: "NCHW"

use_mix: True                    # enable Mixup (see TRAIN.mix below)
ls_epsilon: 0.1                  # label-smoothing epsilon

LEARNING_RATE:
    function: 'Cosine'
    params:
        lr: 0.1

OPTIMIZER:
    function: 'Momentum'
    params:
        momentum: 0.9
    regularizer:
        function: 'L2'
        factor: 0.000070

TRAIN:
    batch_size: 256
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/train_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1         # 1 = horizontal flip
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:
    mix:
        - MixupOperator:
            alpha: 0.2           # Beta-distribution parameter for Mixup

VALID:
    batch_size: 64
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/val_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - ResizeImage:
            resize_short: 256
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:
...@@ -33,16 +33,16 @@ __all__ = [ ...@@ -33,16 +33,16 @@ __all__ = [
class ConvBNLayer(nn.Layer): class ConvBNLayer(nn.Layer):
def __init__(self, def __init__(
num_channels, self,
num_filters, num_channels,
filter_size, num_filters,
stride=1, filter_size,
groups=1, stride=1,
is_vd_mode=False, groups=1,
act=None, is_vd_mode=False,
name=None, act=None,
data_format="NCHW"): name=None, ):
super(ConvBNLayer, self).__init__() super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode self.is_vd_mode = is_vd_mode
...@@ -57,8 +57,7 @@ class ConvBNLayer(nn.Layer): ...@@ -57,8 +57,7 @@ class ConvBNLayer(nn.Layer):
padding=(filter_size - 1) // 2, padding=(filter_size - 1) // 2,
groups=groups, groups=groups,
weight_attr=ParamAttr(name=name + "_weights"), weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False, bias_attr=False)
data_format=data_format)
if name == "conv1": if name == "conv1":
bn_name = "bn_" + name bn_name = "bn_" + name
else: else:
...@@ -69,8 +68,7 @@ class ConvBNLayer(nn.Layer): ...@@ -69,8 +68,7 @@ class ConvBNLayer(nn.Layer):
param_attr=ParamAttr(name=bn_name + '_scale'), param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'), bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean', moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', moving_variance_name=bn_name + '_variance')
data_format=data_format)
def forward(self, inputs): def forward(self, inputs):
if self.is_vd_mode: if self.is_vd_mode:
...@@ -88,8 +86,7 @@ class BottleneckBlock(nn.Layer): ...@@ -88,8 +86,7 @@ class BottleneckBlock(nn.Layer):
shortcut=True, shortcut=True,
if_first=False, if_first=False,
reduction_ratio=16, reduction_ratio=16,
name=None, name=None):
data_format="NCHW"):
super(BottleneckBlock, self).__init__() super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
...@@ -97,29 +94,25 @@ class BottleneckBlock(nn.Layer): ...@@ -97,29 +94,25 @@ class BottleneckBlock(nn.Layer):
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
act='relu', act='relu',
name=name + "_branch2a", name=name + "_branch2a")
data_format=data_format)
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
stride=stride, stride=stride,
act='relu', act='relu',
name=name + "_branch2b", name=name + "_branch2b")
data_format=data_format)
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
act=None, act=None,
name=name + "_branch2c", name=name + "_branch2c")
data_format=data_format)
self.scale = SELayer( self.scale = SELayer(
num_channels=num_filters * 4, num_channels=num_filters * 4,
num_filters=num_filters * 4, num_filters=num_filters * 4,
reduction_ratio=reduction_ratio, reduction_ratio=reduction_ratio,
name='fc_' + name, name='fc_' + name)
data_format=data_format)
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
...@@ -128,8 +121,7 @@ class BottleneckBlock(nn.Layer): ...@@ -128,8 +121,7 @@ class BottleneckBlock(nn.Layer):
filter_size=1, filter_size=1,
stride=1, stride=1,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
name=name + "_branch1", name=name + "_branch1")
data_format=data_format)
self.shortcut = shortcut self.shortcut = shortcut
...@@ -156,8 +148,7 @@ class BasicBlock(nn.Layer): ...@@ -156,8 +148,7 @@ class BasicBlock(nn.Layer):
shortcut=True, shortcut=True,
if_first=False, if_first=False,
reduction_ratio=16, reduction_ratio=16,
name=None, name=None):
data_format="NCHW"):
super(BasicBlock, self).__init__() super(BasicBlock, self).__init__()
self.stride = stride self.stride = stride
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
...@@ -166,22 +157,19 @@ class BasicBlock(nn.Layer): ...@@ -166,22 +157,19 @@ class BasicBlock(nn.Layer):
filter_size=3, filter_size=3,
stride=stride, stride=stride,
act='relu', act='relu',
name=name + "_branch2a", name=name + "_branch2a")
data_format=data_format)
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
act=None, act=None,
name=name + "_branch2b", name=name + "_branch2b")
data_format=data_format)
self.scale = SELayer( self.scale = SELayer(
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
reduction_ratio=reduction_ratio, reduction_ratio=reduction_ratio,
name='fc_' + name, name='fc_' + name)
data_format=data_format)
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
...@@ -190,8 +178,7 @@ class BasicBlock(nn.Layer): ...@@ -190,8 +178,7 @@ class BasicBlock(nn.Layer):
filter_size=1, filter_size=1,
stride=1, stride=1,
is_vd_mode=False if if_first else True, is_vd_mode=False if if_first else True,
name=name + "_branch1", name=name + "_branch1")
data_format=data_format)
self.shortcut = shortcut self.shortcut = shortcut
...@@ -210,12 +197,7 @@ class BasicBlock(nn.Layer): ...@@ -210,12 +197,7 @@ class BasicBlock(nn.Layer):
class SELayer(nn.Layer): class SELayer(nn.Layer):
def __init__(self, def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
num_channels,
num_filters,
reduction_ratio,
name=None,
data_format="NCHW"):
super(SELayer, self).__init__() super(SELayer, self).__init__()
self.pool2d_gap = AdaptiveAvgPool2D(1) self.pool2d_gap = AdaptiveAvgPool2D(1)
...@@ -252,16 +234,10 @@ class SELayer(nn.Layer): ...@@ -252,16 +234,10 @@ class SELayer(nn.Layer):
class SE_ResNet_vd(nn.Layer): class SE_ResNet_vd(nn.Layer):
def __init__(self, def __init__(self, layers=50, class_dim=1000):
layers=50,
class_dim=1000,
input_image_channel=3,
data_format="NCHW"):
super(SE_ResNet_vd, self).__init__() super(SE_ResNet_vd, self).__init__()
self.layers = layers self.layers = layers
self.data_format = data_format
self.input_image_channel = input_image_channel
supported_layers = [18, 34, 50, 101, 152, 200] supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \ assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format( "supported layers are {} but input layer is {}".format(
...@@ -282,31 +258,27 @@ class SE_ResNet_vd(nn.Layer): ...@@ -282,31 +258,27 @@ class SE_ResNet_vd(nn.Layer):
num_filters = [64, 128, 256, 512] num_filters = [64, 128, 256, 512]
self.conv1_1 = ConvBNLayer( self.conv1_1 = ConvBNLayer(
num_channels=self.input_image_channel, num_channels=3,
num_filters=32, num_filters=32,
filter_size=3, filter_size=3,
stride=2, stride=2,
act='relu', act='relu',
name="conv1_1", name="conv1_1")
data_format=self.data_format)
self.conv1_2 = ConvBNLayer( self.conv1_2 = ConvBNLayer(
num_channels=32, num_channels=32,
num_filters=32, num_filters=32,
filter_size=3, filter_size=3,
stride=1, stride=1,
act='relu', act='relu',
name="conv1_2", name="conv1_2")
data_format=self.data_format)
self.conv1_3 = ConvBNLayer( self.conv1_3 = ConvBNLayer(
num_channels=32, num_channels=32,
num_filters=64, num_filters=64,
filter_size=3, filter_size=3,
stride=1, stride=1,
act='relu', act='relu',
name="conv1_3", name="conv1_3")
data_format=self.data_format) self.pool2d_max = MaxPool2D(kernel_size=3, stride=2, padding=1)
self.pool2d_max = MaxPool2D(
kernel_size=3, stride=2, padding=1, data_format=self.data_format)
self.block_list = [] self.block_list = []
if layers >= 50: if layers >= 50:
...@@ -329,8 +301,7 @@ class SE_ResNet_vd(nn.Layer): ...@@ -329,8 +301,7 @@ class SE_ResNet_vd(nn.Layer):
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut, shortcut=shortcut,
if_first=block == i == 0, if_first=block == i == 0,
name=conv_name, name=conv_name))
data_format=self.data_format))
self.block_list.append(bottleneck_block) self.block_list.append(bottleneck_block)
shortcut = True shortcut = True
else: else:
......
Markdown is supported
0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Register to comment