提交 ac50d65d 编写于 作者: W wqz960

fix model format

上级 ee3ce16b
# PaddleClas training configuration: GhostNet_x0_5 on ImageNet-1k (ILSVRC2012).
mode: 'train'
ARCHITECTURE:
    name: 'GhostNet_x0_5'

pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
# Image count of the ImageNet-1k training split.
total_images: 1281167
save_interval: 1
validate: True
valid_interval: 1
epochs: 360
topk: 5
image_shape: [3, 224, 224]

use_mix: False
# Label-smoothing epsilon.
ls_epsilon: 0.1

LEARNING_RATE:
    function: 'CosineWarmup'
    params:
        # Base LR paired with the 2048 train batch size below.
        lr: 0.8

OPTIMIZER:
    function: 'Momentum'
    params:
        momentum: 0.9
    regularizer:
        function: 'L2'
        factor: 0.0000400

TRAIN:
    batch_size: 2048
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/train_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:

VALID:
    batch_size: 64
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/val_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - ResizeImage:
            resize_short: 256
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:
# PaddleClas training configuration: GhostNet_x1_0 on ImageNet-1k (ILSVRC2012).
mode: 'train'
ARCHITECTURE:
    name: 'GhostNet_x1_0'

pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
# Image count of the ImageNet-1k training split.
total_images: 1281167
save_interval: 1
validate: True
valid_interval: 1
epochs: 360
topk: 5
image_shape: [3, 224, 224]

use_mix: False
# Label-smoothing epsilon.
ls_epsilon: 0.1

LEARNING_RATE:
    function: 'CosineWarmup'
    params:
        # Base LR paired with the 1024 train batch size below.
        lr: 0.4

OPTIMIZER:
    function: 'Momentum'
    params:
        momentum: 0.9
    regularizer:
        function: 'L2'
        factor: 0.0000400

TRAIN:
    batch_size: 1024
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/train_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:

VALID:
    batch_size: 64
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/val_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - ResizeImage:
            resize_short: 256
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:
# PaddleClas training configuration: GhostNet_x1_3 on ImageNet-1k (ILSVRC2012).
# Note: unlike the x0_5/x1_0 variants, this one adds AutoAugment to the
# training pipeline.
mode: 'train'
ARCHITECTURE:
    name: 'GhostNet_x1_3'

pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
# Image count of the ImageNet-1k training split.
total_images: 1281167
save_interval: 1
validate: True
valid_interval: 1
epochs: 360
topk: 5
image_shape: [3, 224, 224]

use_mix: False
# Label-smoothing epsilon.
ls_epsilon: 0.1

LEARNING_RATE:
    function: 'CosineWarmup'
    params:
        # Base LR paired with the 1024 train batch size below.
        lr: 0.4

OPTIMIZER:
    function: 'Momentum'
    params:
        momentum: 0.9
    regularizer:
        function: 'L2'
        factor: 0.0000400

TRAIN:
    batch_size: 1024
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/train_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - RandCropImage:
            size: 224
        - RandFlipImage:
            flip_code: 1
        - AutoAugment:
        - NormalizeImage:
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:

VALID:
    batch_size: 64
    num_workers: 4
    file_list: "./dataset/ILSVRC2012/val_list.txt"
    data_dir: "./dataset/ILSVRC2012/"
    shuffle_seed: 0
    transforms:
        - DecodeImage:
            to_rgb: True
            to_np: False
            channel_first: False
        - ResizeImage:
            resize_short: 256
        - CropImage:
            size: 224
        - NormalizeImage:
            scale: 1.0/255.0
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: ''
        - ToCHWImage:
...@@ -42,7 +42,7 @@ from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_1 ...@@ -42,7 +42,7 @@ from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_1
from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C
from .darts_gs import DARTS_GS_6M, DARTS_GS_4M from .darts_gs import DARTS_GS_6M, DARTS_GS_4M
from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet from .resnet_acnet import ResNet18_ACNet, ResNet34_ACNet, ResNet50_ACNet, ResNet101_ACNet, ResNet152_ACNet
from .ghostnet import GhostNet_0_5, GhostNet_1_0, GhostNet_1_3 from .ghostnet import GhostNet_x0_5, GhostNet_x1_0, GhostNet_x1_3
# distillation model # distillation model
from .distillation_models import ResNet50_vd_distill_MobileNetV3_large_x1_0, ResNeXt101_32x16d_wsl_distill_ResNet50_vd from .distillation_models import ResNet50_vd_distill_MobileNetV3_large_x1_0, ResNeXt101_32x16d_wsl_distill_ResNet50_vd
......
...@@ -7,11 +7,11 @@ import math ...@@ -7,11 +7,11 @@ import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
__all__ = ["GhostNet", "GhostNet_0_5", "GhostNet_1_0", "GhostNet_1_3"] __all__ = ["GhostNet", "GhostNet_x0_5", "GhostNet_x1_0", "GhostNet_x1_3"]
class GhostNet(): class GhostNet():
def __init__(self, width_mult): def __init__(self, scale):
cfgs = [ cfgs = [
# k, t, c, SE, s # k, t, c, SE, s
[3, 16, 16, 0, 1], [3, 16, 16, 0, 1],
...@@ -32,7 +32,69 @@ class GhostNet(): ...@@ -32,7 +32,69 @@ class GhostNet():
[5, 960, 160, 1, 1] [5, 960, 160, 1, 1]
] ]
self.cfgs = cfgs self.cfgs = cfgs
self.width_mult = width_mult self.scale = scale
def net(self, input, class_dim=1000):
    """Build the GhostNet forward graph and return the classifier logits.

    Args:
        input: 4-D input tensor (NCHW layout, per the conv/pool usage below).
        class_dim: number of output classes of the final FC layer.

    Returns:
        The logits tensor produced by the final fully connected layer.
    """
    # Stem: 3x3 stride-2 conv; channels scaled by self.scale and rounded
    # to a multiple of 4.
    output_channel = int(self._make_divisible(16 * self.scale, 4))
    x = self.conv_bn_layer(
        input=input,
        num_filters=output_channel,
        filter_size=3,
        stride=2,
        groups=1,
        act="relu",
        name="conv1")
    # Stack of ghost bottlenecks driven by self.cfgs rows:
    # (kernel, expansion size, out channels, use-SE flag, stride).
    idx = 0
    for k, exp_size, c, use_se, s in self.cfgs:
        output_channel = int(self._make_divisible(c * self.scale, 4))
        hidden_channel = int(
            self._make_divisible(exp_size * self.scale, 4))
        x = self.ghost_bottleneck(
            inp=x,
            hidden_dim=hidden_channel,
            oup=output_channel,
            kernel_size=k,
            stride=s,
            use_se=use_se,
            name="ghost_bottle_" + str(idx))
        idx += 1
    # Head: 1x1 conv using exp_size from the last cfgs row, then global
    # average pooling.
    output_channel = int(
        self._make_divisible(exp_size * self.scale, 4))
    x = self.conv_bn_layer(
        input=x,
        num_filters=output_channel,
        filter_size=1,
        stride=1,
        groups=1,
        act="relu",
        name="conv2")
    x = fluid.layers.pool2d(
        input=x, pool_type='avg', global_pooling=True)
    # 1x1 conv-bn expands pooled features to 1280 channels, followed by
    # dropout and the FC classifier (uniform init scaled by fan-in).
    output_channel = 1280
    out = self.conv_bn_layer(
        input=x,
        num_filters=output_channel,
        filter_size=1,
        stride=1,
        groups=1,
        act="relu",
        name="fc_0")
    out = fluid.layers.dropout(x=out, dropout_prob=0.2)
    stdv = 1.0 / math.sqrt(out.shape[1] * 1.0)
    out = fluid.layers.fc(
        input=out,
        size=class_dim,
        param_attr=ParamAttr(
            name="fc_1_weight",
            initializer=fluid.initializer.Uniform(-stdv, stdv)),
        bias_attr=ParamAttr(name="fc_1_offset"))
    return out
def _make_divisible(self, v, divisor, min_value=None): def _make_divisible(self, v, divisor, min_value=None):
""" """
...@@ -56,8 +118,7 @@ class GhostNet(): ...@@ -56,8 +118,7 @@ class GhostNet():
stride=1, stride=1,
groups=1, groups=1,
act=None, act=None,
name=None, name=None):
data_format="NCHW"):
x = fluid.layers.conv2d( x = fluid.layers.conv2d(
input=input, input=input,
num_filters=num_filters, num_filters=num_filters,
...@@ -68,14 +129,11 @@ class GhostNet(): ...@@ -68,14 +129,11 @@ class GhostNet():
act=None, act=None,
param_attr=ParamAttr( param_attr=ParamAttr(
initializer=fluid.initializer.MSRA(), name=name + "_weights"), initializer=fluid.initializer.MSRA(), name=name + "_weights"),
bias_attr=False, bias_attr=False)
name=name + "_conv_op",
data_format=data_format)
x = fluid.layers.batch_norm( x = fluid.layers.batch_norm(
input=x, input=x,
act=act, act=act,
name=name + "_bn",
param_attr=ParamAttr( param_attr=ParamAttr(
name=name + "_bn_scale", name=name + "_bn_scale",
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
...@@ -85,11 +143,10 @@ class GhostNet(): ...@@ -85,11 +143,10 @@ class GhostNet():
regularizer=fluid.regularizer.L2DecayRegularizer( regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)), regularization_coeff=0.0)),
moving_mean_name=name + "_bn_mean", moving_mean_name=name + "_bn_mean",
moving_variance_name=name + "_bn_variance", moving_variance_name=name + "_bn_variance")
data_layout=data_format)
return x return x
def SElayer(self, input, num_channels, reduction_ratio=4, name=None): def se_layer(self, input, num_channels, reduction_ratio=4, name=None):
pool = fluid.layers.pool2d( pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True) input=input, pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
...@@ -111,9 +168,9 @@ class GhostNet(): ...@@ -111,9 +168,9 @@ class GhostNet():
name=name + '_exc_weights'), name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset')) bias_attr=ParamAttr(name=name + '_exc_offset'))
excitation = fluid.layers.clip( excitation = fluid.layers.clip(
x=excitation, min=0, max=1, name=name + '_clip') x=excitation, min=0, max=1)
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale return se_scale
def depthwise_conv(self, def depthwise_conv(self,
inp, inp,
...@@ -121,19 +178,17 @@ class GhostNet(): ...@@ -121,19 +178,17 @@ class GhostNet():
kernel_size, kernel_size,
stride=1, stride=1,
relu=False, relu=False,
name=None, name=None):
data_format="NCHW"):
return self.conv_bn_layer( return self.conv_bn_layer(
input=inp, input=inp,
num_filters=oup, num_filters=oup,
filter_size=kernel_size, filter_size=kernel_size,
stride=stride, stride=stride,
groups=inp.shape[1] if data_format == "NCHW" else inp.shape[-1], groups=inp.shape[1],
act="relu" if relu else None, act="relu" if relu else None,
name=name + "_dw", name=name + "_dw")
data_format=data_format)
def GhostModule(self, def ghost_module(self,
inp, inp,
oup, oup,
kernel_size=1, kernel_size=1,
...@@ -141,8 +196,7 @@ class GhostNet(): ...@@ -141,8 +196,7 @@ class GhostNet():
dw_size=3, dw_size=3,
stride=1, stride=1,
relu=True, relu=True,
name=None, name=None):
data_format="NCHW"):
self.oup = oup self.oup = oup
init_channels = int(math.ceil(oup / ratio)) init_channels = int(math.ceil(oup / ratio))
new_channels = int(init_channels * (ratio - 1)) new_channels = int(init_channels * (ratio - 1))
...@@ -153,8 +207,7 @@ class GhostNet(): ...@@ -153,8 +207,7 @@ class GhostNet():
stride=stride, stride=stride,
groups=1, groups=1,
act="relu" if relu else None, act="relu" if relu else None,
name=name + "_primary_conv", name=name + "_primary_conv")
data_format="NCHW")
cheap_operation = self.conv_bn_layer( cheap_operation = self.conv_bn_layer(
input=primary_conv, input=primary_conv,
num_filters=new_channels, num_filters=new_channels,
...@@ -162,30 +215,27 @@ class GhostNet(): ...@@ -162,30 +215,27 @@ class GhostNet():
stride=1, stride=1,
groups=init_channels, groups=init_channels,
act="relu" if relu else None, act="relu" if relu else None,
name=name + "_cheap_operation", name=name + "_cheap_operation")
data_format=data_format)
out = fluid.layers.concat( out = fluid.layers.concat(
[primary_conv, cheap_operation], axis=1, name=name + "_concat") [primary_conv, cheap_operation], axis=1)
return out return out
def GhostBottleneck(self, def ghost_bottleneck(self,
inp, inp,
hidden_dim, hidden_dim,
oup, oup,
kernel_size, kernel_size,
stride, stride,
use_se, use_se,
name=None, name=None):
data_format="NCHW"):
inp_channels = inp.shape[1] inp_channels = inp.shape[1]
x = self.GhostModule( x = self.ghost_module(
inp=inp, inp=inp,
oup=hidden_dim, oup=hidden_dim,
kernel_size=1, kernel_size=1,
stride=1, stride=1,
relu=True, relu=True,
name=name + "GhostBottle_1", name=name + "ghost_module_1")
data_format="NCHW")
if stride == 2: if stride == 2:
x = self.depthwise_conv( x = self.depthwise_conv(
inp=x, inp=x,
...@@ -193,17 +243,16 @@ class GhostNet(): ...@@ -193,17 +243,16 @@ class GhostNet():
kernel_size=kernel_size, kernel_size=kernel_size,
stride=stride, stride=stride,
relu=False, relu=False,
name=name + "_dw2", name=name + "_dw2")
data_format="NCHW")
if use_se: if use_se:
x = self.SElayer( x = self.se_layer(
input=x, num_channels=hidden_dim, name=name + "SElayer") input=x, num_channels=hidden_dim, name=name + "se_layer")
x = self.GhostModule( x = self.ghost_module(
inp=x, inp=x,
oup=oup, oup=oup,
kernel_size=1, kernel_size=1,
relu=False, relu=False,
name=name + "GhostModule_2") name=name + "ghost_module_2")
if stride == 1 and inp_channels == oup: if stride == 1 and inp_channels == oup:
shortcut = inp shortcut = inp
else: else:
...@@ -213,8 +262,7 @@ class GhostNet(): ...@@ -213,8 +262,7 @@ class GhostNet():
kernel_size=kernel_size, kernel_size=kernel_size,
stride=stride, stride=stride,
relu=False, relu=False,
name=name + "shortcut_depthwise_conv", name=name + "shortcut_depthwise_conv")
data_format="NCHW")
shortcut = self.conv_bn_layer( shortcut = self.conv_bn_layer(
input=shortcut, input=shortcut,
num_filters=oup, num_filters=oup,
...@@ -222,104 +270,22 @@ class GhostNet(): ...@@ -222,104 +270,22 @@ class GhostNet():
stride=1, stride=1,
groups=1, groups=1,
act=None, act=None,
name=name + "shortcut_conv_bn", name=name + "shortcut_conv_bn")
data_format="NCHW")
return fluid.layers.elementwise_add( return fluid.layers.elementwise_add(
x=x, y=shortcut, axis=-1, act=None, name=name + "elementwise_add") x=x, y=shortcut, axis=-1, act=None)
def net(self, input, class_dim=1000):
# build first layer:
output_channel = int(self._make_divisible(16 * self.width_mult, 4))
x = self.conv_bn_layer(
input=input,
num_filters=output_channel,
filter_size=3,
stride=2,
groups=1,
act="relu",
name="firstlayer",
data_format="NCHW")
# build inverted residual blocks
idx = 0
for k, exp_size, c, use_se, s in self.cfgs:
output_channel = int(self._make_divisible(c * self.width_mult, 4))
hidden_channel = int(
self._make_divisible(exp_size * self.width_mult, 4))
x = self.GhostBottleneck(
inp=x,
hidden_dim=hidden_channel,
oup=output_channel,
kernel_size=k,
stride=s,
use_se=use_se,
name="GhostBottle_" + str(idx),
data_format="NCHW")
idx += 1
# build last several layers
output_channel = int(
self._make_divisible(exp_size * self.width_mult, 4))
x = self.conv_bn_layer(
input=x,
num_filters=output_channel,
filter_size=1,
stride=1,
groups=1,
act="relu",
name="lastlayer",
data_format="NCHW")
x = fluid.layers.pool2d(
input=x, pool_type='avg', global_pooling=True, data_format="NCHW")
output_channel = 1280
stdv = 1.0 / math.sqrt(x.shape[1] * 1.0)
out = fluid.layers.conv2d(
input=x,
num_filters=output_channel,
filter_size=1,
groups=1,
param_attr=ParamAttr(
name="fc_0_w",
initializer=fluid.initializer.Uniform(-stdv, stdv)),
bias_attr=False,
name="fc_0")
out = fluid.layers.batch_norm(
input=out,
act="relu",
name="fc_0_bn",
param_attr=ParamAttr(
name="fc_0_bn_scale",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
bias_attr=ParamAttr(
name="fc_0_bn_offset",
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.0)),
moving_mean_name="fc_0_bn_mean",
moving_variance_name="fc_0_bn_variance",
data_layout="NCHW")
out = fluid.layers.dropout(x=out, dropout_prob=0.2)
stdv = 1.0 / math.sqrt(out.shape[1] * 1.0)
out = fluid.layers.fc(
input=out,
size=class_dim,
param_attr=ParamAttr(
name="fc_1_w",
initializer=fluid.initializer.Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name="fc_1_bias"))
return out
def GhostNet_x0_5():
    """GhostNet with channel widths scaled by 0.5."""
    model = GhostNet(scale=0.5)
    return model


def GhostNet_x1_0():
    """GhostNet with the baseline channel widths (scale 1.0)."""
    model = GhostNet(scale=1.0)
    return model


def GhostNet_x1_3():
    """GhostNet with channel widths scaled by 1.3."""
    model = GhostNet(scale=1.3)
    return model
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册