提交 4205f751 编写于 作者: W wangmeng28

Add SE-ResNeXt model

上级 51a08322
...@@ -12,6 +12,7 @@ import alexnet ...@@ -12,6 +12,7 @@ import alexnet
import googlenet import googlenet
import inception_v4 import inception_v4
import inception_resnet_v2 import inception_resnet_v2
import se_resnext
DATA_DIM = 3 * 224 * 224 # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2. DATA_DIM = 3 * 224 * 224 # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
CLASS_DIM = 102 CLASS_DIM = 102
...@@ -29,7 +30,7 @@ def main(): ...@@ -29,7 +30,7 @@ def main():
help='The model for image classification', help='The model for image classification',
choices=[ choices=[
'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet', 'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
'inception-resnet-v2', 'inception_v4' 'inception-resnet-v2', 'inception_v4', 'se-resnext'
]) ])
parser.add_argument( parser.add_argument(
'params_path', help='The file which stores the parameters') 'params_path', help='The file which stores the parameters')
...@@ -59,6 +60,8 @@ def main(): ...@@ -59,6 +60,8 @@ def main():
image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM) image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
elif args.model == 'inception_v4': elif args.model == 'inception_v4':
out = inception_v4.inception_v4(image, class_dim=CLASS_DIM) out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
elif args.model == 'se-resnext':
out = se_resnext.se_resnext50(image, class_dim=CLASS_DIM)
# load parameters # load parameters
with gzip.open(args.params_path, 'r') as f: with gzip.open(args.params_path, 'r') as f:
......
import paddle.v2 as paddle
# Public API of this module: only the model builder is exported by `import *`;
# `squeeze_excitation` stays importable by explicit name.
__all__ = ['se_resnext50']
def squeeze_excitation(input,
                       num_channels,
                       pool_size,
                       reduction_ratio=16,
                       name='__SE'):
    """Build a squeeze-and-excitation branch and use it to rescale ``input``.

    The branch is: average pooling -> fully connected (ReLU) with
    ``num_channels // reduction_ratio`` units -> fully connected (Sigmoid)
    with ``num_channels`` units. The sigmoid output is then passed, together
    with the original ``input``, to ``paddle.layer.broadcast_scale``.

    Args:
        input: The feature-map layer to be rescaled.
        num_channels: Number of channels of ``input``; also the size of the
            final gating layer.
        pool_size: Window size of the average pooling (stride is 1). The
            caller in this file passes the expected spatial size of
            ``input``, so the pooling is presumably global -- confirm if the
            input resolution changes.
        reduction_ratio: Divisor applied to ``num_channels`` to obtain the
            width of the bottleneck fully connected layer.
        name: Prefix used for the names of the created layers
            (``<name>_globalpool``, ``<name>_fc0``, ``<name>_fc1``).

    Returns:
        The layer produced by ``paddle.layer.broadcast_scale``.
    """
    squeeze = paddle.layer.img_pool(
        name='{0}_globalpool'.format(name),
        input=input,
        pool_size=pool_size,
        stride=1,
        num_channels=num_channels,
        pool_type=paddle.pooling.Avg())
    squeeze = paddle.layer.fc(
        name='{0}_fc0'.format(name),
        input=squeeze,
        # Floor division: a layer size must be an integer. Plain `/` only
        # produced an int under Python 2 semantics and yields a float under
        # Python 3 (or with `from __future__ import division`).
        size=num_channels // reduction_ratio,
        act=paddle.activation.Relu())
    excitation = paddle.layer.fc(
        name='{0}_fc1'.format(name),
        input=squeeze,
        size=num_channels,
        act=paddle.activation.Sigmoid())
    # NOTE(review): presumably scales each channel of `input` by the matching
    # gate value in `excitation` -- confirm against the Paddle layer docs.
    scale = paddle.layer.broadcast_scale(input=input, weight=excitation)
    return scale
def se_resnext50(input, class_dim):
    """Assemble the SE-ResNeXt-50 classification network.

    Layout: a 7x7 stride-2 convolution with batch norm and ReLU, a 3x3
    stride-2 max pooling, four stages of bottleneck blocks (3, 4, 6 and 3
    blocks) that each end in a squeeze-and-excitation rescaling and a
    residual addition, an average pooling, and a softmax classifier.

    Args:
        input: The image data layer. The convolution stem is built for
            3 input channels. The per-stage sizes in ``out_size`` are
            hard-coded; they presumably correspond to 224x224 inputs --
            confirm before using another resolution.
        class_dim: Number of output classes (size of the softmax layer).

    Returns:
        The softmax output layer named ``resnext_fc``.
    """
    conv0 = paddle.layer.img_conv(
        name='conv0',
        input=input,
        num_channels=3,
        num_filters=64,
        filter_size=7,
        # Floor division keeps the padding an int on Python 3 as well;
        # `(7 - 1) / 2` evaluates to the float 3.0 there.
        padding=(7 - 1) // 2,
        stride=2,
        act=paddle.activation.Linear())
    conv0 = paddle.layer.batch_norm(
        name='conv0_norm', input=conv0, act=paddle.activation.Relu())
    pool0 = paddle.layer.img_pool(
        name='resnext_pool0',
        input=conv0,
        pool_size=3,
        stride=2,
        num_channels=64,
        pool_type=paddle.pooling.Max())

    def conv_block(input, group, depth, input_channels, num_filters, stride,
                   cardinality, out_size):
        """Build one bottleneck block: 1x1 -> grouped 3x3 -> 1x1 -> SE -> add.

        Args:
            input: Input layer of the block.
            group: Stage number, used in layer names.
            depth: Index of the block inside its stage, used in layer names.
            input_channels: Number of channels of ``input``.
            num_filters: Width of the first two convolutions; the block
                outputs ``num_filters * 2`` channels.
            stride: Stride of the 3x3 convolution and of the projection
                shortcut.
            cardinality: Number of groups of the 3x3 convolution.
            out_size: Pooling window handed to ``squeeze_excitation``.

        Returns:
            The ReLU-activated sum of the rescaled branch and the shortcut.
        """
        conv0 = paddle.layer.img_conv(
            name='conv{0}_{1}_0'.format(group, depth),
            input=input,
            num_channels=input_channels,
            num_filters=num_filters,
            filter_size=1,
            act=paddle.activation.Linear())
        conv0 = paddle.layer.batch_norm(
            name='conv{0}_{1}_0_norm'.format(group, depth),
            input=conv0,
            act=paddle.activation.Relu())
        conv1 = paddle.layer.img_conv(
            name='conv{0}_{1}_1'.format(group, depth),
            input=conv0,
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            padding=1,
            stride=stride,
            groups=cardinality,
            act=paddle.activation.Linear())
        conv1 = paddle.layer.batch_norm(
            name='conv{0}_{1}_1_norm'.format(group, depth),
            input=conv1,
            act=paddle.activation.Relu())
        conv2 = paddle.layer.img_conv(
            name='conv{0}_{1}_2'.format(group, depth),
            input=conv1,
            num_channels=num_filters,
            num_filters=num_filters * 2,
            filter_size=1,
            act=paddle.activation.Linear())
        # No non-linearity here: ReLU is applied after the residual addition.
        conv2 = paddle.layer.batch_norm(
            name='conv{0}_{1}_2_norm'.format(group, depth),
            input=conv2,
            act=paddle.activation.Linear())
        scale = squeeze_excitation(
            name='SE{0}_{1}'.format(group, depth),
            input=conv2,
            num_channels=num_filters * 2,
            pool_size=out_size)
        if input_channels == num_filters * 2:
            # Channel counts already agree: identity shortcut.
            shortcut = input
        else:
            # Project the input to the block's output width. The layer name
            # only carries the stage number; with the stage tables below the
            # projection is needed only for the first block of each stage.
            shortcut = paddle.layer.img_conv(
                name='shortcut_proj_{0}'.format(group),
                input=input,
                num_channels=input_channels,
                num_filters=num_filters * 2,
                filter_size=1,
                stride=stride,
                act=paddle.activation.Linear())
            shortcut = paddle.layer.batch_norm(
                name='shortcut_proj_{0}_norm'.format(group),
                input=shortcut,
                act=paddle.activation.Linear())
        return paddle.layer.addto(
            input=[scale, shortcut], act=paddle.activation.Relu())

    # Per-stage configuration; index k describes stage k + 1.
    depth = [3, 4, 6, 3]  # number of blocks
    num_filters = [128, 256, 512, 1024]  # bottleneck width (output is 2x)
    input_channels = [64, 256, 512, 1024]  # channels entering the stage
    strides = [1, 2, 2, 2]  # stride of the first block
    out_size = [56, 28, 14, 7]  # pooling window used by the SE branch

    conv = pool0
    for group in range(4):
        for i in range(depth[group]):
            first_block = i == 0
            # Only the first block of a stage sees the previous stage's
            # channel count and applies the stage stride.
            block_input_channels = (input_channels[group]
                                    if first_block else num_filters[group] * 2)
            block_stride = strides[group] if first_block else 1
            conv = conv_block(
                input=conv,
                group=group + 1,
                depth=i,
                input_channels=block_input_channels,
                num_filters=num_filters[group],
                stride=block_stride,
                cardinality=32,
                out_size=out_size[group])

    # Same values as the former literals 7 and 2048, now derived from the
    # stage tables so they cannot drift apart.
    pool1 = paddle.layer.img_pool(
        name='resnext_globalpool',
        input=conv,
        pool_size=out_size[-1],
        stride=1,
        num_channels=num_filters[-1] * 2,
        pool_type=paddle.pooling.Avg())
    out = paddle.layer.fc(
        name='resnext_fc',
        input=pool1,
        size=class_dim,
        act=paddle.activation.Softmax())
    return out
...@@ -10,6 +10,7 @@ import alexnet ...@@ -10,6 +10,7 @@ import alexnet
import googlenet import googlenet
import inception_v4 import inception_v4
import inception_resnet_v2 import inception_resnet_v2
import se_resnext
DATA_DIM = 3 * 224 * 224 # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2. DATA_DIM = 3 * 224 * 224 # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
CLASS_DIM = 102 CLASS_DIM = 102
...@@ -24,7 +25,7 @@ def main(): ...@@ -24,7 +25,7 @@ def main():
help='The model for image classification', help='The model for image classification',
choices=[ choices=[
'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet', 'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
'inception-resnet-v2', 'inception_v4' 'inception-resnet-v2', 'inception_v4', 'se-resnext'
]) ])
args = parser.parse_args() args = parser.parse_args()
...@@ -64,6 +65,8 @@ def main(): ...@@ -64,6 +65,8 @@ def main():
image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM) image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
elif args.model == 'inception_v4': elif args.model == 'inception_v4':
out = inception_v4.inception_v4(image, class_dim=CLASS_DIM) out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
elif args.model == 'se-resnext':
out = se_resnext.se_resnext50(image, class_dim=CLASS_DIM)
cost = paddle.layer.classification_cost(input=out, label=lbl) cost = paddle.layer.classification_cost(input=out, label=lbl)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册