diff --git a/image_classification/infer.py b/image_classification/infer.py
index d28824bced5d4c7a64ee4d0dc4469af6ce7ea65a..81027440663c222b86a3d3df984af99205f7b9a7 100644
--- a/image_classification/infer.py
+++ b/image_classification/infer.py
@@ -12,6 +12,7 @@ import alexnet
 import googlenet
 import inception_v4
 import inception_resnet_v2
+import se_resnext
 
 DATA_DIM = 3 * 224 * 224  # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
 CLASS_DIM = 102
@@ -29,7 +30,7 @@ def main():
         help='The model for image classification',
         choices=[
             'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
-            'inception-resnet-v2', 'inception_v4'
+            'inception-resnet-v2', 'inception_v4', 'se-resnext'
         ])
     parser.add_argument(
         'params_path', help='The file which stores the parameters')
@@ -59,6 +60,8 @@ def main():
             image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
     elif args.model == 'inception_v4':
         out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
+    elif args.model == 'se-resnext':
+        out = se_resnext.se_resnext50(image, class_dim=CLASS_DIM)
 
     # load parameters
     with gzip.open(args.params_path, 'r') as f:
diff --git a/image_classification/se_resnext.py b/image_classification/se_resnext.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee49313110f7ddb557f8c51eea1ae3060ceeb944
--- /dev/null
+++ b/image_classification/se_resnext.py
@@ -0,0 +1,192 @@
+import paddle.v2 as paddle
+
+__all__ = ['se_resnext50']
+
+
+def squeeze_excitation(input,
+                       num_channels,
+                       pool_size,
+                       reduction_ratio=16,
+                       name='__SE'):
+    """Rescale the channels of `input` with a squeeze-and-excitation gate.
+
+    The input is average-pooled with a `pool_size` window, passed through a
+    ReLU bottleneck fc layer and a sigmoid fc layer, and the resulting
+    weights are applied to `input` via `broadcast_scale`.
+
+    Args:
+        input: Feature-map layer to recalibrate.
+        num_channels: Number of channels in `input`.
+        pool_size: Pooling window; presumably the spatial size of `input`,
+            so that the pooling is global (the layer is named `globalpool`).
+        reduction_ratio: Divisor applied to `num_channels` to get the
+            bottleneck fc size.
+        name: Prefix for the names of the created layers.
+
+    Returns:
+        The `broadcast_scale` layer, i.e. `input` scaled by the gate.
+    """
+    squeeze = paddle.layer.img_pool(
+        name='{0}_globalpool'.format(name),
+        input=input,
+        pool_size=pool_size,
+        stride=1,
+        num_channels=num_channels,
+        pool_type=paddle.pooling.Avg())
+    squeeze = paddle.layer.fc(
+        name='{0}_fc0'.format(name),
+        input=squeeze,
+        # Floor division keeps the size an int on both Python 2 and 3.
+        size=num_channels // reduction_ratio,
+        act=paddle.activation.Relu())
+    excitation = paddle.layer.fc(
+        name='{0}_fc1'.format(name),
+        input=squeeze,
+        size=num_channels,
+        act=paddle.activation.Sigmoid())
+    scale = paddle.layer.broadcast_scale(input=input, weight=excitation)
+    return scale
+
+
+def se_resnext50(input, class_dim):
+    """Build the SE-ResNeXt-50 classification network.
+
+    A 7x7 stem convolution and max pool are followed by four groups of
+    3, 4, 6 and 3 SE bottleneck blocks, an average pool and a softmax fc.
+
+    NOTE(review): the hard-coded pooling sizes (`out_size` and the final
+    7x7 pool) presumably assume 224x224 inputs; confirm before changing
+    the input resolution.
+
+    Args:
+        input: Image layer; the stem convolution expects 3 channels.
+        class_dim: Number of output classes.
+
+    Returns:
+        The softmax output layer of size `class_dim`.
+    """
+    conv0 = paddle.layer.img_conv(
+        name='conv0',
+        input=input,
+        num_channels=3,
+        num_filters=64,
+        filter_size=7,
+        # Half the filter size; `//` keeps the padding an int on Python 3.
+        padding=(7 - 1) // 2,
+        stride=2,
+        act=paddle.activation.Linear())
+    conv0 = paddle.layer.batch_norm(
+        name='conv0_norm', input=conv0, act=paddle.activation.Relu())
+    pool0 = paddle.layer.img_pool(
+        name='resnext_pool0',
+        input=conv0,
+        pool_size=3,
+        stride=2,
+        num_channels=64,
+        pool_type=paddle.pooling.Max())
+
+    def conv_block(input, group, depth, input_channels, num_filters, stride,
+                   cardinality, out_size):
+        """Build one SE bottleneck block with a residual shortcut.
+
+        Stacks 1x1, grouped 3x3 and 1x1 convolutions (each followed by
+        batch norm), applies squeeze-excitation to the result and adds the
+        shortcut; the shortcut is a strided 1x1 projection with batch norm
+        when `input_channels` differs from the block's output channels.
+        """
+        conv0 = paddle.layer.img_conv(
+            name='conv{0}_{1}_0'.format(group, depth),
+            input=input,
+            num_channels=input_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act=paddle.activation.Linear())
+        conv0 = paddle.layer.batch_norm(
+            name='conv{0}_{1}_0_norm'.format(group, depth),
+            input=conv0,
+            act=paddle.activation.Relu())
+        conv1 = paddle.layer.img_conv(
+            name='conv{0}_{1}_1'.format(group, depth),
+            input=conv0,
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            padding=1,
+            stride=stride,
+            groups=cardinality,
+            act=paddle.activation.Linear())
+        conv1 = paddle.layer.batch_norm(
+            name='conv{0}_{1}_1_norm'.format(group, depth),
+            input=conv1,
+            act=paddle.activation.Relu())
+        conv2 = paddle.layer.img_conv(
+            name='conv{0}_{1}_2'.format(group, depth),
+            input=conv1,
+            num_channels=num_filters,
+            num_filters=num_filters * 2,
+            filter_size=1,
+            act=paddle.activation.Linear())
+        conv2 = paddle.layer.batch_norm(
+            name='conv{0}_{1}_2_norm'.format(group, depth),
+            input=conv2,
+            act=paddle.activation.Linear())
+
+        scale = squeeze_excitation(
+            name='SE{0}_{1}'.format(group, depth),
+            input=conv2,
+            num_channels=num_filters * 2,
+            pool_size=out_size)
+
+        if input_channels == num_filters * 2:
+            shortcut = input
+        else:
+            shortcut = paddle.layer.img_conv(
+                name='shortcut_proj_{0}'.format(group),
+                input=input,
+                num_channels=input_channels,
+                num_filters=num_filters * 2,
+                filter_size=1,
+                stride=stride,
+                act=paddle.activation.Linear())
+            shortcut = paddle.layer.batch_norm(
+                name='shortcut_proj_{0}_norm'.format(group),
+                input=shortcut,
+                act=paddle.activation.Linear())
+
+        return paddle.layer.addto(
+            input=[scale, shortcut], act=paddle.activation.Relu())
+
+    # Per-group settings; index g configures block group g + 1.
+    depth = [3, 4, 6, 3]
+    num_filters = [128, 256, 512, 1024]
+    input_channels = [64, 256, 512, 1024]
+    strides = [1, 2, 2, 2]
+    out_size = [56, 28, 14, 7]
+    conv = pool0
+    for group in range(4):
+        for i in range(depth[group]):
+            conv = conv_block(
+                input=conv,
+                group=group + 1,
+                depth=i,
+                input_channels=input_channels[group]
+                if i == 0 else num_filters[group] * 2,
+                num_filters=num_filters[group],
+                stride=strides[group] if i == 0 else 1,
+                cardinality=32,
+                out_size=out_size[group])
+
+    pool1 = paddle.layer.img_pool(
+        name='resnext_globalpool',
+        input=conv,
+        pool_size=7,
+        stride=1,
+        num_channels=2048,
+        pool_type=paddle.pooling.Avg())
+
+    out = paddle.layer.fc(
+        name='resnext_fc',
+        input=pool1,
+        size=class_dim,
+        act=paddle.activation.Softmax())
+    return out
diff --git a/image_classification/train.py b/image_classification/train.py
index 5c7e70d9999d5b5014d796e615dd99729eccabc4..99688b0d0d776d629ff997bf07dfa8c02ce42276 100644
--- a/image_classification/train.py
+++ b/image_classification/train.py
@@ -10,6 +10,7 @@ import alexnet
 import googlenet
 import inception_v4
 import inception_resnet_v2
+import se_resnext
 
 DATA_DIM = 3 * 224 * 224  # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
 CLASS_DIM = 102
@@ -24,7 +25,7 @@ def main():
         help='The model for image classification',
         choices=[
             'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet',
-            'inception-resnet-v2', 'inception_v4'
+            'inception-resnet-v2', 'inception_v4', 'se-resnext'
         ])
     args = parser.parse_args()
 
@@ -64,6 +65,8 @@ def main():
             image, class_dim=CLASS_DIM, dropout_rate=0.5, data_dim=DATA_DIM)
     elif args.model == 'inception_v4':
         out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
+    elif args.model == 'se-resnext':
+        out = se_resnext.se_resnext50(image, class_dim=CLASS_DIM)
 
     cost = paddle.layer.classification_cost(input=out, label=lbl)
 