"""Inception-v4 style image classifier assembled from PaddlePaddle v2 layers.

The public entry point is ``inception_v4``.  Every convolution in this file
goes through ``img_conv`` (linear convolution followed by batch
normalisation with ReLU).  Layer names are explicit strings, so the helper
functions below take care to generate exactly the same names for every
layer on every call.
"""
import paddle.v2 as paddle

__all__ = ['inception_v4']


def img_conv(name,
             input,
             num_filters,
             filter_size,
             stride,
             padding,
             num_channels=None):
    """Build one convolution unit: linear conv, then batch norm with ReLU.

    Args:
        name: Name given to the convolution layer; the batch-norm layer is
            named ``name + '_norm'``.
        input: Layer feeding the convolution.
        num_filters: Number of convolution filters.
        filter_size: Kernel size, an int or a 2-tuple as used by callers in
            this file.
        stride: Convolution stride.
        padding: Convolution padding, an int or a 2-tuple.
        num_channels: Channel count of ``input``; forwarded unchanged.

    Returns:
        The batch-norm layer that wraps the convolution.
    """
    linear_conv = paddle.layer.img_conv(
        name=name,
        input=input,
        num_channels=num_channels,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear())
    return paddle.layer.batch_norm(
        name=name + '_norm', input=linear_conv, act=paddle.activation.Relu())


def _conv_stack(prefix, bottom, num_channels, layers):
    """Chain ``img_conv`` units named ``<prefix>_conv0``, ``<prefix>_conv1``, ...

    Args:
        prefix: Name prefix shared by every unit of the chain.
        bottom: Layer feeding the first unit.
        num_channels: Channel count passed to the first unit; each later
            unit receives the previous unit's ``num_filters``.
        layers: Sequence of ``(num_filters, filter_size, stride, padding)``
            tuples, one per unit, in order.

    Returns:
        The output of the last unit in the chain.
    """
    net = bottom
    channels = num_channels
    for index, (num_filters, filter_size, stride, padding) in enumerate(layers):
        net = img_conv(
            name='{0}_conv{1}'.format(prefix, index),
            input=net,
            num_channels=channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding)
        channels = num_filters
    return net


def _max_pool_stride2(name, bottom, num_channels):
    """3x3 max pooling with stride 2; no padding argument is passed."""
    return paddle.layer.img_pool(
        name=name,
        input=bottom,
        num_channels=num_channels,
        pool_size=3,
        stride=2,
        pool_type=paddle.pooling.Max())


def _avg_pool_stride1(name, bottom, num_channels):
    """3x3 average pooling with stride 1 and padding 1."""
    return paddle.layer.img_pool(
        name=name,
        input=bottom,
        num_channels=num_channels,
        pool_size=3,
        stride=1,
        padding=1,
        pool_type=paddle.pooling.Avg())


def _stem_branch0(bottom):
    """First stem split: max pool alongside a stride-2 3x3 conv (96 filters)."""
    pooled = _max_pool_stride2('stem_branch0_pool0', bottom, 64)
    strided = _conv_stack('stem_branch0', bottom, 64, [(96, 3, 2, 1)])
    return paddle.layer.concat(input=[pooled, strided])


def _stem_branch1(bottom):
    """Second stem split: a short 1x1/3x3 path and a longer 7x1/1x7 path."""
    left = _conv_stack('stem_branch1_l', bottom, 160, [
        (64, 1, 1, 0),
        (96, 3, 1, 1),
    ])
    right = _conv_stack('stem_branch1_r', bottom, 160, [
        (64, 1, 1, 0),
        (64, (7, 1), 1, (3, 0)),
        (64, (1, 7), 1, (0, 3)),
        (96, 3, 1, 1),
    ])
    return paddle.layer.concat(input=[left, right])


def _stem_branch2(bottom):
    """Third stem split: stride-2 3x3 conv (192 filters) alongside a max pool."""
    # The conv is created before the pool here, the reverse of the first
    # split; the concat order follows the same conv-then-pool order.
    strided = _conv_stack('stem_branch2', bottom, 192, [(192, 3, 2, 1)])
    pooled = _max_pool_stride2('stem_branch2_pool0', bottom, 192)
    return paddle.layer.concat(input=[strided, pooled])


def stem(input):
    """Build the network stem: three plain convs, then three two-way splits.

    Args:
        input: Image layer; the first conv is declared with 3 input channels.

    Returns:
        The concat layer produced by the last split.
    """
    trunk = _conv_stack('stem', input, 3, [
        (32, 3, 2, 1),
        (32, 3, 1, 1),
        (64, 3, 1, 1),
    ])
    return _stem_branch2(_stem_branch1(_stem_branch0(trunk)))


def Inception_A(input, depth):
    """Build one Inception-A block with four parallel branches.

    Args:
        input: Layer feeding the block; declared with 384 channels.
        depth: Index of this block, embedded in every layer name so that
            repeated blocks get distinct names.

    Returns:
        Concat of the four branch outputs (96 filters each).
    """
    tag = 'inceptA{0}'.format(depth)

    pooled = _avg_pool_stride1(tag + '_branch0_pool0', input, 384)
    branch0 = _conv_stack(tag + '_branch0', pooled, 384, [(96, 1, 1, 0)])
    branch1 = _conv_stack(tag + '_branch1', input, 384, [(96, 1, 1, 0)])
    branch2 = _conv_stack(tag + '_branch2', input, 384, [
        (64, 1, 1, 0),
        (96, 3, 1, 1),
    ])
    branch3 = _conv_stack(tag + '_branch3', input, 384, [
        (64, 1, 1, 0),
        (96, 3, 1, 1),
        (96, 3, 1, 1),
    ])
    return paddle.layer.concat(input=[branch0, branch1, branch2, branch3])


def Inception_B(input, depth):
    """Build one Inception-B block with four parallel branches.

    Args:
        input: Layer feeding the block; declared with 1024 channels.
        depth: Index of this block, embedded in every layer name.

    Returns:
        Concat of the four branch outputs (128, 384, 256 and 256 filters).
    """
    tag = 'inceptB{0}'.format(depth)

    pooled = _avg_pool_stride1(tag + '_branch0_pool0', input, 1024)
    branch0 = _conv_stack(tag + '_branch0', pooled, 1024, [(128, 1, 1, 0)])
    branch1 = _conv_stack(tag + '_branch1', input, 1024, [(384, 1, 1, 0)])
    branch2 = _conv_stack(tag + '_branch2', input, 1024, [
        (192, 1, 1, 0),
        (224, (1, 7), 1, (0, 3)),
        (256, (7, 1), 1, (3, 0)),
    ])
    branch3 = _conv_stack(tag + '_branch3', input, 1024, [
        (192, 1, 1, 0),
        (192, (1, 7), 1, (0, 3)),
        (224, (7, 1), 1, (3, 0)),
        (224, (1, 7), 1, (0, 3)),
        (256, (7, 1), 1, (3, 0)),
    ])
    return paddle.layer.concat(input=[branch0, branch1, branch2, branch3])


def Inception_C(input, depth):
    """Build one Inception-C block; two of its branches fork into two heads.

    Args:
        input: Layer feeding the block; declared with 1536 channels.
        depth: Index of this block, embedded in every layer name.

    Returns:
        Concat of six outputs with 256 filters each: two single-conv
        branches plus the two heads of each forked branch.
    """
    tag = 'inceptC{0}'.format(depth)

    pooled = _avg_pool_stride1(tag + '_branch0_pool0', input, 1536)
    branch0 = _conv_stack(tag + '_branch0', pooled, 1536, [(256, 1, 1, 0)])
    branch1 = _conv_stack(tag + '_branch1', input, 1536, [(256, 1, 1, 0)])

    # Branch 2: one 1x1 conv whose output feeds both a 1x3 and a 3x1 head.
    fork2 = _conv_stack(tag + '_branch2', input, 1536, [(384, 1, 1, 0)])
    branch2_head_1x3 = img_conv(
        name=tag + '_branch2_conv1',
        input=fork2,
        num_channels=384,
        num_filters=256,
        filter_size=(1, 3),
        stride=1,
        padding=(0, 1))
    branch2_head_3x1 = img_conv(
        name=tag + '_branch2_conv2',
        input=fork2,
        num_channels=384,
        num_filters=256,
        filter_size=(3, 1),
        stride=1,
        padding=(1, 0))

    # Branch 3: three chained convs, then the same style of fork; here the
    # 3x1 head is created first.
    fork3 = _conv_stack(tag + '_branch3', input, 1536, [
        (384, 1, 1, 0),
        (448, (1, 3), 1, (0, 1)),
        (512, (3, 1), 1, (1, 0)),
    ])
    branch3_head_3x1 = img_conv(
        name=tag + '_branch3_conv3',
        input=fork3,
        num_channels=512,
        num_filters=256,
        filter_size=(3, 1),
        stride=1,
        padding=(1, 0))
    branch3_head_1x3 = img_conv(
        name=tag + '_branch3_conv4',
        input=fork3,
        num_channels=512,
        num_filters=256,
        filter_size=(1, 3),
        stride=1,
        padding=(0, 1))

    return paddle.layer.concat(input=[
        branch0, branch1, branch2_head_1x3, branch2_head_3x1,
        branch3_head_3x1, branch3_head_1x3
    ])


def Reduction_A(input):
    """Build the Reduction-A block: a max pool and two stride-2 conv branches.

    Args:
        input: Layer feeding the block; declared with 384 channels.

    Returns:
        Concat of the pooled input, a 384-filter branch and a 256-filter
        branch.
    """
    pooled = _max_pool_stride2('ReductA_branch0_pool0', input, 384)
    branch1 = _conv_stack('ReductA_branch1', input, 384, [(384, 3, 2, 1)])
    branch2 = _conv_stack('ReductA_branch2', input, 384, [
        (192, 1, 1, 0),
        (224, 3, 1, 1),
        (256, 3, 2, 1),
    ])
    return paddle.layer.concat(input=[pooled, branch1, branch2])


def Reduction_B(input):
    """Build the Reduction-B block: a max pool and two stride-2 conv branches.

    Args:
        input: Layer feeding the block; declared with 1024 channels.

    Returns:
        Concat of the pooled input, a 192-filter branch and a 320-filter
        branch.
    """
    pooled = _max_pool_stride2('ReductB_branch0_pool0', input, 1024)
    branch1 = _conv_stack('ReductB_branch1', input, 1024, [
        (192, 1, 1, 0),
        (192, 3, 2, 1),
    ])
    branch2 = _conv_stack('ReductB_branch2', input, 1024, [
        (256, 1, 1, 0),
        (256, (1, 7), 1, (0, 3)),
        (320, (7, 1), 1, (3, 0)),
        (320, 3, 2, 1),
    ])
    return paddle.layer.concat(input=[pooled, branch1, branch2])


def inception_v4(input, class_dim):
    """Assemble the full classifier.

    The layout is: stem, 4 x Inception-A, Reduction-A, 7 x Inception-B,
    Reduction-B, 3 x Inception-C, a 7x7 average pool, dropout (rate 0.2)
    and a softmax fully-connected layer.

    Args:
        input: Image layer handed to ``stem``.
        class_dim: Size of the final fully-connected (softmax) layer.

    Returns:
        The softmax output layer, named ``'incept_fc'``.
    """
    net = stem(input)

    for depth in range(4):
        net = Inception_A(net, depth)
    net = Reduction_A(net)

    for depth in range(7):
        net = Inception_B(net, depth)
    net = Reduction_B(net)

    for depth in range(3):
        net = Inception_C(net, depth)

    pooled = paddle.layer.img_pool(
        name='incept_avg_pool',
        input=net,
        num_channels=1536,
        pool_size=7,
        stride=1,
        pool_type=paddle.pooling.Avg())
    dropped = paddle.layer.dropout(input=pooled, dropout_rate=0.2)
    return paddle.layer.fc(
        name='incept_fc',
        input=dropped,
        size=class_dim,
        act=paddle.activation.Softmax())