diff --git a/image_classification/alexnet.py b/image_classification/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..eaa7a3dc547a82fadb8ebf80984fd6dce9e4d9ce
--- /dev/null
+++ b/image_classification/alexnet.py
@@ -0,0 +1,48 @@
+import paddle.v2 as paddle
+
+__all__ = ['alexnet']
+
+
+def alexnet(input):
+    conv1 = paddle.layer.img_conv(
+        input=input,
+        filter_size=11,
+        num_channels=3,
+        num_filters=96,
+        stride=4,
+        padding=1)
+    cmrnorm1 = paddle.layer.img_cmrnorm(
+        input=conv1, size=5, scale=0.0001, power=0.75)
+    pool1 = paddle.layer.img_pool(input=cmrnorm1, pool_size=3, stride=2)
+
+    conv2 = paddle.layer.img_conv(
+        input=pool1,
+        filter_size=5,
+        num_filters=256,
+        stride=1,
+        padding=2,
+        groups=1)
+    cmrnorm2 = paddle.layer.img_cmrnorm(
+        input=conv2, size=5, scale=0.0001, power=0.75)
+    pool2 = paddle.layer.img_pool(input=cmrnorm2, pool_size=3, stride=2)
+
+    pool3 = paddle.networks.img_conv_group(
+        input=pool2,
+        pool_size=3,
+        pool_stride=2,
+        conv_num_filter=[384, 384, 256],
+        conv_filter_size=3,
+        pool_type=paddle.pooling.Max())
+
+    fc1 = paddle.layer.fc(
+        input=pool3,
+        size=4096,
+        act=paddle.activation.Relu(),
+        layer_attr=paddle.attr.Extra(drop_rate=0.5))
+    fc2 = paddle.layer.fc(
+        input=fc1,
+        size=4096,
+        act=paddle.activation.Relu(),
+        layer_attr=paddle.attr.Extra(drop_rate=0.5))
+
+    return fc2
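
Note that alexnet() deliberately stops at the 4096-wide fc2 feature layer; the softmax classifier head is attached by the caller, as train.py does below. A minimal sketch of that wiring, assuming the same paddle.v2 API used throughout this diff (CLASS_DIM here is a stand-in for the caller's class count):

    import paddle.v2 as paddle
    import alexnet

    CLASS_DIM = 100  # stand-in; train.py below defines the real value
    paddle.init(use_gpu=False, trainer_count=1)
    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(3 * 224 * 224))
    feature = alexnet.alexnet(image)
    out = paddle.layer.fc(
        input=feature, size=CLASS_DIM, act=paddle.activation.Softmax())
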
inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32) + ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64) + pool3 = paddle.layer.img_pool( + name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2) + + # stage 4 + ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64) + ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64) + ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64) + ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64) + ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128) + pool4 = paddle.layer.img_pool( + name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2) + + # stage 5 + ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128) + ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128) + pool5 = paddle.layer.img_pool( + name="pool5", + input=ince5b, + num_channels=1024, + pool_size=7, + stride=7, + pool_type=paddle.pooling.Avg()) + dropout = paddle.layer.addto( + input=pool5, + layer_attr=paddle.attr.Extra(drop_rate=0.4), + act=paddle.activation.Linear()) + + # fc for output 1 + pool_o1 = paddle.layer.img_pool( + name="pool_o1", + input=ince4a, + num_channels=512, + pool_size=5, + stride=3, + pool_type=paddle.pooling.Avg()) + conv_o1 = paddle.layer.img_conv( + name="conv_o1", + input=pool_o1, + filter_size=1, + num_filters=128, + stride=1, + padding=0) + fc_o1 = paddle.layer.fc( + name="fc_o1", + input=conv_o1, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + + # fc for output 2 + pool_o2 = paddle.layer.img_pool( + name="pool_o2", + input=ince4d, + num_channels=528, + pool_size=5, + stride=3, + pool_type=paddle.pooling.Avg()) + conv_o2 = paddle.layer.img_conv( + name="conv_o2", + input=pool_o2, + filter_size=1, + num_filters=128, + stride=1, + padding=0) + fc_o2 = paddle.layer.fc( + name="fc_o2", + input=conv_o2, + size=1024, + layer_attr=paddle.attr.Extra(drop_rate=0.7), + act=paddle.activation.Relu()) + + return dropout, fc_o1, fc_o2 diff --git a/image_classification/resnet.py b/image_classification/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..1da44aadb38ec81fe704367f135b6276c751dea9 --- /dev/null +++ b/image_classification/resnet.py @@ -0,0 +1,93 @@ +import paddle.v2 as paddle + +__all__ = ['resnet_imagenet', 'resnet_cifar10'] + + +def conv_bn_layer(input, + ch_out, + filter_size, + stride, + padding, + active_type=paddle.activation.Relu(), + ch_in=None): + tmp = paddle.layer.img_conv( + input=input, + filter_size=filter_size, + num_channels=ch_in, + num_filters=ch_out, + stride=stride, + padding=padding, + act=paddle.activation.Linear(), + bias_attr=False) + return paddle.layer.batch_norm(input=tmp, act=active_type) + + +def shortcut(input, n_out, stride, b_projection): + if b_projection: + return conv_bn_layer(input, n_out, 1, stride, 0, + paddle.activation.Linear()) + else: + return input + + +def basicblock(input, ch_out, stride, b_projection): + # TODO: bug fix for ch_in = input.num_filters + conv1 = conv_bn_layer(input, ch_out, 3, stride, 1) + conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear()) + short = shortcut(input, ch_out, stride, b_projection) + return paddle.layer.addto( + input=[conv2, short], act=paddle.activation.Relu()) + + +def bottleneck(input, ch_out, stride, b_projection): + # TODO: bug fix for ch_in = input.num_filters + conv1 = conv_bn_layer(input, ch_out, 1, stride, 0) + conv2 = 
diff --git a/image_classification/resnet.py b/image_classification/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..1da44aadb38ec81fe704367f135b6276c751dea9
--- /dev/null
+++ b/image_classification/resnet.py
@@ -0,0 +1,93 @@
+import paddle.v2 as paddle
+
+__all__ = ['resnet_imagenet', 'resnet_cifar10']
+
+
+def conv_bn_layer(input,
+                  ch_out,
+                  filter_size,
+                  stride,
+                  padding,
+                  active_type=paddle.activation.Relu(),
+                  ch_in=None):
+    tmp = paddle.layer.img_conv(
+        input=input,
+        filter_size=filter_size,
+        num_channels=ch_in,
+        num_filters=ch_out,
+        stride=stride,
+        padding=padding,
+        act=paddle.activation.Linear(),
+        bias_attr=False)
+    return paddle.layer.batch_norm(input=tmp, act=active_type)
+
+
+def shortcut(input, n_out, stride, b_projection):
+    if b_projection:
+        return conv_bn_layer(input, n_out, 1, stride, 0,
+                             paddle.activation.Linear())
+    else:
+        return input
+
+
+def basicblock(input, ch_out, stride, b_projection):
+    # TODO: bug fix for ch_in = input.num_filters
+    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1)
+    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear())
+    short = shortcut(input, ch_out, stride, b_projection)
+    return paddle.layer.addto(
+        input=[conv2, short], act=paddle.activation.Relu())
+
+
+def bottleneck(input, ch_out, stride, b_projection):
+    # TODO: bug fix for ch_in = input.num_filters
+    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0)
+    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1)
+    conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0,
+                          paddle.activation.Linear())
+    short = shortcut(input, ch_out * 4, stride, b_projection)
+    return paddle.layer.addto(
+        input=[conv3, short], act=paddle.activation.Relu())
+
+
+def layer_warp(block_func, input, features, count, stride):
+    conv = block_func(input, features, stride, True)
+    for i in range(1, count):
+        conv = block_func(conv, features, 1, False)
+    return conv
+
+
+def resnet_imagenet(input, depth=50):
+    # ResNet-18 stacks two basic blocks per stage (2-2-2-2).
+    cfg = {
+        18: ([2, 2, 2, 2], basicblock),
+        34: ([3, 4, 6, 3], basicblock),
+        50: ([3, 4, 6, 3], bottleneck),
+        101: ([3, 4, 23, 3], bottleneck),
+        152: ([3, 8, 36, 3], bottleneck)
+    }
+    stages, block_func = cfg[depth]
+    conv1 = conv_bn_layer(
+        input, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
+    pool1 = paddle.layer.img_pool(input=conv1, pool_size=3, stride=2)
+    res1 = layer_warp(block_func, pool1, 64, stages[0], 1)
+    res2 = layer_warp(block_func, res1, 128, stages[1], 2)
+    res3 = layer_warp(block_func, res2, 256, stages[2], 2)
+    res4 = layer_warp(block_func, res3, 512, stages[3], 2)
+    pool2 = paddle.layer.img_pool(
+        input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg())
+    return pool2
+
+
+def resnet_cifar10(input, depth=32):
+    # depth should be one of 20, 32, 44, 56, 110, 1202
+    assert (depth - 2) % 6 == 0
+    n = (depth - 2) // 6
+    conv1 = conv_bn_layer(
+        input, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
+    res1 = layer_warp(basicblock, conv1, 16, n, 1)
+    res2 = layer_warp(basicblock, res1, 32, n, 2)
+    res3 = layer_warp(basicblock, res2, 64, n, 2)
+    pool = paddle.layer.img_pool(
+        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
+    return pool
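
The depth bookkeeping behind resnet_cifar10: each of the three stages stacks n basic blocks of two 3x3 convolutions, which together with the stem convolution and the final classifier gives depth = 6n + 2, hence the assertion. A one-line check mirroring that assertion:

    # depth = 6n + 2: three stages x n basic blocks x two 3x3 convs, plus
    # the stem conv and the classifier head. depth=32 therefore means n=5.
    def blocks_per_stage(depth):
        assert (depth - 2) % 6 == 0, 'depth must be 6n + 2 (20, 32, 44, ...)'
        return (depth - 2) // 6

    assert blocks_per_stage(32) == 5
    assert blocks_per_stage(110) == 18
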
diff --git a/image_classification/train.py b/image_classification/train.py
old mode 100644
new mode 100755
index d917bd8019bf31bf31133d7fb03247ad3b495af7..a8817c606f33945c92272e1a02b903f6b68e37a4
--- a/image_classification/train.py
+++ b/image_classification/train.py
@@ -1,38 +1,63 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License
-
 import gzip
-
 import paddle.v2 as paddle
 import reader
 import vgg
+import resnet
+import alexnet
+import googlenet
+import argparse
+import os
 
 DATA_DIM = 3 * 224 * 224
-CLASS_DIM = 1000
+CLASS_DIM = 100
 BATCH_SIZE = 128
 
 
 def main():
+    # parse the argument
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'data_dir',
+        help='The data directory which contains train.list and val.list')
+    parser.add_argument(
+        'model',
+        help='The model for image classification',
+        choices=['alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet', 'googlenet'])
+    args = parser.parse_args()
+
     # PaddlePaddle init
-    paddle.init(use_gpu=True, trainer_count=4)
+    paddle.init(use_gpu=True, trainer_count=1)
 
     image = paddle.layer.data(
         name="image", type=paddle.data_type.dense_vector(DATA_DIM))
     lbl = paddle.layer.data(
         name="label", type=paddle.data_type.integer_value(CLASS_DIM))
-    net = vgg.vgg13(image)
+
+    extra_layers = None
+    if args.model == 'alexnet':
+        net = alexnet.alexnet(image)
+    elif args.model == 'vgg13':
+        net = vgg.vgg13(image)
+    elif args.model == 'vgg16':
+        net = vgg.vgg16(image)
+    elif args.model == 'vgg19':
+        net = vgg.vgg19(image)
+    elif args.model == 'resnet':
+        net = resnet.resnet_imagenet(image)
+    elif args.model == 'googlenet':
+        net, fc_o1, fc_o2 = googlenet.googlenet(image)
+        out1 = paddle.layer.fc(
+            input=fc_o1, size=CLASS_DIM, act=paddle.activation.Softmax())
+        loss1 = paddle.layer.cross_entropy_cost(
+            input=out1, label=lbl, coeff=0.3)
+        paddle.evaluator.classification_error(input=out1, label=lbl)
+        out2 = paddle.layer.fc(
+            input=fc_o2, size=CLASS_DIM, act=paddle.activation.Softmax())
+        loss2 = paddle.layer.cross_entropy_cost(
+            input=out2, label=lbl, coeff=0.3)
+        paddle.evaluator.classification_error(input=out2, label=lbl)
+        extra_layers = [loss1, loss2]
+
     out = paddle.layer.fc(
         input=net, size=CLASS_DIM, act=paddle.activation.Softmax())
     cost = paddle.layer.classification_cost(input=out, label=lbl)
@@ -45,16 +70,19 @@ def main():
         momentum=0.9,
         regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                          BATCH_SIZE),
-        learning_rate=0.01 / BATCH_SIZE,
+        learning_rate=0.001 / BATCH_SIZE,
         learning_rate_decay_a=0.1,
         learning_rate_decay_b=128000 * 35,
         learning_rate_schedule="discexp", )
 
     train_reader = paddle.batch(
-        paddle.reader.shuffle(reader.test_reader("train.list"), buf_size=1000),
+        paddle.reader.shuffle(
+            reader.train_reader(os.path.join(args.data_dir, 'train.list')),
+            buf_size=1000),
         batch_size=BATCH_SIZE)
     test_reader = paddle.batch(
-        reader.train_reader("test.list"), batch_size=BATCH_SIZE)
+        reader.test_reader(os.path.join(args.data_dir, 'val.list')),
+        batch_size=BATCH_SIZE)
 
     # End batch and end pass event handler
     def event_handler(event):
@@ -71,7 +99,10 @@ def main():
 
     # Create trainer
     trainer = paddle.trainer.SGD(
-        cost=cost,
-        parameters=parameters, update_equation=optimizer)
+        cost=cost,
+        parameters=parameters,
+        update_equation=optimizer,
+        extra_layers=extra_layers)
 
     trainer.train(
         reader=train_reader, num_passes=200, event_handler=event_handler)
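
Note that the added lines now use reader.train_reader for train.list and reader.test_reader for val.list; the previous code had the two reader functions crossed. With the argparse changes, the script is launched with a data directory (holding the train.list and val.list consumed by reader.py) and a model name, e.g. python train.py <data_dir> resnet. One non-obvious knob above is learning_rate_schedule="discexp", which decays the base rate by decay_a every decay_b samples. A hedged sketch of that schedule (the assumed formula should be checked against the paddle.v2 optimizer docs):

    # Assumed form of the "discexp" (discretized exponential) schedule:
    #   lr(n) = learning_rate * decay_a ** (n // decay_b)
    # where n is the number of samples processed so far.
    def discexp_lr(num_samples,
                   base_lr=0.001 / 128,
                   decay_a=0.1,
                   decay_b=128000 * 35):
        return base_lr * decay_a**(num_samples // decay_b)

    assert discexp_lr(0) == 0.001 / 128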