From cdc700bb3283cf3e8ce8ff83f2292d0a98e96a99 Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Tue, 31 Oct 2017 03:23:29 +0800
Subject: [PATCH] add resnet (#5206)

* add resnet

* optimize code
---
 python/paddle/v2/framework/layers.py          |   5 +-
 .../tests/test_image_classification_layer.py  |  23 ++++
 .../tests/test_image_classification_train.py  | 130 +++++++++++++++++-
 3 files changed, 152 insertions(+), 6 deletions(-)

diff --git a/python/paddle/v2/framework/layers.py b/python/paddle/v2/framework/layers.py
index 041a3b2c0b0..0212afec9dc 100644
--- a/python/paddle/v2/framework/layers.py
+++ b/python/paddle/v2/framework/layers.py
@@ -5,7 +5,7 @@ import re
 
 __all__ = [
     'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat',
-    'StaticRNN', 'cast'
+    'StaticRNN', 'cast', 'batch_norm'
 ]
 
 
@@ -150,7 +150,7 @@ def _create_op_func_(op_type):
             outputs[name] = [helper.create_tmp_variable(dtype=dtype)]
         helper.append_op(
             type=op_type, inputs=inputs, outputs=outputs, attrs=kwargs)
-        return out
+        return helper.append_activation(out)
 
     func.__name__ = op_type
     globals()[op_type] = func
@@ -160,6 +160,7 @@ def _create_op_func_(op_type):
 
 _create_op_func_('mean')
 _create_op_func_('mul')
+_create_op_func_('elementwise_add')
 _create_op_func_('dropout')
 _create_op_func_('reshape')
 
diff --git a/python/paddle/v2/framework/tests/test_image_classification_layer.py b/python/paddle/v2/framework/tests/test_image_classification_layer.py
index 908cf44b88a..7411689b615 100644
--- a/python/paddle/v2/framework/tests/test_image_classification_layer.py
+++ b/python/paddle/v2/framework/tests/test_image_classification_layer.py
@@ -70,6 +70,29 @@ class TestLayer(unittest.TestCase):
 
         # print str(program)
 
+    def test_elementwise_add_with_act(self):
+        program = Program()
+        init_program = Program()
+        image1 = layers.data(
+            name='pixel1',
+            shape=[3, 48, 48],
+            data_type='float32',
+            program=program,
+            init_program=init_program)
+        image2 = layers.data(
+            name='pixel2',
+            shape=[3, 48, 48],
+            data_type='float32',
+            program=program,
+            init_program=init_program)
+        out = layers.elementwise_add(
+            x=image1,
+            y=image2,
+            act='relu',
+            program=program,
+            init_program=init_program)
+        # print(program)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/v2/framework/tests/test_image_classification_train.py b/python/paddle/v2/framework/tests/test_image_classification_train.py
index 4eb9051261e..6b6dec4976d 100644
--- a/python/paddle/v2/framework/tests/test_image_classification_train.py
+++ b/python/paddle/v2/framework/tests/test_image_classification_train.py
@@ -10,6 +10,120 @@ from paddle.v2.framework.executor import Executor
 import numpy as np
 
 
+def resnet_cifar10(input, depth=32, program=None, init_program=None):
+    def conv_bn_layer(input,
+                      ch_out,
+                      filter_size,
+                      stride,
+                      padding,
+                      act='relu',
+                      program=None,
+                      init_program=None):
+        tmp = layers.conv2d(
+            input=input,
+            filter_size=filter_size,
+            num_filters=ch_out,
+            stride=stride,
+            padding=padding,
+            act=None,
+            bias_attr=False,
+            program=program,
+            init_program=init_program)
+        return layers.batch_norm(
+            input=tmp, act=act, program=program, init_program=init_program)
+
+    def shortcut(input, ch_in, ch_out, stride, program, init_program):
+        if ch_in != ch_out:
+            return conv_bn_layer(input, ch_out, 1, stride, 0, None, program,
+                                 init_program)
+        else:
+            return input
+
+    def basicblock(input,
+                   ch_in,
+                   ch_out,
+                   stride,
+                   program=program,
+                   init_program=init_program):
+        tmp = conv_bn_layer(
+            input,
+            ch_out,
+            3,
+            stride,
+            1,
+            program=program,
+            init_program=init_program)
+        tmp = conv_bn_layer(
+            tmp,
+            ch_out,
+            3,
+            1,
+            1,
+            act=None,
+            program=program,
+            init_program=init_program)
+        short = shortcut(input, ch_in, ch_out, stride, program, init_program)
+        return layers.elementwise_add(
+            x=tmp,
+            y=short,
+            act='relu',
+            program=program,
+            init_program=init_program)
+
+    def layer_warp(block_func, input, ch_in, ch_out, count, stride, program,
+                   init_program):
+        tmp = block_func(input, ch_in, ch_out, stride, program, init_program)
+        for i in range(1, count):
+            tmp = block_func(tmp, ch_out, ch_out, 1, program, init_program)
+        return tmp
+
+    assert (depth - 2) % 6 == 0
+    n = (depth - 2) / 6
+    conv1 = conv_bn_layer(
+        input=input,
+        ch_out=16,
+        filter_size=3,
+        stride=1,
+        padding=1,
+        program=program,
+        init_program=init_program)
+    res1 = layer_warp(
+        basicblock,
+        conv1,
+        16,
+        16,
+        n,
+        1,
+        program=program,
+        init_program=init_program)
+    res2 = layer_warp(
+        basicblock,
+        res1,
+        16,
+        32,
+        n,
+        2,
+        program=program,
+        init_program=init_program)
+    res3 = layer_warp(
+        basicblock,
+        res2,
+        32,
+        64,
+        n,
+        2,
+        program=program,
+        init_program=init_program)
+    pool = layers.pool2d(
+        input=res3,
+        pool_size=8,
+        pool_type='avg',
+        pool_stride=1,
+        program=program,
+        init_program=init_program)
+    return pool
+
+
 def vgg16_bn_drop(input, program, init_program):
     def conv_block(input,
                    num_filter,
@@ -75,8 +189,16 @@ label = layers.data(
     data_type='int64',
     program=program,
     init_program=init_program)
-vgg_net = vgg16_bn_drop(images, program, init_program)
-predict = layers.fc(input=vgg_net,
+
+# Add neural network config
+# option 1. resnet
+net = resnet_cifar10(images, 32, program, init_program)
+# option 2. vgg
+# net = vgg16_bn_drop(images, program, init_program)
+
+# print(program)
+
+predict = layers.fc(input=net,
                     size=classdim,
                     act='softmax',
                     program=program,
@@ -123,8 +245,8 @@ for pass_id in range(PASS_NUM):
                        fetch_list=[avg_cost])
 
         loss = np.array(outs[0])
-        # print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
-        #       " loss:" + str(loss))
+        print("pass_id:" + str(pass_id) + " batch_id:" + str(batch_id) +
+              " loss:" + str(loss))
         batch_id = batch_id + 1
 
         if batch_id > 1:
-- 
GitLab