# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
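
# One-batch parity test: train ResNet-50 for a single batch under the
# imperative (dynamic graph) API and again under the static graph executor
# with the same seed and data, then check that the loss and all parameter
# values match.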

import math
import unittest
import numpy as np
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope

train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 1,
        "epochs": [30, 60, 90],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    },
    "batch_size": 1,
    "lr": 0.1,
    "total_images": 1281164,
}


def optimizer_setting(params):
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 1281167
        else:
            total_images = params["total_images"]
        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)

        # Boundaries and decayed learning rates for piecewise decay; these
        # feed the commented-out Momentum optimizer below.
        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]

        optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
        # optimizer = fluid.optimizer.Momentum(
        #     learning_rate=fluid.layers.piecewise_decay(
        #         boundaries=bd, values=lr),
        #     momentum=0.9,
        #     regularization=fluid.regularizer.L2Decay(1e-4))

    return optimizer


class ConvBNLayer(fluid.imperative.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        # Convolution followed by batch norm; the activation is applied by
        # the BatchNorm layer, not by the convolution itself.
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=None)

        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)

        return y


class BottleneckBlock(fluid.imperative.Layer):
    def __init__(self, num_channels, num_filters, stride, shortcut=True):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)

        # A 1x1 projection is only needed when the block cannot reuse its
        # input as the residual (the first block of a stage).
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut

        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        # Add the residual and apply ReLU in one fused op.
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


class ResNet(fluid.imperative.Layer):
    def __init__(self, layers=50, class_dim=1000):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        # Stack the four stages of bottleneck blocks; the first block of each
        # stage after the first downsamples with stride 2 and projects the
        # shortcut, while later blocks reuse their input as the residual.
        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = BottleneckBlock(
                    num_channels=num_channels,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1,
                    shortcut=shortcut)
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        self.pool2d_avg = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        # Fan-in based bound for the final FC weight initializer.
        stdv = 1.0 / math.sqrt(2048 * 1.0)

        self.out = FC(size=class_dim,
                      act='softmax',
                      param_attr=fluid.param_attr.ParamAttr(
                          initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        y = self.out(y)
        return y


class TestImperativeResnet(unittest.TestCase):
    def test_resnet_gpu_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        with fluid.imperative.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(), batch_size=batch_size)

            dy_param_init_value = {}
            for param in fluid.default_main_program().global_block(
            ).all_parameters():
                dy_param_init_value[param.name] = param._numpy()

            for batch_id, data in enumerate(train_reader()):
                # A single batch is enough for the parity check.
                if batch_id >= 1:
                    break

                x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    batch_size, 1)

                img = to_variable(x_data)
                label = to_variable(y_data)
                label._stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)
                dy_out = avg_loss._numpy()

                if batch_id == 0:
                    for param in fluid.default_main_program().global_block(
                    ).all_parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param._numpy()

                avg_loss._backward()
                optimizer.minimize(avg_loss)

                # Snapshot every parameter after the single optimizer step.
                dy_param_value = {}
                for param in fluid.default_main_program().global_block(
                ).all_parameters():
                    dy_param_value[param.name] = param._numpy()

        # Repeat the same computation with the static graph executor.
        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(fluid.CUDAPlace(0))

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(), batch_size=batch_size)

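            # Build the identical network as a static graph: declare the feed
            # inputs, the forward pass, the loss, and the optimizer step.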
            img = fluid.layers.data(
                name='pixel', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = resnet(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in fluid.default_startup_program().global_block(
            ).all_parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= 1:
                    break

                x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [batch_size, 1])

                fetch_list = [loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={"pixel": x_data,
                                    "label": y_data},
                              fetch_list=fetch_list)

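                # out[0] is the fetched loss; the remaining entries are the
                # parameter values after the single training step.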
                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        for key, value in six.iteritems(static_param_value):
            if not np.allclose(value, dy_param_value[key]):
                print(key)
                print(value, dy_param_value[key])
            self.assertTrue(np.allclose(value, dy_param_value[key]))


if __name__ == '__main__':
    unittest.main()