# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import math
import unittest

import numpy as np
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope

train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 256,
        "epochs": [30, 60, 90],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    },
    "batch_size": 256,
    "lr": 0.1,
    "total_images": 1281164,
}


def optimizer_setting(params):
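    """Build the optimizer described by params["learning_strategy"].

    Only the "piecewise_decay" strategy is recognized; any other name falls
    back to plain SGD with the base learning rate.
    """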
    ls = params["learning_strategy"]
    if ls["name"] == "piecewise_decay":
        if "total_images" not in params:
            total_images = 1281167
        else:
            total_images = params["total_images"]
        batch_size = ls["batch_size"]
        step = int(total_images / batch_size + 1)

        bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
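        # With the defaults above (total_images=1281164, batch_size=256),
        # step = 5005, so bd = [150150, 300300, 450450] and
        # lr = [0.1, 0.01, 0.001, 0.0001]. These only feed the commented-out
        # Momentum optimizer below; the SGD used here keeps a constant rate.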
        optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
        #  optimizer = fluid.optimizer.Momentum(
        #      learning_rate=fluid.layers.piecewise_decay(
        #          boundaries=bd, values=lr),
        #      momentum=0.9,
        #      regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])

    return optimizer


class ConvBNLayer(fluid.imperative.Layer):
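    """A Conv2D followed by BatchNorm; the activation (if any) is applied by
    the BatchNorm layer, so the convolution itself stays linear."""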
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=None)

        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)

        return y


class BottleneckBlock(fluid.imperative.Layer):
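    """Standard ResNet bottleneck: a 1x1 reduce, a (possibly strided) 3x3,
    and a 1x1 expand to num_filters * 4 channels, summed with an identity or
    1x1-projected shortcut before the final relu."""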
    def __init__(self, num_channels, num_filters, stride, shortcut=True):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)

        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut

        self._num_channels_out = num_filters * 4

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


class ResNet(fluid.imperative.Layer):
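    """ResNet-50/101/152 built from BottleneckBlocks: a 7x7 stem, max pool,
    four bottleneck stages, global average pool, and a softmax FC head."""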
    def __init__(self, layers=50, class_dim=1000):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = BottleneckBlock(
                    num_channels=num_channels,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1,
                    shortcut=shortcut)
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True

        self.pool2d_avg = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        # The last stage outputs num_filters[-1] * 4 = 2048 channels, so use
        # a fan-in style uniform range to initialize the final FC.
        stdv = 1.0 / math.sqrt(2048 * 1.0)

        self.out = FC(size=class_dim,
                      act='softmax',
                      param_attr=fluid.param_attr.ParamAttr(
                          initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        y = self.conv(inputs)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        y = self.out(y)
        return y


class TestImperativeResnet(unittest.TestCase):
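    """Runs two mini-batches of ResNet training in imperative mode and
    records the loss and parameter values; the static-graph baseline they
    should be checked against is still commented out below."""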
    def test_resnet_gpu_float32(self):
        seed = 90

        with fluid.imperative.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(), batch_size=256)

            dy_param_init_value = {}
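            # Two batches are enough to exercise forward, backward and an
            # optimizer step without making the test too slow.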
            for batch_id, data in enumerate(train_reader()):
                if batch_id >= 2:
                    break

                x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    256, 1)

                img = to_variable(x_data)
                label = to_variable(y_data)
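                # Labels are data, not trainable parameters: block gradients
                # from flowing into them.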
                label._stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)
                dy_out = avg_loss._numpy()

                if batch_id == 0:
                    # Snapshot the freshly initialized parameters so they can
                    # be compared against a static-graph run.
                    for param in fluid.default_main_program().global_block(
                    ).all_parameters():
                        dy_param_init_value[param.name] = param._numpy()

                # Imperative mode: run the backward pass explicitly, then let
                # the optimizer apply the update.
                avg_loss._backward()
                optimizer.minimize(avg_loss)
                dy_param_value = {}
                for param in fluid.default_main_program().global_block(
                ).all_parameters():
                    dy_param_value[param.name] = param._numpy()

        #  The static-graph baseline below is still disabled; it mirrors the
        #  imperative run above so the two can be compared numerically.
        #  with new_program_scope():
        #      fluid.default_startup_program().random_seed = seed
        #      fluid.default_main_program().random_seed = seed

        #      exe = fluid.Executor(fluid.CPUPlace())

        #      resnet = ResNet()
        #      optimizer = optimizer_setting(train_parameters)
        #      train_reader = paddle.batch(
        #          paddle.dataset.flowers.train(), batch_size=256)

        #      img = fluid.layers.data(
        #          name='pixel', shape=[3, 224, 224], dtype='float32')
        #      label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        #      out = resnet(img)
        #      loss = fluid.layers.cross_entropy(input=out, label=label)
        #      avg_loss = fluid.layers.mean(x=loss)
        #      optimizer.minimize(avg_loss)

        #      # initialize params and fetch them
        #      static_param_init_value = {}
        #      static_param_name_list = []
        #      for param in fluid.default_startup_program().global_block(
        #      ).all_parameters():
        #          static_param_name_list.append(param.name)

        #      out = exe.run(fluid.default_startup_program(),
        #                    fetch_list=static_param_name_list)

        #      for i in range(len(static_param_name_list)):
        #          static_param_init_value[static_param_name_list[i]] = out[i]

        #      for batch_id, data in enumerate(train_reader()):
        #          if batch_id >= 2:
        #              break

        #          x_data = np.array(
        #              [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
        #          y_data = np.array([x[1] for x in data]).astype('int64').reshape(
        #              [256, 1])

        #          fetch_list = [avg_loss.name]
        #          fetch_list.extend(static_param_name_list)
        #          out = exe.run(fluid.default_main_program(),
        #                        feed={"pixel": x_data,
        #                              "label": y_data},
        #                        fetch_list=fetch_list)

        #          static_param_value = {}
        #          static_out = out[0]
        #          for i in range(1, len(out)):
        #              static_param_value[static_param_name_list[i - 1]] = out[i]

        #  for key, value in six.iteritems(static_param_init_value):
        #      self.assertTrue(np.allclose(value, dy_param_init_value[key]))
        #  self.assertTrue(np.allclose(static_out, dy_out))
        #  for key, value in six.iteritems(static_param_value):
        #      self.assertTrue(np.allclose(value, dy_param_value[key]))


if __name__ == '__main__':
    unittest.main()