# test_image_classification_fp16.py
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import copy
import math
import os
import sys
import tempfile
import unittest

import numpy

import paddle
from paddle import fluid
from paddle.static.amp import decorate

paddle.enable_static()


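# Builds a ResNet for CIFAR-10 in the classic 6n+2 layout: three groups of
# basic blocks at 16/32/64 channels, so `depth` must satisfy
# (depth - 2) % 6 == 0 (e.g. 20, 32, 44, 56).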
def resnet_cifar10(input, depth=32):
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = paddle.static.nn.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return paddle.static.nn.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.nn.functional.avg_pool2d(x=res3, kernel_size=8, stride=1)
    return pool


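# Builds a VGG16-style network: five conv blocks with batch norm and dropout
# (via img_conv_group), followed by two 4096-wide fully connected layers.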
def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max',
        )

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
    fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None)
    bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
    fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None)
    return fc2


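# Trains the chosen network on CIFAR-10 under AMP (FP16) and saves an
# inference model once test accuracy clears a deliberately low CI threshold.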
def train(net_type, use_cuda, save_dirname, is_local):
    classdim = 10
    data_shape = [3, 32, 32]

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = paddle.static.data(
            name='pixel', shape=[-1] + data_shape, dtype='float32'
        )
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

        if net_type == "vgg":
            print("train vgg net")
            net = vgg16_bn_drop(images)
        elif net_type == "resnet":
            print("train resnet")
            net = resnet_cifar10(images, 32)
        else:
            raise ValueError("%s network is not supported" % net_type)

        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
        cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=True
        )
        avg_cost = paddle.mean(cost)
        acc = paddle.static.accuracy(input=predict, label=label)

        # Test program
        test_program = train_program.clone(for_test=True)

        optimizer = fluid.optimizer.Lamb(learning_rate=0.001)

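        # Keep the ops producing the variables named in custom_black_varnames
        # in FP32; decorate() then wraps the optimizer so the loss is scaled
        # (starting at 8.0) and the scale is adjusted dynamically when
        # inf/nan gradients show up.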
        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_varnames={"loss", "conv2d_0.w_0"}
        )
        mp_optimizer = decorate(
            optimizer=optimizer,
            amp_lists=amp_lists,
            init_loss_scaling=8.0,
            use_dynamic_loss_scaling=True,
        )

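        # minimize() inserts the cast and scaling ops into the program; the
        # scaled loss is fetched during training to observe loss scaling.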
        mp_optimizer.minimize(avg_cost)
        loss_scaling = mp_optimizer.get_loss_scaling()
        scaled_loss = mp_optimizer.get_scaled_loss()

    BATCH_SIZE = 128
    PASS_NUM = 1

    # no shuffle for unit test
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE
    )

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE
    )

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

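    # Runs the startup program, trains for PASS_NUM passes, evaluates every
    # 10 batches, and saves an inference model (then returns) once test
    # accuracy exceeds 0.08.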
    def train_loop(main_program):
        exe.run(startup_prog)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                np_scaled_loss, loss = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[scaled_loss, avg_cost],
                )
                print(
                    'PassID {:1}, BatchID {:04}, train loss {:2.4}, scaled train loss {:2.4}'.format(
                        pass_id,
                        batch_id + 1,
                        float(loss),
                        float(np_scaled_loss),
                    )
                )
                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[avg_cost, acc],
                        )
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Only evaluate one batch to speed up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {:1}, BatchID {:04}, test loss {:2.2}, acc {:2.2}'.format(
                            pass_id,
                            batch_id + 1,
                            float(avg_loss_value),
                            float(acc_value),
                        )
                    )

                    if acc_value > 0.08:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(
                            save_dirname,
                            ["pixel"],
                            [predict],
                            exe,
                            main_program=train_program,
                            clip_extra=True,
                        )
                        return

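    # Local mode runs the training loop directly; otherwise the program is
    # transpiled for parameter-server training with endpoints taken from the
    # environment.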
    if is_local:
        train_loop(train_program)
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = paddle.distributed.transpiler.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


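# Loads the saved inference model and feeds it a single random CIFAR-sized
# image. (Currently unused by the tests; see the commented-out call below.)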
def infer(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        # The input to the conv layers must be 4-D or 5-D.
        # Use normalized image pixels as input data, in the range [0, 1.0].
        batch_size = 1
        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_img},
            fetch_list=fetch_targets,
        )

        print("infer results: ", results[0])

        fluid.io.save_inference_model(
            save_dirname,
            feed_target_names,
            fetch_targets,
            exe,
            inference_program,
            clip_extra=True,
        )


class TestImageClassification(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.temp_dir.cleanup()

    def main(self, net_type, use_cuda, is_local=True):
        if use_cuda and not fluid.core.is_compiled_with_cuda():
            return

        # Directory for saving the trained model
        save_dirname = os.path.join(
            self.temp_dir.name,
            "image_classification_" + net_type + ".inference.model",
        )

        train(net_type, use_cuda, save_dirname, is_local)
        # infer(use_cuda, save_dirname)

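    # The tests below exercise AutoMixedPrecisionLists. Roughly: white-list ops
    # run in FP16, black-list ops stay in FP32, and gray-list ops follow the
    # dtype of their inputs. A custom entry moves an op out of whichever
    # default list held it, and naming one op in both custom lists is an error.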
    def test_amp_lists(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists()
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_1(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 1. w={'exp'}, b=None
        white_list.add('exp')
        black_list.remove('exp')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'exp'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_2(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 2. w={'tanh'}, b=None
        white_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'tanh'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_3(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 3. w={'lstm'}, b=None
        white_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'lstm'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_4(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 4. w=None, b={'conv2d'}
        white_list.remove('conv2d')
        black_list.add('conv2d')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'conv2d'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_5(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 5. w=None, b={'tanh'}
        black_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_6(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 6. w=None, b={'lstm'}
        black_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_7(self):
        # 7. w={'lstm'} b={'lstm'}
        # raise ValueError
        self.assertRaises(
            ValueError,
            paddle.static.amp.AutoMixedPrecisionLists,
            {'lstm'},
            {'lstm'},
        )

    def test_vgg_cuda(self):
        with self.scope_prog_guard():
            self.main('vgg', use_cuda=True)

    def test_resnet_cuda(self):
        with self.scope_prog_guard():
            self.main('resnet', use_cuda=True)

    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


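# Checks that AMP decoration builds correctly when input comes from a
# non-iterable DataLoader instead of a feed dict.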
class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = paddle.static.data(
                    name='image', shape=[-1, 3, 224, 224], dtype='float32'
                )
                label = paddle.static.data(
                    name='label', shape=[-1, 1], dtype='int64'
                )
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )

                net = vgg16_bn_drop(image)
                logits = paddle.static.nn.fc(
                    x=net, size=10, activation="softmax"
                )
                cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                    logits, label, return_softmax=True
                )
                avg_cost = paddle.mean(cost)

                optimizer = fluid.optimizer.Lamb(learning_rate=0.001)
                amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
                    custom_black_varnames={"loss", "conv2d_0.w_0"}
                )
                mp_optimizer = decorate(
                    optimizer=optimizer,
                    amp_lists=amp_lists,
                    init_loss_scaling=8.0,
                    use_dynamic_loss_scaling=True,
                )

                mp_optimizer.minimize(avg_cost)

    def test_non_iterable_dataloader(self):
        self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()