#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import copy
import math
import os
import sys
import tempfile
import unittest

import numpy

import paddle
from paddle import fluid
from paddle.static.amp import decorate

paddle.enable_static()


def resnet_cifar10(input, depth=32):
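    # A small CIFAR-10 ResNet: a 3x3 stem convolution, three residual
    # stages, and average pooling; the helpers below build each piece.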
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = paddle.static.nn.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return paddle.static.nn.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
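        # Use a 1x1 projection convolution when the channel count changes;
        # otherwise pass the input through unchanged.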
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
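    # depth = 6n + 2: three stages of n basic blocks (two convs each),
    # plus the stem convolution and the final FC classifier.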
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.nn.functional.avg_pool2d(x=res3, kernel_size=8, stride=1)
    return pool


def vgg16_bn_drop(input):
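    # VGG16 backbone: five conv blocks (stacks of 3x3 convs with batch norm
    # and dropout, each ending in 2x2 max pooling), then two FC layers of
    # width 4096 with dropout in between.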
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max',
        )

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
    fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None)
    bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
    fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None)
    return fc2


def train(net_type, use_cuda, save_dirname, is_local):
    classdim = 10
    data_shape = [3, 32, 32]

    train_program = fluid.Program()
    startup_prog = fluid.Program()
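    # Fix the random seeds so the unit test is reproducible.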
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = paddle.static.data(
            name='pixel', shape=[-1] + data_shape, dtype='float32'
        )
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

        if net_type == "vgg":
            print("train vgg net")
            net = vgg16_bn_drop(images)
        elif net_type == "resnet":
            print("train resnet")
            net = resnet_cifar10(images, 32)
        else:
            raise ValueError("%s network is not supported" % net_type)

        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
        cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=True
        )
        avg_cost = paddle.mean(cost)
        acc = paddle.static.accuracy(input=predict, label=label)

        # Test program: clone before the optimizer adds backward ops.
        test_program = train_program.clone(for_test=True)

        optimizer = fluid.optimizer.Lamb(learning_rate=0.001)
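
        # Keep the loss and the first conv weight in FP32 via
        # custom_black_varnames, and wrap the optimizer with dynamic loss
        # scaling (initial scale 8.0) to guard against FP16 underflow.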
        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_varnames={"loss", "conv2d_0.w_0"}
        )
        mp_optimizer = decorate(
            optimizer=optimizer,
            amp_lists=amp_lists,
            init_loss_scaling=8.0,
            use_dynamic_loss_scaling=True,
        )

        mp_optimizer.minimize(avg_cost)
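        # The decorated optimizer exposes the current loss scale and the
        # scaled loss so they can be fetched for logging.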
        loss_scaling = mp_optimizer.get_loss_scaling()
        scaled_loss = mp_optimizer.get_scaled_loss()

    BATCH_SIZE = 128
    PASS_NUM = 1

    # No shuffling, so the unit test is deterministic.
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE
    )

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE
    )

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
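        # Run PASS_NUM passes over CIFAR-10, evaluating every 10 batches and
        # saving an inference model once accuracy clears the (low) CI bar.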
        exe.run(startup_prog)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                np_scaled_loss, loss = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[scaled_loss, avg_cost],
                )
                print(
                    'PassID {:1}, BatchID {:04}, train loss {:2.4}, scaled train loss {:2.4}'.format(
                        pass_id,
                        batch_id + 1,
                        float(loss),
                        float(np_scaled_loss),
                    )
                )
                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[avg_cost, acc],
                        )
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Evaluate only one batch to speed up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {:1}, BatchID {:04}, test loss {:2.2}, acc {:2.2}'.format(
                            pass_id,
                            batch_id + 1,
                            float(avg_loss_value),
                            float(acc_value),
                        )
                    )

                    if acc_value > 0.08:  # Low threshold, to speed up CI
                        fluid.io.save_inference_model(
                            save_dirname,
                            ["pixel"],
                            [predict],
                            exe,
                            main_program=train_program,
                            clip_extra=True,
                        )
                        return

    if is_local:
        train_loop(train_program)
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
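        # Transpile the program for parameter-server training; each role
        # then runs its own program below.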
        t = paddle.distributed.transpiler.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


def infer(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        # The input of the conv op should be 4-D or 5-D.
        # Use normalized image pixels as input data, in the range [0, 1.0].
        batch_size = 1
        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_img},
            fetch_list=fetch_targets,
        )

        print("infer results: ", results[0])

        fluid.io.save_inference_model(
            save_dirname,
            feed_target_names,
            fetch_targets,
            exe,
            inference_program,
            clip_extra=True,
        )


class TestImageClassification(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.temp_dir.cleanup()

    def main(self, net_type, use_cuda, is_local=True):
        if use_cuda and not fluid.core.is_compiled_with_cuda():
            return

        # Directory for saving the trained model
        save_dirname = os.path.join(
            self.temp_dir.name,
            "image_classification_" + net_type + ".inference.model",
        )

        train(net_type, use_cuda, save_dirname, is_local)
        # infer(use_cuda, save_dirname)

    def test_amp_lists(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)
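
        # With no customization, the default white/black/gray lists should
        # be returned unchanged.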
        amp_lists = paddle.static.amp.AutoMixedPrecisionLists()
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_1(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 1. w={'exp'}, b=None
        white_list.add('exp')
        black_list.remove('exp')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'exp'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_2(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 2. w={'tanh'}, b=None
        white_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'tanh'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_3(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 3. w={'lstm'}, b=None
        white_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'lstm'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_4(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 4. w=None, b={'conv2d'}
        white_list.remove('conv2d')
        black_list.add('conv2d')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'conv2d'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_5(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 5. w=None, b={'tanh'}
        black_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_6(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 6. w=None, b={'lstm'}
        black_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_7(self):
        # 7. w={'lstm'}, b={'lstm'}
        # The same op in both custom lists should raise ValueError.
        self.assertRaises(
            ValueError,
            paddle.static.amp.AutoMixedPrecisionLists,
            {'lstm'},
            {'lstm'},
        )

    def test_vgg_cuda(self):
        with self.scope_prog_guard():
            self.main('vgg', use_cuda=True)

    def test_resnet_cuda(self):
        with self.scope_prog_guard():
            self.main('resnet', use_cuda=True)

    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
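        # Build a static program fed by a non-iterable DataLoader and check
        # that AMP decoration and minimize() complete without error.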
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = paddle.static.data(
                    name='image', shape=[-1, 3, 224, 224], dtype='float32'
                )
                label = paddle.static.data(
                    name='label', shape=[-1, 1], dtype='int64'
                )
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )

                net = vgg16_bn_drop(image)
                logits = paddle.static.nn.fc(
                    x=net, size=10, activation="softmax"
                )
                cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                    logits, label, return_softmax=True
                )
                avg_cost = paddle.mean(cost)

                optimizer = fluid.optimizer.Lamb(learning_rate=0.001)
                amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
                    custom_black_varnames={"loss", "conv2d_0.w_0"}
                )
                mp_optimizer = decorate(
                    optimizer=optimizer,
                    amp_lists=amp_lists,
                    init_loss_scaling=8.0,
                    use_dynamic_loss_scaling=True,
                )

                mp_optimizer.minimize(avg_cost)

    def test_non_iterable_dataloader(self):
        self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()