#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

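"""Unit tests for FP16 / auto mixed precision (AMP) training of image
classification networks (VGG16 and a CIFAR-10 ResNet) in static graph mode."""
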
import paddle
import paddle.fluid as fluid
import contextlib
import math
import sys
import numpy
import unittest
import os
import copy
import tempfile
from paddle.static.amp import decorate

paddle.enable_static()


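# ResNet for CIFAR-10: depth must satisfy depth = 6n + 2 (e.g. 20, 32, 44),
# giving three stages of n basic blocks with 16, 32, and 64 channels.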
def resnet_cifar10(input, depth=32):
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return fluid.layers.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1
    )
    return pool


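# VGG16 with batch norm and dropout: each conv_block is an img_conv_group of
# 3x3 convolutions (with batch norm) followed by 2x2 max pooling.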
def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max',
        )

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
    fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
    bn = fluid.layers.batch_norm(input=fc1, act='relu')
    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
    fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
    return fc2


def train(net_type, use_cuda, save_dirname, is_local):
    classdim = 10
    data_shape = [3, 32, 32]

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = fluid.layers.data(
            name='pixel', shape=data_shape, dtype='float32'
        )
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        if net_type == "vgg":
            print("train vgg net")
            net = vgg16_bn_drop(images)
        elif net_type == "resnet":
            print("train resnet")
            net = resnet_cifar10(images, 32)
        else:
            raise ValueError("%s network is not supported" % net_type)

        logits = fluid.layers.fc(input=net, size=classdim, act="softmax")
        cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=True
        )
        avg_cost = paddle.mean(cost)
        acc = paddle.static.accuracy(input=predict, label=label)

        # Test program
        test_program = train_program.clone(for_test=True)

        optimizer = fluid.optimizer.Lamb(learning_rate=0.001)

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            custom_black_varnames={"loss", "conv2d_0.w_0"}
        )
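        # Decorate the optimizer for AMP training: ops involving the variables
        # in custom_black_varnames stay in FP32, and dynamic loss scaling
        # starts from init_loss_scaling=8.0.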
        mp_optimizer = decorate(
            optimizer=optimizer,
            amp_lists=amp_lists,
            init_loss_scaling=8.0,
            use_dynamic_loss_scaling=True,
        )

        mp_optimizer.minimize(avg_cost)
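        # The decorated optimizer exposes the loss scaling factor and the
        # scaled loss so they can be fetched during training.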
        loss_scaling = mp_optimizer.get_loss_scaling()
        scaled_loss = mp_optimizer.get_scaled_loss()

    BATCH_SIZE = 128
    PASS_NUM = 1

    # No shuffling, so the unit test is reproducible
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE
    )

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE
    )

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        exe.run(startup_prog)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                np_scaled_loss, loss = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[scaled_loss, avg_cost],
                )
                print(
                    'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train loss {3:2.4}'.format(
                        pass_id,
                        batch_id + 1,
                        float(loss),
                        float(np_scaled_loss),
                    )
                )
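                # Evaluate on the test set every 10 batches; only the first
                # test batch is used to keep CI fast (see the break below).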
                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[avg_cost, acc],
                        )
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # use only the first test batch to speed up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}'.format(
                            pass_id,
                            batch_id + 1,
                            float(avg_loss_value),
                            float(acc_value),
                        )
                    )

                    if acc_value > 0.08:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(
                            save_dirname,
                            ["pixel"],
                            [predict],
                            exe,
                            main_program=train_program,
                            clip_extra=True,
                        )
                        return

    if is_local:
        train_loop(train_program)
    else:
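        # Parameter-server mode: endpoints and roles are read from environment
        # variables, and DistributeTranspiler splits the program into pserver
        # and trainer variants.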
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


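# Load the saved inference model and run it once on a random CIFAR-shaped
# input to check that the exported program is executable.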
def infer(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        # The input of a conv layer should be a 4-D or 5-D tensor.
        # Use normalized image pixels as input data, in the range [0, 1.0].
        batch_size = 1
        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_img},
            fetch_list=fetch_targets,
        )

        print("infer results: ", results[0])

        fluid.io.save_inference_model(
            save_dirname,
            feed_target_names,
            fetch_targets,
            exe,
            inference_program,
            clip_extra=True,
        )


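# The test_amp_lists_* cases below check that custom entries move ops between
# the default white (FP16), black (FP32), and gray AMP op lists.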
class TestImageClassification(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.temp_dir.cleanup()

    def main(self, net_type, use_cuda, is_local=True):
        if use_cuda and not fluid.core.is_compiled_with_cuda():
            return

        # Directory for saving the trained model
        save_dirname = os.path.join(
            self.temp_dir.name,
            "image_classification_" + net_type + ".inference.model",
        )

        train(net_type, use_cuda, save_dirname, is_local)
        # infer(use_cuda, save_dirname)

    def test_amp_lists(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists()
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_1(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 1. w={'exp'}, b=None
        white_list.add('exp')
        black_list.remove('exp')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            {'exp'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_2(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 2. w={'tanh'}, b=None
        white_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            {'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_3(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 3. w={'lstm'}, b=None
        white_list.add('lstm')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            {'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_4(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 4. w=None, b={'conv2d'}
        white_list.remove('conv2d')
        black_list.add('conv2d')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            custom_black_list={'conv2d'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_5(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 5. w=None, b={'tanh'}
        black_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            custom_black_list={'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_6(self):
        white_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.white_list
        )
        black_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.black_list
        )
        gray_list = copy.copy(
            fluid.contrib.mixed_precision.fp16_lists.gray_list
        )

        # 6. w=None, b={'lstm'}
        black_list.add('lstm')

        amp_lists = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
            custom_black_list={'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_7(self):
        # 7. w={'lstm'}, b={'lstm'}
        # raise ValueError
        self.assertRaises(
            ValueError,
            fluid.contrib.mixed_precision.AutoMixedPrecisionLists,
            {'lstm'},
            {'lstm'},
        )

    def test_vgg_cuda(self):
        with self.scope_prog_guard():
            self.main('vgg', use_cuda=True)

    def test_resnet_cuda(self):
        with self.scope_prog_guard():
            self.main('resnet', use_cuda=True)

    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


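# Decorating the optimizer should also work when the program's inputs come
# from a non-iterable DataLoader instead of a DataFeeder.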
class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = fluid.layers.data(
                    name='image', shape=[3, 224, 224], dtype='float32'
                )
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64'
                )
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )

                net = vgg16_bn_drop(image)
                logits = fluid.layers.fc(input=net, size=10, act="softmax")
                cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                    logits, label, return_softmax=True
                )
                avg_cost = paddle.mean(cost)

                optimizer = fluid.optimizer.Lamb(learning_rate=0.001)
                amp_lists = (
                    fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
                        custom_black_varnames={"loss", "conv2d_0.w_0"}
                    )
                )
                mp_optimizer = decorate(
                    optimizer=optimizer,
                    amp_lists=amp_lists,
                    init_loss_scaling=8.0,
                    use_dynamic_loss_scaling=True,
                )

                mp_optimizer.minimize(avg_cost)

    def test_non_iterable_dataloader(self):
        self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()