#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
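
"""Static-graph automatic mixed precision (FP16) tests for image classification.

Trains a small CIFAR-10 ResNet and a VGG16 variant under a loss-scaled AMP
optimizer, runs inference on the saved model, and checks the bookkeeping of
the AutoMixedPrecisionLists white/black/gray op lists.
"""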

import contextlib
import copy
import math
import os
import sys
import tempfile
import unittest

import numpy

# TODO: remove sys.path.append
sys.path.append("../legacy_test")
import nets

import paddle
from paddle import fluid
from paddle.static.amp import decorate

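# The AMP APIs exercised here are static-graph APIs, so switch to static mode.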
paddle.enable_static()


def resnet_cifar10(input, depth=32):
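    # A CIFAR-style ResNet of depth 6n + 2 (the assert below): a 3x3 stem conv,
    # three stages of n basic blocks (16, 32 and 64 channels), then 8x8 pooling.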
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = paddle.static.nn.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return paddle.static.nn.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.nn.functional.avg_pool2d(x=res3, kernel_size=8, stride=1)
    return pool


def vgg16_bn_drop(input):
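    # VGG16 with batch norm: five conv blocks (2, 2, 3, 3 and 3 conv layers with
    # 64/128/256/512/512 filters), then dropout and two 4096-wide FC layers.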
    def conv_block(input, num_filter, groups, dropouts):
        return nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max',
        )

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
    fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None)
    bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
    fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None)
    return fc2


def train(net_type, use_cuda, save_dirname, is_local):
    classdim = 10
    data_shape = [3, 32, 32]

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = paddle.static.data(
            name='pixel', shape=[-1] + data_shape, dtype='float32'
        )
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

        if net_type == "vgg":
            print("train vgg net")
            net = vgg16_bn_drop(images)
        elif net_type == "resnet":
            print("train resnet")
            net = resnet_cifar10(images, 32)
        else:
            raise ValueError("%s network is not supported" % net_type)

        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
        cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=True
        )
        avg_cost = paddle.mean(cost)
        acc = paddle.static.accuracy(input=predict, label=label)

        # Clone a test program before the optimizer (and its AMP rewrites)
        # add backward and update ops.
        test_program = train_program.clone(for_test=True)

        optimizer = paddle.optimizer.Lamb(learning_rate=0.001)

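        # Ops touching the variables named in custom_black_varnames are kept in
        # fp32; decorate() then wraps the optimizer with dynamic loss scaling
        # (initial scale 8.0) to guard against fp16 gradient underflow.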
        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_varnames={"loss", "conv2d_0.w_0"}
        )
        mp_optimizer = decorate(
            optimizer=optimizer,
            amp_lists=amp_lists,
            init_loss_scaling=8.0,
            use_dynamic_loss_scaling=True,
        )

        mp_optimizer.minimize(avg_cost)
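        # Expose the current loss scale and the scaled loss so the training
        # loop can fetch and print them.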
        loss_scaling = mp_optimizer.get_loss_scaling()
        scaled_loss = mp_optimizer.get_scaled_loss()

    BATCH_SIZE = 128
    PASS_NUM = 1

    # no shuffle for unit test
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE
    )

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE
    )

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        exe.run(startup_prog)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                np_scaled_loss, loss = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[scaled_loss, avg_cost],
                )
                print(
                    'PassID {:1}, BatchID {:04}, train loss {:2.4}, scaled train loss {:2.4}'.format(
                        pass_id,
                        batch_id + 1,
                        float(loss),
                        float(np_scaled_loss),
                    )
                )
                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[avg_cost, acc],
                        )
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Use a single test batch to speed up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {:1}, BatchID {:04}, test loss {:2.2}, acc {:2.2}'.format(
                            pass_id,
                            batch_id + 1,
                            float(avg_loss_value),
                            float(acc_value),
                        )
                    )

                    if acc_value > 0.08:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(
                            save_dirname,
                            ["pixel"],
                            [predict],
                            exe,
                            main_program=train_program,
                            clip_extra=True,
                        )
                        return

    if is_local:
        train_loop(train_program)
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
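        # Transpile the single-process program into parameter-server and
        # trainer programs using the PADDLE_* settings gathered above.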
        t = paddle.distributed.transpiler.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


def infer(use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        # The input of a conv op should be 4-D or 5-D.
        # Use normalized image pixels as input data, in the range [0, 1.0].
        batch_size = 1
        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_img},
            fetch_list=fetch_targets,
        )

        print("infer results: ", results[0])

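        # Re-save the loaded inference program; clip_extra=True prunes op
        # attributes that are not needed for inference.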
        fluid.io.save_inference_model(
            save_dirname,
            feed_target_names,
            fetch_targets,
            exe,
            inference_program,
            clip_extra=True,
        )


class TestImageClassification(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.temp_dir.cleanup()

    def main(self, net_type, use_cuda, is_local=True):
        if use_cuda and not fluid.core.is_compiled_with_cuda():
            return

        # Directory for saving the trained model
        save_dirname = os.path.join(
            self.temp_dir.name,
            "image_classification_" + net_type + ".inference.model",
        )

        train(net_type, use_cuda, save_dirname, is_local)
        # infer(use_cuda, save_dirname)

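    # In the AMP op lists, white ops run in fp16, black ops stay in fp32, and
    # gray ops follow the precision of their inputs. The tests below verify
    # how custom entries move ops between the default lists.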
    def test_amp_lists(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists()
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_1(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 1. w={'exp'}, b=None
        white_list.add('exp')
        black_list.remove('exp')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'exp'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_2(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 2. w={'tanh'}, b=None
        white_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'tanh'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_3(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 3. w={'lstm'}, b=None
        white_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'lstm'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_4(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 4. w=None, b={'conv2d'}
        white_list.remove('conv2d')
        black_list.add('conv2d')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'conv2d'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_5(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 5. w=None, b={'tanh'}
        black_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_6(self):
        white_list = (
            copy.copy(paddle.static.amp.fp16_lists.white_list)
            | paddle.static.amp.fp16_lists._only_supported_fp16_list
        )
        black_list = copy.copy(
            paddle.static.amp.fp16_lists.black_list
            | paddle.static.amp.fp16_lists._extra_black_list
        )
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 6. w=None, b={'lstm'}
        black_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_7(self):
        # 7. w={'lstm'}, b={'lstm'}
        # raise ValueError
        self.assertRaises(
            ValueError,
            paddle.static.amp.AutoMixedPrecisionLists,
            {'lstm'},
            {'lstm'},
        )

    def test_vgg_cuda(self):
        with self.scope_prog_guard():
            self.main('vgg', use_cuda=True)

    def test_resnet_cuda(self):
        with self.scope_prog_guard():
            self.main('resnet', use_cuda=True)

    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


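# AMP decoration should also work when the program is fed through a
# non-iterable DataLoader instead of a DataFeeder.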
class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = paddle.static.data(
                    name='image', shape=[-1, 3, 224, 224], dtype='float32'
                )
                label = paddle.static.data(
                    name='label', shape=[-1, 1], dtype='int64'
                )
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )

                net = vgg16_bn_drop(image)
                logits = paddle.static.nn.fc(
                    x=net, size=10, activation="softmax"
                )
                cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                    logits, label, return_softmax=True
                )
                avg_cost = paddle.mean(cost)

                optimizer = paddle.optimizer.Lamb(learning_rate=0.001)
                amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
                    custom_black_varnames={"loss", "conv2d_0.w_0"}
                )
                mp_optimizer = decorate(
                    optimizer=optimizer,
                    amp_lists=amp_lists,
                    init_loss_scaling=8.0,
                    use_dynamic_loss_scaling=True,
                )

                mp_optimizer.minimize(avg_cost)

    def test_non_iterable_dataloader(self):
        self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()