# test_image_classification_fp16.py
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
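
"""Unit tests for FP16 (AMP) image classification in static graph mode.

Trains a small VGG or ResNet model on CIFAR-10 with an AMP-decorated Lamb
optimizer, exercises saving/loading of the inference model, and checks the
behaviour of AutoMixedPrecisionLists with custom white/black lists.
"""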

import paddle
import paddle.fluid as fluid
import contextlib
import math
import sys
import numpy
import unittest
import os
import copy
import numpy as np
import tempfile
from paddle.static.amp import decorate

paddle.enable_static()


def resnet_cifar10(input, depth=32):
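    """Build a CIFAR-10 ResNet; depth must be 6n + 2 (e.g. 32)."""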
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = paddle.static.nn.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return paddle.static.nn.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.nn.functional.avg_pool2d(x=res3, kernel_size=8, stride=1)
    return pool


def vgg16_bn_drop(input):
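    """Build a VGG16-style network with batch norm and dropout."""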
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max',
        )

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.nn.functional.dropout(x=conv5, p=0.5)
    fc1 = paddle.static.nn.fc(x=drop, size=4096, activation=None)
    bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
    drop2 = paddle.nn.functional.dropout(x=bn, p=0.5)
    fc2 = paddle.static.nn.fc(x=drop2, size=4096, activation=None)
    return fc2


def train(net_type, use_cuda, save_dirname, is_local):
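    """Train `net_type` ('vgg' or 'resnet') on CIFAR-10 with AMP and save
    the inference model to `save_dirname` once accuracy clears the
    (deliberately low) CI threshold.
    """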
    classdim = 10
    data_shape = [3, 32, 32]

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = paddle.static.data(
            name='pixel', shape=[-1] + data_shape, dtype='float32'
        )
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

        if net_type == "vgg":
            print("train vgg net")
            net = vgg16_bn_drop(images)
        elif net_type == "resnet":
            print("train resnet")
            net = resnet_cifar10(images, 32)
        else:
            raise ValueError("%s network is not supported" % net_type)

        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
        cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=True
        )
        avg_cost = paddle.mean(cost)
        acc = paddle.static.accuracy(input=predict, label=label)

        # Test program
        test_program = train_program.clone(for_test=True)

        optimizer = fluid.optimizer.Lamb(learning_rate=0.001)

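        # Keep the loss and the first conv weight in FP32 via custom black
        # varnames, then wrap the optimizer with AMP loss scaling
        # (initial scale 8.0, adjusted dynamically during training).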
        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_varnames={"loss", "conv2d_0.w_0"}
        )
        mp_optimizer = decorate(
            optimizer=optimizer,
            amp_lists=amp_lists,
            init_loss_scaling=8.0,
            use_dynamic_loss_scaling=True,
        )

        mp_optimizer.minimize(avg_cost)
        loss_scaling = mp_optimizer.get_loss_scaling()
        scaled_loss = mp_optimizer.get_scaled_loss()

    BATCH_SIZE = 128
    PASS_NUM = 1

    # Do not shuffle data, to keep the unit test deterministic
    train_reader = paddle.batch(
        paddle.dataset.cifar.train10(), batch_size=BATCH_SIZE
    )

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE
    )

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        exe.run(startup_prog)
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                np_scaled_loss, loss = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[scaled_loss, avg_cost],
                )
                print(
                    'PassID {0:1}, BatchID {1:04}, train loss {2:2.4}, scaled train loss {3:2.4}'.format(
                        pass_id,
                        batch_id + 1,
                        float(loss),
                        float(np_scaled_loss),
                    )
                )
                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(
                            program=test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[avg_cost, acc],
                        )
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Only run one test batch to speed up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {0:1}, BatchID {1:04}, test loss {2:2.2}, acc {3:2.2}'.format(
                            pass_id,
                            batch_id + 1,
                            float(avg_loss_value),
                            float(acc_value),
                        )
                    )

                    if acc_value > 0.08:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(
                            save_dirname,
                            ["pixel"],
                            [predict],
                            exe,
                            main_program=train_program,
                            clip_extra=True,
                        )
                        return

    if is_local:
        train_loop(train_program)
    else:
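        # Distributed mode: build the pserver/trainer endpoints from
        # environment variables and transpile for parameter-server training.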
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


def infer(use_cuda, save_dirname=None):
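    """Load the saved inference model and run it on random image data."""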
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        # The conv input should be a 4-D or 5-D tensor.
        # Use normalized image pixels as input data, which should be in the range [0, 1.0].
        batch_size = 1
        tensor_img = numpy.random.rand(batch_size, 3, 32, 32).astype("float32")

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_img},
            fetch_list=fetch_targets,
        )

        print("infer results: ", results[0])

        fluid.io.save_inference_model(
            save_dirname,
            feed_target_names,
            fetch_targets,
            exe,
            inference_program,
            clip_extra=True,
        )


class TestImageClassification(unittest.TestCase):
    def setUp(self):
        self.temp_dir = tempfile.TemporaryDirectory()

    def tearDown(self):
        self.temp_dir.cleanup()

    def main(self, net_type, use_cuda, is_local=True):
        if use_cuda and not fluid.core.is_compiled_with_cuda():
            return

        # Directory for saving the trained model
        save_dirname = os.path.join(
            self.temp_dir.name,
            "image_classification_" + net_type + ".inference.model",
        )

        train(net_type, use_cuda, save_dirname, is_local)
        # infer(use_cuda, save_dirname)

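    # With no custom entries, AutoMixedPrecisionLists should equal the
    # default white (FP16), black (FP32) and gray (FP16-or-FP32) op lists.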
    def test_amp_lists(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists()
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_1(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 1. w={'exp'}, b=None
        white_list.add('exp')
        black_list.remove('exp')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'exp'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_2(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 2. w={'tanh'}, b=None
        white_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'tanh'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_3(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 3. w={'lstm'}, b=None
        white_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists({'lstm'})
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_4(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 4. w=None, b={'conv2d'}
        white_list.remove('conv2d')
        black_list.add('conv2d')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'conv2d'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_5(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 5. w=None, b={'tanh'}
        black_list.add('tanh')
        gray_list.remove('tanh')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'tanh'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_6(self):
        white_list = copy.copy(paddle.static.amp.fp16_lists.white_list)
        black_list = copy.copy(paddle.static.amp.fp16_lists.black_list)
        gray_list = copy.copy(paddle.static.amp.fp16_lists.gray_list)

        # 6. w=None, b={'lstm'}
        black_list.add('lstm')

        amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
            custom_black_list={'lstm'}
        )
        self.assertEqual(amp_lists.white_list, white_list)
        self.assertEqual(amp_lists.black_list, black_list)
        self.assertEqual(amp_lists.gray_list, gray_list)

    def test_amp_lists_7(self):
        # 7. w={'lstm'}, b={'lstm'}
        # The same op in both custom lists should raise ValueError.
        self.assertRaises(
            ValueError,
            paddle.static.amp.AutoMixedPrecisionLists,
            {'lstm'},
            {'lstm'},
        )

    def test_vgg_cuda(self):
        with self.scope_prog_guard():
            self.main('vgg', use_cuda=True)

    def test_resnet_cuda(self):
        with self.scope_prog_guard():
            self.main('resnet', use_cuda=True)

    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
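        # Build a program fed by a non-iterable DataLoader and check that
        # AMP decoration of the optimizer still works.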
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = paddle.static.data(
                    name='image', shape=[-1, 3, 224, 224], dtype='float32'
                )
                label = paddle.static.data(
                    name='label', shape=[-1, 1], dtype='int64'
                )
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )

                net = vgg16_bn_drop(image)
                logits = paddle.static.nn.fc(
                    x=net, size=10, activation="softmax"
                )
                cost, predict = paddle.nn.functional.softmax_with_cross_entropy(
                    logits, label, return_softmax=True
                )
                avg_cost = paddle.mean(cost)

                optimizer = fluid.optimizer.Lamb(learning_rate=0.001)
                amp_lists = paddle.static.amp.AutoMixedPrecisionLists(
                    custom_black_varnames={"loss", "conv2d_0.w_0"}
                )
                mp_optimizer = decorate(
                    optimizer=optimizer,
                    amp_lists=amp_lists,
                    init_loss_scaling=8.0,
                    use_dynamic_loss_scaling=True,
                )

                mp_optimizer.minimize(avg_cost)

    def test_non_iterable_dataloader(self):
        self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()