#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import unittest

import numpy as np

import paddle
from paddle import fluid
from paddle.io import Dataset
from paddle.static.amp.fp16_utils import cast_model_to_fp16

paddle.enable_static()


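# A deterministic random dataset shaped like CIFAR-10; fixed seeds mean the
# FP16 and FP32 runs below consume identical samples.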
class RandomDataset(Dataset):
    def __init__(self, num_samples, seed=123):
        super().__init__()
        np.random.seed(seed)
        self.num_samples = num_samples

    def __getitem__(self, idx):
        image = np.random.random([3, 32, 32]).astype('float32')
        label = np.random.randint(0, 9, (1,)).astype('int64')
        return image, label

    def __len__(self):
        return self.num_samples


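# Adapt an indexable Dataset to the generator interface paddle.batch expects.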
def reader_decorator(reader):
    def __reader__():
        for i in range(len(reader)):
            yield reader[i]

    return __reader__


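# A small CIFAR-10 ResNet; only the residual stages are built under
# paddle.static.amp.fp16_guard(), so AMP can restrict FP16 casting to them.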
def resnet_cifar10(input, depth=32):
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = paddle.static.nn.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return paddle.static.nn.batch_norm(input=tmp, act=act)

    def shortcut(input, ch_in, ch_out, stride):
        if ch_in != ch_out:
            return conv_bn_layer(input, ch_out, 1, stride, 0, None)
        else:
            return input

    def basicblock(input, ch_in, ch_out, stride):
        tmp = conv_bn_layer(input, ch_out, 3, stride, 1)
        tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, act=None, bias_attr=True)
        short = shortcut(input, ch_in, ch_out, stride)
        return paddle.nn.functional.relu(paddle.add(x=tmp, y=short))

    def layer_warp(block_func, input, ch_in, ch_out, count, stride):
        tmp = block_func(input, ch_in, ch_out, stride)
        for i in range(1, count):
            tmp = block_func(tmp, ch_out, ch_out, 1)
        return tmp

    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1
    )
    with paddle.static.amp.fp16_guard():
        res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
        res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
        res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.nn.functional.avg_pool2d(x=res3, kernel_size=8, stride=1)
    return pool


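# Train the network for one pass on random data, optionally in pure FP16, and
# return the per-batch train and test losses for later comparison.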
def train(use_pure_fp16=True, use_nesterov=False, optimizer=""):
    classdim = 10
    data_shape = [3, 32, 32]
    PASS_NUM = 1

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    train_program.random_seed = 123
    startup_prog.random_seed = 456
    with fluid.program_guard(train_program, startup_prog):
        images = paddle.static.data(
            name='pixel', shape=[-1] + data_shape, dtype='float32'
        )
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
        net = resnet_cifar10(images)
        logits = paddle.static.nn.fc(x=net, size=classdim, activation="softmax")
        cost = paddle.nn.functional.softmax_with_cross_entropy(
            logits, label, return_softmax=False
        )
        sum_cost = paddle.sum(cost)

        # Test program
        test_program = train_program.clone(for_test=True)

        if optimizer == "Adam":
            optimizer = paddle.optimizer.AdamW(
                learning_rate=0.001,
                epsilon=1e-8,
                weight_decay=0.0,
                multi_precision=True,
            )
        elif optimizer == "Lars":
            optimizer = paddle.fluid.optimizer.LarsMomentumOptimizer(
                learning_rate=0.001, momentum=0.9, multi_precision=use_pure_fp16
            )
        else:
            optimizer = paddle.optimizer.Momentum(
                learning_rate=0.001,
                momentum=0.9,
                use_nesterov=use_nesterov,
                weight_decay=paddle.regularizer.L2Decay(1e-4),
                multi_precision=use_pure_fp16,
            )

        if use_pure_fp16:
            optimizer = paddle.static.amp.decorate(
                optimizer,
                init_loss_scaling=128.0,
                use_dynamic_loss_scaling=True,
                use_pure_fp16=True,
            )

        optimizer.minimize(sum_cost)

    train_reader = paddle.batch(
        reader_decorator(RandomDataset(16 * 5, seed=123)),
        batch_size=16,
        drop_last=True,
    )

    test_reader = paddle.batch(
        reader_decorator(RandomDataset(4 * 5, seed=456)),
        batch_size=4,
        drop_last=True,
    )

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop():
        exe.run(startup_prog)
        if use_pure_fp16:
            # amp_init casts the FP32 parameters to FP16 and, with
            # use_fp16_test=True, also prepares test_program for FP16 eval.
            optimizer.amp_init(
                place, test_program=test_program, use_fp16_test=True
            )

        train_loss_list = []
        test_loss_list = []
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                (loss,) = exe.run(
                    train_program, feed=feeder.feed(data), fetch_list=[sum_cost]
                )
                loss_v = float(loss) if isinstance(loss, np.ndarray) else loss
                print(
                    'PassID {:1}, Train Batch ID {:04}, train loss {:2.4}'.format(
                        pass_id, batch_id + 1, float(loss_v)
                    )
                )
                train_loss_list.append(float(loss_v))

            for tid, test_data in enumerate(test_reader()):
                (loss_t,) = exe.run(
                    program=test_program,
                    feed=feeder.feed(test_data),
                    fetch_list=[sum_cost],
                )
                test_loss_list.append(float(loss_t))
                print(
                    'PassID {:1}, Test Batch ID {:04}, test loss {:2.4}'.format(
                        pass_id, tid + 1, float(loss_t)
                    )
                )

        return train_loss_list, test_loss_list

    return train_loop()


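# Pure-FP16 training should track FP32 training closely on identical data;
# the assertions below require the loss curves to match within rtol=0.01.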
class TestImageMultiPrecision(unittest.TestCase):
    def test_resnet_pure_fp16(self):
        if not fluid.core.is_compiled_with_cuda():
            return

        def do_test(use_nesterov=False, optimizer=""):
            if optimizer == "Adam":
                suffix = "use Adam"
            elif optimizer == "Lars":
                suffix = "use Lars"
            else:
                suffix = "with Nesterov" if use_nesterov else "without Nesterov"
            with self.scope_prog_guard():
                print(
                    "-----------------FP16 Train {}-----------------".format(
                        suffix
                    )
                )
                train_loss_fp16, test_loss_fp16 = train(
                    use_pure_fp16=True,
                    use_nesterov=use_nesterov,
                    optimizer=optimizer,
                )
            with self.scope_prog_guard():
                print(
                    "-----------------FP32 Train {}-----------------".format(
                        suffix
                    )
                )
                train_loss_fp32, test_loss_fp32 = train(
                    use_pure_fp16=False,
                    use_nesterov=use_nesterov,
                    optimizer=optimizer,
                )

            np.testing.assert_allclose(
                np.array(train_loss_fp16),
                np.array(train_loss_fp32),
                rtol=0.01,
                atol=1e-05,
                equal_nan=True,
                err_msg='Failed to train in pure FP16.',
            )
            np.testing.assert_allclose(
                np.array(test_loss_fp16),
                np.array(test_loss_fp32),
                rtol=0.01,
                atol=1e-05,
                equal_nan=True,
                err_msg='Failed to test in pure FP16.',
            )

        do_test(use_nesterov=False)
        do_test(use_nesterov=True)
        do_test(optimizer="Adam")
        do_test(optimizer="Lars")

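    # Run each configuration inside fresh programs and a fresh scope so that
    # parameters and optimizer state never leak between FP16 and FP32 runs.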
    @contextlib.contextmanager
    def scope_prog_guard(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield


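# cast_model_to_fp16 must handle a program that contains a non-iterable
# DataLoader and control-flow (cond) ops; the test passes if no error is raised.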
class TestAmpWithNonIterableDataLoader(unittest.TestCase):
    def decorate_with_data_loader(self):
        main_prog = paddle.static.Program()
        start_prog = paddle.static.Program()
        with paddle.static.program_guard(main_prog, start_prog):
            with paddle.fluid.unique_name.guard():
                image = paddle.static.data(
                    name='image', shape=[-1, 3, 224, 224], dtype='float32'
                )
                label = paddle.static.data(
                    name='label', shape=[-1, 1], dtype='int64'
                )
                # The DataLoader only needs to exist in the program; the test
                # never actually iterates it.
                py_reader = fluid.io.DataLoader.from_generator(
                    feed_list=[image, label],
                    capacity=4,
                    iterable=False,
                    use_double_buffer=False,
                )
                zero_var = paddle.tensor.fill_constant(
                    shape=[1], dtype='int64', value=0
                )
                one_var = paddle.tensor.fill_constant(
                    shape=[1], dtype='int64', value=1
                )
                label_val = paddle.static.nn.cond(
                    label != zero_var, lambda: zero_var, lambda: one_var
                )
                paddle.assign(label_val, output=label)
                net = resnet_cifar10(image)
                logits = paddle.static.nn.fc(
                    x=net, size=10, activation="softmax"
                )

        # Pin any 'mul' ops to FP32 dtypes before casting, so the pass has to
        # rewrite them explicitly.
        block = main_prog.global_block()
        for op in block.ops:
            if op.type == "mul":
                op._set_attr('in_dtype', fluid.core.VarDesc.VarType.FP32)
                op._set_attr('out_dtype', fluid.core.VarDesc.VarType.FP32)
                op._set_attr('dtype', fluid.core.VarDesc.VarType.FP32)

        cast_model_to_fp16(main_prog, use_fp16_guard=False)

    def test_non_iterable_dataloader(self):
        if fluid.core.is_compiled_with_cuda():
            self.decorate_with_data_loader()


if __name__ == '__main__':
    unittest.main()