#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Test for the sync_batch_norm op,
for FP64, FP16 and BF16 inputs.
"""

import os
import random
import subprocess
import tempfile
import unittest

import numpy as np
from decorator_helper import prog_scope
from eager_op_test import (
    OpTest,
    _set_use_system_allocator,
    convert_float_to_uint16,
)

import paddle
from paddle import fluid, nn
from paddle.fluid import Program, core, program_guard

_set_use_system_allocator(True)


def create_or_get_tensor(scope, var_name, var, place):
    """Get tensor, if not found, create a new one."""
    tensor = scope.var(var_name).get_tensor()
    if var is not None:
        assert isinstance(var, np.ndarray)
        tensor.set_recursive_sequence_lengths([])
        tensor.set(var, place)
    return tensor


class TestSyncBatchNormOpTraining(unittest.TestCase):
    """sync_batch_norm op test."""

    def setUp(self):
        """Setup."""
        # FP64 is the reference dtype on CUDA builds; ROCm builds fall back to FP32.
        self.dtype = np.float32 if core.is_compiled_with_rocm() else np.float64
        self.N = 8
        self.C = 16
        self.H = 32
        self.W = 32
        self.dshape = [self.N, self.C, self.H, self.W]
        self.atol = 1e-3
        self.data_dir = tempfile.TemporaryDirectory()
        self.fleet_log_dir = tempfile.TemporaryDirectory()

    def tearDown(self) -> None:
        self.data_dir.cleanup()
        self.fleet_log_dir.cleanup()

    def multi_device_run(self, layout, fetch_list, only_forward=False):
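        """Launch dist_fleet_sync_batch_norm.py on the available GPUs via paddle.distributed.launch."""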
        cmds = [
            "python",
            "-m",
            "paddle.distributed.launch",
        ]
        cmds += ["--log_dir", self.fleet_log_dir.name]
        cmds += ["dist_fleet_sync_batch_norm.py"]
        cmds += ["--data_dir", self.data_dir.name]

        dshape = [
            self.N // core.get_cuda_device_count(),
            self.C,
            self.H,
            self.W,
        ]
        cmds += ["--dshape", str(dshape)]
        cmds += ["--dtype", str(self.dtype.__name__)]
        cmds += ["--layout", layout]
        cmds += ["--fetch_list", str(fetch_list)]
        if only_forward:
            cmds += ["--only_forward"]
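        # FP16 and BF16 (stored as np.uint16) runs go through the cuDNN kernels.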
        if self.dtype == np.float16 or self.dtype == np.uint16:
            cmds += ["--use_cudnn"]
        p = subprocess.run(cmds)
        assert p.returncode == 0, f"Fleet train failed: {p}"

    def _build_program(
        self, place, layout, seed, sync_bn=False, only_forward=False
    ):
        """Build program."""
        main = fluid.Program()
        startup = fluid.Program()
        main.random_seed = seed
        startup.random_seed = seed
        use_cudnn = (self.dtype == np.float16) or (self.dtype == np.uint16)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                data = paddle.static.data(
                    name='input',
                    shape=self.dshape,
                    dtype=self.dtype,
                )
                data.desc.set_need_check_feed(False)
                conv = paddle.static.nn.conv2d(
                    input=data,
                    num_filters=32,
                    filter_size=1,
                    param_attr=fluid.ParamAttr(name='conv2d_weight'),
                    bias_attr=False,
                    use_cudnn=use_cudnn,
                )
                bn = paddle.static.nn.batch_norm(
                    conv,
                    param_attr=fluid.ParamAttr(name='bn_scale'),
                    bias_attr=fluid.ParamAttr(name='bn_bias'),
                    moving_mean_name='bn_moving_mean',
                    moving_variance_name='bn_moving_variance',
                    data_layout=layout,
                    is_test=only_forward,
                )
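                # Cast the BN output up to FP32 (ROCm) / FP64 (CUDA) before the sigmoid/sum reduction.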
                if core.is_compiled_with_rocm():
                    bn = paddle.cast(bn, 'float32')
                else:
                    bn = paddle.cast(bn, 'float64')
                sigmoid = paddle.nn.functional.sigmoid(bn)
                out = paddle.sum(sigmoid)
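                # For the non-sync reference program, scale the loss by the device count
                # so its gradients are on the same scale as the multi-GPU sync-BN run.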
                if not sync_bn:
                    out = out / core.get_cuda_device_count()
                if not only_forward:
                    sgd_opt = fluid.optimizer.SGD(learning_rate=0.0)
                    sgd_opt.backward(out)
        return main, startup, [out, conv, bn]

    @prog_scope()
    def _compare(self, place, layout, only_forward):
        """Compare results."""
        seed = 10
        os.environ['FLAGS_cudnn_deterministic'] = "1"
        paddle.enable_static()
        scope = core.Scope()
        if self.dtype == np.uint16:
            data = convert_float_to_uint16(
                np.random.random(size=self.dshape).astype(np.float32) * 4.0 - 2
            )
        else:
            data = (
                np.random.random(size=self.dshape).astype(self.dtype) * 4.0 - 2
            )
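        # Split the batch evenly across devices and save each slice for the distributed workers to read.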
        stride = self.N // core.get_cuda_device_count()
        for id in range(core.get_cuda_device_count()):
            filepath = os.path.join(
                self.data_dir.name,
                'input_{}_{}_{}_{}.npy'.format(
                    id, only_forward, str(self.dtype.__name__), layout
                ),
            )
            np.save(filepath, data[id * stride : (id + 1) * stride])
        data = create_or_get_tensor(
            scope, "input", OpTest.np_dtype_to_fluid_dtype(data), place
        )

        # Single-GPU reference run: the full batch of self.N samples on one device
        main, startup, outs = self._build_program(
            place, layout, seed, False, only_forward
        )
        exe = fluid.Executor(place)
        exe.run(startup)
        fetch_names = [v.name for v in outs] + [
            'bn_moving_mean',
            'bn_moving_variance',
            'bn_scale',
            'bn_bias',
        ]
        if not only_forward:
            others = [
                'batch_norm_0.tmp_0',
                'batch_norm_0.tmp_1',
                'bn_scale@GRAD',
                'bn_bias@GRAD',
                'batch_norm_0.tmp_3@GRAD',
                'conv2d_0.tmp_0@GRAD',
            ]
            fetch_names += others
        bn_fetches = exe.run(
            program=main, feed={'input': data}, fetch_list=fetch_names
        )

        #####################################################################
        # Multi-GPUs, self.N / core.get_cuda_device_count() per GPU
        assert core.get_cuda_device_count() > 1

        fetch_names = [
            'bn_moving_mean',
            'bn_moving_variance',
            'bn_scale',
            'bn_bias',
        ]
        if not only_forward:
            others = [
                'batch_norm_0.tmp_0',
                'batch_norm_0.tmp_1',
                'bn_scale@GRAD',
                'bn_bias@GRAD',
                'batch_norm_0.tmp_3@GRAD',
                'conv2d_0.tmp_0@GRAD',
            ]
            fetch_names += others

        self.multi_device_run(
            layout, fetch_list=fetch_names, only_forward=only_forward
        )

        fetch_names = [v.name for v in outs] + fetch_names

        for i in range(1, len(bn_fetches)):
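            # Compare each fetched tensor against device 0's saved output from the distributed run.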
            bn_val = bn_fetches[i]
            file_path = os.path.join(
                self.data_dir.name,
                'output_{}_{}_{}_{}.npy'.format(
                    0, only_forward, self.dtype.__name__, i
                ),
            )
            sync_bn_val = np.load(file_path)
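            # The distributed run saves only one device's shard, so trim the single-process result to match.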
            if sync_bn_val.shape != bn_val.shape:
                bn_val = bn_val[:stride]
            np.testing.assert_allclose(
                bn_val,
                sync_bn_val,
                rtol=1e-05,
                atol=self.atol,
                err_msg='Output ('
                + fetch_names[i]
                + ') has diff. \n'
                + '\nBN     '
                + str(bn_val)
                + '\n'
                + 'Sync BN '
                + str(sync_bn_val),
            )

    def test_train(self):
        """Test training."""
        if not core.is_compiled_with_cuda():
            return

        places = [core.CUDAPlace(0)]
        for place in places:
            for layout in ["NHWC", "NCHW"]:
                self._compare(place, layout, False)

    def test_infer(self):
        """Test inference."""
        if not core.is_compiled_with_cuda():
            return

        places = [core.CUDAPlace(0)]
        for place in places:
            for layout in ["NHWC", "NCHW"]:
                self._compare(place, layout, True)


class TestFP16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining):
    """sync_batch_norm op test for FP16 input."""

    def setUp(self):
        """Setup."""
        self.dtype = np.float16
        self.N = 8
        self.C = 16
        self.H = 32
        self.W = 32
        self.dshape = [self.N, self.C, self.H, self.W]
        self.atol = 1e-3
        self.data_dir = tempfile.TemporaryDirectory()
        self.fleet_log_dir = tempfile.TemporaryDirectory()


@unittest.skipIf(
    not core.is_compiled_with_cuda()
    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
    "core is not compiled with CUDA or does not support bfloat16",
)
class TestBF16SyncBatchNormOpTraining(TestSyncBatchNormOpTraining):
    """sync_batch_norm op test for BF16 input."""

    def setUp(self):
        """Setup."""
        self.dtype = np.uint16
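        # bfloat16 data is carried as np.uint16 on the NumPy side (see convert_float_to_uint16).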
        self.N = 8
        self.C = 16
        self.H = 32
        self.W = 32
        self.dshape = [self.N, self.C, self.H, self.W]
        self.atol = 1e-2
        self.data_dir = tempfile.TemporaryDirectory()
        self.fleet_log_dir = tempfile.TemporaryDirectory()


class TestDygraphSyncBatchNormAPIError(unittest.TestCase):
    def test_errors(self):
        if not core.is_compiled_with_cuda():
            return

        with program_guard(Program(), Program()):
            my_sync_batch_norm = paddle.nn.SyncBatchNorm(10)
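            # A raw LoDTensor is not a supported input type, so the call should raise TypeError.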
            x1 = fluid.create_lod_tensor(
                np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CUDAPlace(0)
            )
            self.assertRaises(TypeError, my_sync_batch_norm, x1)

            # the input dtype of SyncBatchNorm must be float16, float32 or float64;
            # float16 can only be used on GPU places
            x2 = paddle.static.data(
                name='x2', shape=[-1, 3, 4, 5, 6], dtype="int32"
            )
            x2.desc.set_need_check_feed(False)
            self.assertRaises(TypeError, my_sync_batch_norm, x2)


class TestConvertSyncBatchNorm(unittest.TestCase):
    def test_convert(self):
        if not core.is_compiled_with_cuda():
            return

        with program_guard(Program(), Program()):
            compare_model = paddle.nn.Sequential(
                paddle.nn.Conv2D(3, 5, 3),
                paddle.nn.BatchNorm2D(5),
                paddle.nn.BatchNorm2D(5),
            )
            model = paddle.nn.Sequential(
                paddle.nn.Conv2D(3, 5, 3),
                paddle.nn.BatchNorm2D(5),
                paddle.nn.BatchNorm2D(
                    5,
                    weight_attr=fluid.ParamAttr(name='bn.scale'),
                    bias_attr=fluid.ParamAttr(name='bn.bias'),
                ),
            )
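            # convert_sync_batchnorm should replace every BatchNorm2D sublayer with a SyncBatchNorm layer.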
            model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            for idx, sublayer in enumerate(compare_model.sublayers()):
                if isinstance(sublayer, paddle.nn.BatchNorm2D):
                    self.assertEqual(
                        isinstance(model[idx], paddle.nn.SyncBatchNorm), True
                    )


class TestConvertSyncBatchNormCast1(unittest.TestCase):
    def test_convert(self):
        if not core.is_compiled_with_cuda():
            return

        class Net(nn.Layer):
            def __init__(self):
                super().__init__()
                self.conv1 = nn.Conv2D(3, 5, 3)
                self.bn = []
                bn = self.add_sublayer('bn', nn.BatchNorm2D(5))
                self.bn.append(bn)

            def forward(self, x):
                x = self.conv1(x)
                for bn in self.bn:
                    x = bn(x)
                return x

        model = nn.Sequential()
        model.add_sublayer('net1', Net())
        model.add_sublayer('net2', Net())
        compare_model = nn.Sequential()
        compare_model.add_sublayer('net1', Net())
        compare_model.add_sublayer('net2', Net())
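        # Conversion should preserve the sublayer structure even when BN layers are held in a plain Python list.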
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.assertEqual(len(compare_model.sublayers()), len(model.sublayers()))


class TestConvertSyncBatchNormCase2(unittest.TestCase):
    def test_convert(self):
        if not core.is_compiled_with_cuda():
            return

        with fluid.dygraph.guard(fluid.CUDAPlace(0)):

            class SyBNNet(paddle.nn.Layer):
                def __init__(self, in_ch=3, out_ch=3, dirate=1):
                    super().__init__()
                    self.bn_s1 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(
                        paddle.nn.BatchNorm3D(
                            out_ch,
                            weight_attr=paddle.ParamAttr(
                                regularizer=paddle.regularizer.L2Decay(0.0)
                            ),
                        )
                    )
                    self.bn_s2 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(
                        paddle.nn.BatchNorm3D(out_ch, data_format='NDHWC')
                    )

                def forward(self, x):
                    x = self.bn_s1(x)
                    out = paddle.sum(paddle.abs(self.bn_s2(x)))
                    return out

            class BNNet(paddle.nn.Layer):
                def __init__(self, in_ch=3, out_ch=3, dirate=1):
                    super().__init__()
                    self.bn_s1 = paddle.nn.BatchNorm3D(
                        out_ch,
                        weight_attr=paddle.ParamAttr(
                            regularizer=paddle.regularizer.L2Decay(0.0)
                        ),
                    )
                    self.bn_s2 = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(
                        paddle.nn.BatchNorm3D(out_ch, data_format='NDHWC')
                    )

                def forward(self, x):
                    x = self.bn_s1(x)
                    out = paddle.sum(paddle.abs(self.bn_s2(x)))
                    return out

            bn_model = BNNet()
            sybn_model = SyBNNet()
            np.random.seed(10)
            data = np.random.random([3, 3, 3, 3, 3]).astype('float32')
            x = paddle.to_tensor(data)
            bn_out = bn_model(x)
            sybn_out = sybn_model(x)
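            # On a single card SyncBatchNorm should behave like regular batch norm, so both nets should agree.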
            np.testing.assert_allclose(
                bn_out.numpy(),
                sybn_out.numpy(),
                rtol=1e-05,
                err_msg='Output has diff. \n'
                + '\nBN     '
                + str(bn_out.numpy())
                + '\n'
                + 'Sync BN '
                + str(sybn_out.numpy()),
            )


class TestDygraphSyncBatchNormDataFormatError(unittest.TestCase):
    def test_errors(self):
        if not core.is_compiled_with_cuda():
            return

        with fluid.dygraph.guard(fluid.CUDAPlace(0)):
            my_sync_batch_norm = paddle.nn.SyncBatchNorm(10, data_format='CN')
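            # 'CN' is not a valid data_format, so the forward pass should raise ValueError.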
            data = np.random.random([3, 3, 3]).astype('float32')
            x = paddle.to_tensor(data)
            self.assertRaises(ValueError, my_sync_batch_norm, x)


if __name__ == '__main__':
    paddle.seed(0)
    np.random.seed(0)
    random.seed(0)
    unittest.main()