# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.nn.functional as F
from paddle import fluid, static
from paddle.fluid import backward


class BackwardNet:
    """
    Abstract base class.
    Every net that inherits from this class should implement two methods:
        build_model: build the net used to test the backward logic
        init_data: fake input data used to test all programs.
    """

    def __init__(self):
        self.stop_gradient_grad_vars = set()
        self.no_grad_vars = set()
        self.params_names = set()
        self.op_path = []

    def build_model(self):
        """
        Build net to test the logic of backward.
        :return: loss
        """
        raise NotImplementedError

    def init_data(self):
        """
        Fake input data to test all programs.
        :return: dict, {'var_name': var_data}
        """
        raise NotImplementedError
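

# A minimal sketch of a concrete net (hypothetical, for illustration only; no test
# below exercises it). The expectation attributes set in BackwardNet.__init__
# (params_names, op_path, ...) are left at their defaults, so TestBackward._check_all
# would still need them filled in before this class could be checked.
class _ToyBackwardNet(BackwardNet):
    def build_model(self):
        x = paddle.static.data(name='toy_x', shape=[4, 8], dtype='float32')
        return paddle.mean(paddle.static.nn.fc(x=x, size=1))

    def init_data(self):
        return {'toy_x': np.random.random([4, 8]).astype('float32')}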


class TestBackward(unittest.TestCase):
    """
    All related test classes should inherit from this class
    and implement only the test_backward function.
    """

    def _check_all(self, net):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            self._check_backward(loss, main)

            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss)
            exe.run(startup)
            exe.run(feed=net.init_data())

    def _check_backward(self, loss, main_program):
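        # Exercise append_backward, then re-run its internal helpers
        # (_get_stop_gradients_, _find_op_path_, _find_no_grad_vars) one by
        # one, comparing each result with the expectations recorded on the net.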
        global_block_idx = self.global_block_idx
        params_grads = self._check_params_grad(loss)
        # 1.1 get_stop_gradients
        no_grad_dict = self._check_stop_gradient(main_program)
        # 1.2 find_op_path
        op_path, block_no_grad_set = self._check_op_path(
            main_program.block(global_block_idx), [loss], [], no_grad_dict
        )
        # 1.3 _find_no_grad_vars
        no_grad_vars = self._check_find_no_grad_vars(
            main_program.block(global_block_idx),
            op_path,
            [loss],
            block_no_grad_set,
        )
        # update no_grad_dict
        block_no_grad_set.update(no_grad_vars)
        no_grad_dict[global_block_idx].update(
            list(map(fluid.backward._append_grad_suffix_, block_no_grad_set))
        )

    def _check_params_grad(self, loss, parameter_list=None, no_grad_set=None):
        params_grads = fluid.backward.append_backward(
            loss, parameter_list, no_grad_set
        )
        params_names = {
            param_var.name for (param_var, grad_var) in params_grads
        }
        self.assertSetEqual(params_names, self.net.params_names)

        return params_grads

    def _check_stop_gradient(self, program):
        no_grad_dict = fluid.backward._get_stop_gradients_(program)
        if no_grad_dict is not None and isinstance(no_grad_dict, dict):
            self.assertSetEqual(
                no_grad_dict[self.global_block_idx],
                self.net.stop_gradient_grad_vars,
            )

        return no_grad_dict

    def _check_op_path(self, root_block, outputs, inputs=[], no_grad_dict=None):
        if no_grad_dict is None or not isinstance(no_grad_dict, dict):
            block_no_grad_set = None
        else:
            block_no_grad_set = set(
                map(
                    fluid.backward._strip_grad_suffix_,
                    no_grad_dict[self.global_block_idx],
                )
            )
        op_path = fluid.backward._find_op_path_(
            root_block, outputs, inputs, block_no_grad_set
        )
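        # op_path is the ordered list of forward ops that connect `inputs` to
        # `outputs`; compare it against the op types the net expects.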
        op_types = [op.type for op in op_path]
        self.assertListEqual(op_types, self.net.op_path)

        return op_path, block_no_grad_set

    def _check_find_no_grad_vars(
        self, root_block, op_path, targets, block_no_grad_set
    ):
        no_grad_vars = fluid.backward._find_no_grad_vars(
            root_block, op_path, targets, block_no_grad_set
        )
        self.assertSetEqual(no_grad_vars, self.net.no_grad_vars)

        return no_grad_vars

    def _check_error_param_list(self, net, parameter_list):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss, parameter_list=parameter_list)
            exe.run(startup)
            exe.run(feed=net.init_data())

    def _check_error_no_grad_set(self, net, no_grad_set):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss, no_grad_set=no_grad_set)
            exe.run(startup)
            exe.run(feed=net.init_data())


class SimpleNet(BackwardNet):
    def __init__(self):
        super().__init__()
        self.stop_gradient_grad_vars = {
            'x_no_grad@GRAD',
            'x2_no_grad@GRAD',
            'x3_no_grad@GRAD',
            'label_no_grad@GRAD',
        }
        self.no_grad_vars = set()
        self.params_names = {'w2v', 'fc_predict.b_0', 'fc_w'}
        self.op_path = [
            'lookup_table_v2',
            'lookup_table_v2',  # embedding
            'elementwise_add',  # merge
            'mul',
            'elementwise_add',
            'softmax',  # fc
            'elementwise_sub',
            'square',
            'reduce_mean',
        ]  # loss
        self.shape = [16, 50]

    def init_data(self):
        assert len(self.shape) == 2
        x = np.random.randint(0, 90, self.shape).astype('int64')
        x2 = np.random.randint(0, 90, self.shape).astype('int64')
        x3 = np.random.randint(0, 90, self.shape).astype('int64')
        label = np.random.random([self.shape[0], 1]).astype('float32')
        return {
            'x_no_grad': x,
            'x2_no_grad': x2,
            'x3_no_grad': x3,
            'label_no_grad': label,
        }

    def build_model(self):
        # the input data layers below have stop_gradient=True (the default)
        x = paddle.static.data(
            name='x_no_grad', shape=self.shape, dtype='int64'
        )
        x2 = paddle.static.data(
            name='x2_no_grad', shape=self.shape, dtype='int64'
        )
        x3 = paddle.static.data(
            name='x3_no_grad', shape=self.shape, dtype='int64'
        )
        label = paddle.static.data(
            name='label_no_grad', shape=[self.shape[0], 1], dtype='float32'
        )
        # Shared embedding table: the three lookups reuse 'w2v', so its gradient
        # outputs are summed and renamed (exercises _addup_repetitive_outputs_).
        x_emb = paddle.static.nn.embedding(
            x, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
        x2_emb = paddle.static.nn.embedding(
            x2, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
        x3_emb = paddle.static.nn.embedding(
            x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
        # merge layers
        x_merge = paddle.add(x_emb, x2_emb, name='x_add_x2')
        x2_merge = paddle.add(x2_emb, x3_emb, name='x2_add_x3')
        # shared fc_w
        predict = paddle.static.nn.fc(
            x=x_merge,
            size=1,
            activation='softmax',
            weight_attr=fluid.ParamAttr(name='fc_w'),
            name='fc_predict',
        )
        # unused layer: it does not contribute to the loss
        fc_no_use = paddle.static.nn.fc(
            x=x2_merge,
            size=1,
            activation='sigmoid',
            weight_attr=fluid.ParamAttr(name='fc_w'),
            name='fc_no_use',
        )
        # loss
        cost = paddle.nn.functional.square_error_cost(
            input=predict, label=label
        )
        loss = paddle.mean(cost, name='mean_loss')

        return loss


class TestSimpleNet(TestBackward):
    def test_backward(self):
        """
        Instantiate each NetClass to test backward.
        """
        self.global_block_idx = 0
        self.net = SimpleNet()
        self._check_all(self.net)


class TestGradientsError(unittest.TestCase):
    def test_error(self):
        x = paddle.static.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
        x.stop_gradient = False
        conv = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False)
        y = F.relu(conv)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y.name, x)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y, x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], [x], target_gradients=x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], x, no_grad_set=conv)


class TestSimpleNetWithErrorParamList(TestBackward):
    def test_parameter_list_type_error(self):
        self.global_block_idx = 0
        self.net = SimpleNet()
        # The type of parameter_list argument must be list or tuple
        with self.assertRaises(TypeError):
            self._check_error_param_list(self.net, "test")
        # Each member of parameter_list must be a Variable or str
        test = paddle.static.data(
            name='test', shape=[None, 90], dtype='float32'
        )
        with self.assertRaises(TypeError):
            self._check_error_param_list(self.net, [test, "test", 3])


class TestSimpleNetWithErrorNoGradSet(TestBackward):
    def test_no_grad_set_type_error(self):
        self.global_block_idx = 0
        self.net = SimpleNet()
        # The type of no_grad_set argument must be set or list or tuple
        with self.assertRaises(TypeError):
            self._check_error_no_grad_set(self.net, "test")
        # Each member of no_grad_set must be a Variable or str
        test = paddle.static.data(
            name='test', shape=[None, 90], dtype='float32'
        )
        with self.assertRaises(TypeError):
            self._check_error_no_grad_set(self.net, [test, "test", 3])


class TestAppendBackwardWithError(unittest.TestCase):
    def build_net(self):
        x = paddle.static.data(name='x', shape=[None, 13], dtype='int64')
        y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
        x_emb = paddle.static.nn.embedding(x, size=[100, 256])
        y_predict = paddle.static.nn.fc(x=x_emb, size=1, name='my_fc')
        loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
        avg_loss = paddle.mean(loss)
        param_names = [
            param.name
            for param in fluid.default_main_program().block(0).all_parameters()
        ]

        return avg_loss, param_names

    def setUp(self):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            self.avg_loss, self.param_names = self.build_net()

    def test_loss_type_error(self):
        with self.assertRaises(TypeError):
            fluid.backward.append_backward(loss=self.avg_loss.name)

    def test_parameter_list_type_error(self):
        with self.assertRaises(TypeError):
            self.param_names[0] = np.random.random([10])
            fluid.backward.append_backward(
                loss=self.avg_loss, parameter_list=self.param_names
            )

    def test_callback_type_error(self):
        with self.assertRaises(TypeError):

            def callback(block, context):
                return

            fluid.backward.append_backward(
                loss=self.avg_loss, callbacks=callback
            )


class TestGradientsWithOptimizer(unittest.TestCase):
    def _check_grad_op_name(self, forward_list, optimized_list):
        # Every forward op should get a matching *_grad op, emitted in reverse
        # order of the forward ops; skip the forward section when searching.
        backward_list = [op + "_grad" for op in reversed(forward_list)]
        idx = optimized_list.index(backward_list[0], len(backward_list))

        self.assertListEqual(
            backward_list, optimized_list[idx : idx + len(backward_list)]
        )

    def test_gradient_with_optimizer(self):
        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            img = static.data(name='image', shape=[None, 784])
            pred = static.nn.fc(x=img, size=10, activation='relu')
            loss = paddle.mean(pred)
            opt = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)

            forward_list = [o.type for o in main.current_block().ops]
            (
                optimize_ops,
                param_grads,
            ) = paddle.autograd.backward_mode.gradients_with_optimizer(
                main, opt
            )

            optimized_list = [o.type for o in main.current_block().ops]

            self.assertGreater(len(optimized_list), len(forward_list))
            self.assertIn(opt.type, optimized_list)
            self._check_grad_op_name(forward_list, optimized_list)


# TODO(Aurelius84): add conditional network test
class ConditionalNet(BackwardNet):
    def __init__(self):
        super().__init__()


class TestBackwardUninitializedVariable(unittest.TestCase):
    """this case is found in yolov5 while to_static.
    gradient aggregation may cause sum a invalid variable.
    """

    def test(self):
        paddle.enable_static()
        main_prg, startup_prg = paddle.static.Program(), paddle.static.Program()
        with paddle.static.program_guard(main_prg, startup_prg):
            gt = paddle.static.data(name='gt', shape=[4], dtype='float32')
            x = paddle.static.data(name='x', shape=[2], dtype='float32')
            gt.stop_gradient = True
            x.stop_gradient = False
            gt = gt.reshape([4, 1]).reshape([4])
            loss = (
                paddle.nn.functional.binary_cross_entropy(x, gt[:2])
                + (gt[2:4] * x).sum()
            )
            exe = paddle.static.Executor()
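            # Append the backward pass; no gradients are fetched here. The run
            # below only checks that building and executing the backward ops
            # over the sliced `gt` does not raise.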
            paddle.fluid.backward.gradients(loss, [])
            exe.run(startup_prg)
            # Run the program and fetch the loss.
            out = exe.run(
                main_prg,
                feed={
                    'gt': np.array([1.0, 1.0, 0.0, 0.0], dtype='float32'),
                    'x': np.array([0.5, 0.5], dtype='float32'),
                },
                fetch_list=[loss],
            )
            print(out)


class TestStripGradSuffix(unittest.TestCase):
    def test_strip_grad_suffix(self):
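        # Each case maps a generated gradient-variable name to the forward
        # variable name it should be stripped back to.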
        cases = (
            ('x@GRAD', 'x'),
            ('x@GRAD@GRAD', 'x'),
            ('x@GRAD@RENAME@1', 'x'),
            ('x@GRAD_slice_0@GRAD', 'x@GRAD_slice_0'),
            ('grad/grad/x@GRAD@RENAME@block0@1@GRAD', 'x'),
        )
        for input_, desired in cases:
            self.assertEqual(backward._strip_grad_suffix_(input_), desired)


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()