# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
import paddle.static as static


class BackwardNet:
    """
    Abstract Base Class.
    All Net inherited this Class should implement two functions:
        build_model: build net to test the logic of backward
        init_data: fake input data to test all programs.
    """

    def __init__(self):
        self.stop_gradient_grad_vars = set()
        self.no_grad_vars = set()
        self.params_names = set()
        self.op_path = []

    def build_model(self):
        """
        Build net to test the logic of backward.
        :return: loss
        """
        raise NotImplementedError

    def init_data(self):
        """
        Fake input data to test all programs.
        :return: dict, {'var_name': var_data}
        """
        raise NotImplementedError
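    # A minimal concrete subclass (an illustrative sketch, not used by the
    # tests below; SimpleNet is the real example) would look roughly like:
    #
    #   class TinyNet(BackwardNet):
    #       def build_model(self):
    #           x = fluid.data(name='x', shape=[None, 1], dtype='float32')
    #           return paddle.mean(x)
    #
    #       def init_data(self):
    #           return {'x': np.ones([4, 1], dtype='float32')}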


class TestBackward(unittest.TestCase):
    """
    All related TestClass should inherit this class,
    and only implement test_backward function.
    """

    def _check_all(self, net):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            self._check_backward(loss, main)

            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss)
            exe.run(startup)
            exe.run(feed=net.init_data())

    def _check_backward(self, loss, main_program):
        global_block_idx = self.global_block_idx
        params_grads = self._check_params_grad(loss)
        # 1.1 get_stop_gradients
        no_grad_dict = self._check_stop_gradient(main_program)
        # 1.2 find_op_path
        op_path, block_no_grad_set = self._check_op_path(
            main_program.block(global_block_idx), [loss], [], no_grad_dict
        )
        # 1.3 _find_no_grad_vars
        no_grad_vars = self._check_find_no_grad_vars(
            main_program.block(global_block_idx),
            op_path,
            [loss],
            block_no_grad_set,
        )
        # update no_grad_dict
        block_no_grad_set.update(no_grad_vars)
        no_grad_dict[global_block_idx].update(
            list(map(fluid.backward._append_grad_suffix_, block_no_grad_set))
        )
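        # Rough sketch (an assumption about the internals, not asserted by
        # this test) of how the checked helpers fit together: the public
        # entry point
        #
        #     params_grads = fluid.backward.append_backward(loss)
        #
        # roughly (1) collects stop-gradient vars (_get_stop_gradients_),
        # (2) prunes the relevant op path (_find_op_path_) and (3) drops
        # grad vars that no gradient path reaches (_find_no_grad_vars)
        # before appending the grad ops, naming grad vars with the '@GRAD'
        # suffix (e.g. 'fc_w' -> 'fc_w@GRAD').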

    def _check_params_grad(self, loss, parameter_list=None, no_grad_set=None):
        params_grads = fluid.backward.append_backward(
            loss, parameter_list, no_grad_set
        )
        params_names = set(
            [param_var.name for (param_var, grad_var) in params_grads]
        )
        self.assertSetEqual(params_names, self.net.params_names)

        return params_grads

    def _check_stop_gradient(self, program):
        no_grad_dict = fluid.backward._get_stop_gradients_(program)
        if no_grad_dict is not None and isinstance(no_grad_dict, dict):
            self.assertSetEqual(
                no_grad_dict[self.global_block_idx],
                self.net.stop_gradient_grad_vars,
            )

        return no_grad_dict

    def _check_op_path(self, root_block, outputs, inputs=[], no_grad_dict=None):
        if no_grad_dict is None or not isinstance(no_grad_dict, dict):
            block_no_grad_set = None
        else:
            block_no_grad_set = set(
                map(
                    fluid.backward._strip_grad_suffix_,
                    no_grad_dict[self.global_block_idx],
                )
            )
        op_path = fluid.backward._find_op_path_(
            root_block, outputs, inputs, block_no_grad_set
        )
        op_types = [op.type for op in op_path]
        self.assertListEqual(op_types, self.net.op_path)

        return op_path, block_no_grad_set

    def _check_find_no_grad_vars(
        self, root_block, op_path, targets, block_no_grad_set
    ):
        no_grad_vars = fluid.backward._find_no_grad_vars(
            root_block, op_path, targets, block_no_grad_set
        )
        self.assertSetEqual(no_grad_vars, self.net.no_grad_vars)

        return no_grad_vars

    def _check_error_param_list(self, net, parameter_list):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss, parameter_list=parameter_list)
            exe.run(startup)
            exe.run(feed=net.init_data())

    def _check_error_no_grad_set(self, net, no_grad_set):
        place = (
            fluid.CUDAPlace(0)
            if fluid.core.is_compiled_with_cuda()
            else fluid.CPUPlace()
        )
        exe = fluid.Executor(place)

        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            loss = net.build_model()
            optimizer = fluid.optimizer.SGD(learning_rate=0.1)
            optimizer.minimize(loss, no_grad_set=no_grad_set)
            exe.run(startup)
            exe.run(feed=net.init_data())


class SimpleNet(BackwardNet):
    def __init__(self):
        super().__init__()
        self.stop_gradient_grad_vars = set(
            [
                'x_no_grad@GRAD',
                'x2_no_grad@GRAD',
                'x3_no_grad@GRAD',
                'label_no_grad@GRAD',
            ]
        )
        self.no_grad_vars = set()
        self.params_names = set(['w2v', 'fc_predict.b_0', 'fc_w'])
        self.op_path = [
            'lookup_table_v2',
            'lookup_table_v2',  # embedding
            'elementwise_add',  # merge
            'mul',
            'elementwise_add',
            'softmax',  # fc
            'elementwise_sub',
            'square',
            'reduce_mean',
        ]  # loss
        self.shape = [16, 50]
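        # presumably [batch_size, sequence_length]: init_data() generates
        # int64 ids of this shape for the embedding lookups in build_model()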

    def init_data(self):
        assert len(self.shape) == 2
        x = np.random.randint(0, 90, self.shape).astype('int64')
        x2 = np.random.randint(0, 90, self.shape).astype('int64')
        x3 = np.random.randint(0, 90, self.shape).astype('int64')
        label = np.random.random([self.shape[0], 1]).astype('float32')
        return {
            'x_no_grad': x,
            'x2_no_grad': x2,
            'x3_no_grad': x3,
            'label_no_grad': label,
        }

    def build_model(self):
        # stop_gradient = True in input
        x = fluid.data(name='x_no_grad', shape=self.shape, dtype='int64')
        x2 = fluid.data(name='x2_no_grad', shape=self.shape, dtype='int64')
        x3 = fluid.data(name='x3_no_grad', shape=self.shape, dtype='int64')
        label = fluid.data(
            name='label_no_grad', shape=[self.shape[0], 1], dtype='float32'
        )
        # shared layer, the grad of 'w2v' will be summed and renamed.
        # To test  _addup_repetitive_outputs_
        x_emb = fluid.embedding(
            x, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
        x2_emb = fluid.embedding(
            x2, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
        x3_emb = fluid.embedding(
            x3, size=[100, 64], param_attr=fluid.ParamAttr(name='w2v')
        )
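        # All three lookups share the 'w2v' parameter, so building backward
        # must sum the three partial gradients into a single w2v@GRAD
        # (the _addup_repetitive_outputs_ pass mentioned above).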
        # merge layers
        x_merge = paddle.add(x_emb, x2_emb, name='x_add_x2')
        x2_merge = paddle.add(x2_emb, x3_emb, name='x2_add_x3')
        # shared fc_w
        predict = paddle.static.nn.fc(
            x=x_merge,
            size=1,
            activation='softmax',
            weight_attr=fluid.ParamAttr(name='fc_w'),
            name='fc_predict',
        )
        # useless layer for calculating loss
        fc_no_use = paddle.static.nn.fc(
            x=x2_merge,
            size=1,
            activation='sigmoid',
            weight_attr=fluid.ParamAttr(name='fc_w'),
            name='fc_no_use',
        )
        # loss
        cost = paddle.nn.functional.square_error_cost(
            input=predict, label=label
        )
        loss = paddle.mean(cost, name='mean_loss')

        return loss


class TestSimpleNet(TestBackward):
    def test_backward(self):
        """
        Instantiate each NetClass to test backward.
        """
        self.global_block_idx = 0
        self.net = SimpleNet()
        self._check_all(self.net)


class TestGradientsError(unittest.TestCase):
    def test_error(self):
        x = fluid.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
        x.stop_gradient = False
        conv = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False)
        y = F.relu(conv)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y.name, x)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients(y, x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], [x], target_gradients=x.name)

        with self.assertRaises(TypeError):
            x_grad = fluid.gradients([y], x, no_grad_set=conv)


class TestSimpleNetWithErrorParamList(TestBackward):
    def test_parameter_list_type_error(self):
        self.global_block_idx = 0
        self.net = SimpleNet()
        # The type of parameter_list argument must be list or tuple
        with self.assertRaises(TypeError):
            self._check_error_param_list(self.net, "test")
        # The type of parameter_list's member must be Variable or str
        test = fluid.data(name='test', shape=[None, 90], dtype='float32')
        with self.assertRaises(TypeError):
            self._check_error_param_list(self.net, [test, "test", 3])


class TestSimpleNetWithErrorNoGradSet(TestBackward):
    def test_no_grad_set_type_error(self):
        self.global_block_idx = 0
        self.net = SimpleNet()
        # The type of no_grad_set argument must be set or list or tuple
        with self.assertRaises(TypeError):
            self._check_error_no_grad_set(self.net, "test")
        # The type of no_grad_set's member must be Variable or str
        test = fluid.data(name='test', shape=[None, 90], dtype='float32')
        with self.assertRaises(TypeError):
            self._check_error_no_grad_set(self.net, [test, "test", 3])


class TestAppendBackwardWithError(unittest.TestCase):
    def build_net(self):
        x = fluid.data(name='x', shape=[None, 13], dtype='int64')
        y = fluid.data(name='y', shape=[None, 1], dtype='float32')
        x_emb = fluid.embedding(x, size=[100, 256])
        y_predict = paddle.static.nn.fc(x=x_emb, size=1, name='my_fc')
        loss = paddle.nn.functional.square_error_cost(input=y_predict, label=y)
        avg_loss = paddle.mean(loss)
        param_names = [
            param.name
            for param in fluid.default_main_program().block(0).all_parameters()
        ]

        return avg_loss, param_names

    def setUp(self):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            self.avg_loss, self.param_names = self.build_net()

    def test_loss_type_error(self):
        with self.assertRaises(TypeError):
            fluid.backward.append_backward(loss=self.avg_loss.name)

    def test_parameter_list_type_error(self):
        with self.assertRaises(TypeError):
            self.param_names[0] = np.random.random([10])
            fluid.backward.append_backward(
                loss=self.avg_loss, parameter_list=self.param_names
            )

    def test_callback_type_error(self):
        with self.assertRaises(TypeError):

            def callback(block, context):
                return

            fluid.backward.append_backward(
                loss=self.avg_loss, callbacks=callback
            )


class TestGradientsWithOptimizer(unittest.TestCase):
    def _check_grad_op_name(self, forward_list, optimized_list):
        backward_list = [op + "_grad" for op in reversed(forward_list)]
        # the grad ops must appear after the forward section, in reverse order
        idx = optimized_list.index(backward_list[0], len(backward_list))

        self.assertListEqual(
            backward_list, optimized_list[idx : idx + len(backward_list)]
        )

    def test_gradient_with_optimizer(self):
        main = fluid.Program()
        startup = fluid.Program()

        with fluid.program_guard(main, startup):
            img = static.data(name='image', shape=[None, 784])
            pred = static.nn.fc(x=img, size=10, activation='relu')
            loss = paddle.mean(pred)
            opt = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)

            forward_list = [o.type for o in main.current_block().ops]
            (
                optimize_ops,
                param_grads,
            ) = paddle.autograd.backward_mode.gradients_with_optimizer(
                main, opt
            )

            optimized_list = [o.type for o in main.current_block().ops]

            self.assertGreater(len(optimized_list), len(forward_list))
            self.assertIn(opt.type, optimized_list)
            self._check_grad_op_name(forward_list, optimized_list)


# TODO(Aurelius84): add conditional network test
class ConditionalNet(BackwardNet):
    def __init__(self):
        super().__init__()


class TestBackwardUninitializedVariable(unittest.TestCase):
    """this case is found in yolov5 while to_static.
    gradient aggregation may cause sum a invalid variable.
    """

    def test(self):
        paddle.enable_static()
        main_prg, startup_prg = paddle.static.Program(), paddle.static.Program()
        with paddle.static.program_guard(main_prg, startup_prg):
            gt = paddle.static.data(name='gt', shape=[4], dtype='float32')
            x = paddle.static.data(name='x', shape=[2], dtype='float32')
            gt.stop_gradient = True
            x.stop_gradient = False
            gt = gt.reshape([4, 1]).reshape([4])
            loss = (
                paddle.nn.functional.binary_cross_entropy(x, gt[:2])
                + (gt[2:4] * x).sum()
            )
            exe = paddle.static.Executor()
            paddle.fluid.backward.gradients(loss, [])
            exe.run(startup_prg)
            # Optimizer
            out = exe.run(
                main_prg,
                feed={
                    'gt': np.array([1.0, 1.0, 0.0, 0.0], dtype='float32'),
                    'x': np.array([0.5, 0.5], dtype='float32'),
                },
                fetch_list=[loss],
            )
            print(out)


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()