# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
from unittest import TestCase

import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.wrapped_decorator import wrap_decorator
from paddle.vision.models import resnet50, resnet101


def _dygraph_guard_(func):
    """Wrap ``func`` so it runs under ``fluid.dygraph.guard()`` when needed.

    When ``fluid._non_static_mode()`` already reports true, ``func`` is
    called directly; otherwise the call is made inside a
    ``fluid.dygraph.guard()`` context.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns whatever ``func`` returns.
    """

    def __impl__(*args, **kwargs):
        if fluid._non_static_mode():
            return func(*args, **kwargs)
        else:
            with fluid.dygraph.guard():
                return func(*args, **kwargs)

    return __impl__


# Decorator used by the tests below: ``_dygraph_guard_`` passed through
# ``wrap_decorator``.
dygraph_guard = wrap_decorator(_dygraph_guard_)


def random_var(size, low=-1, high=1, dtype='float32'):
    """Create a dygraph variable holding uniform random samples.

    Args:
        size: Shape passed to ``np.random.uniform``.
        low: Lower bound of the sampling interval.
        high: Upper bound of the sampling interval.
        dtype: NumPy dtype the samples are cast to before conversion.

    Returns:
        The result of ``fluid.dygraph.to_variable`` on the sampled array.
    """
    samples = np.random.uniform(low=low, high=high, size=size)
    return fluid.dygraph.to_variable(samples.astype(dtype))


class TestEagerGrad(TestCase):
    """Small-example checks for ``fluid.dygraph.grad`` / ``paddle.grad``.

    Every ``func_*`` helper is executed twice by its ``test_*`` driver: once
    inside ``_test_eager_guard()`` and once outside of it.
    """

    def func_simple_example_eager_grad(self):
        """Compare d(matmul(x, y))/dx against the NumPy reference."""
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        out = paddle.matmul(x, y)
        dx = fluid.dygraph.grad(out, x)

        dout = np.ones_like(np_y)
        expected_dx = np.matmul(dout, np.transpose(np_y))

        # stop_gradient = !create_graph, create_graph default false
        self.assertEqual(dx[0].stop_gradient, True)
        np.testing.assert_allclose(dx[0].numpy(), expected_dx, rtol=1e-05)

    def test_simple_example_eager_grad(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad()
        self.func_simple_example_eager_grad()

    def func_simple_example_eager_grad_allow_unused(self):
        """With ``allow_unused=True`` the grad of an unused input is None."""
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        # out_z is deliberately not connected to `out`.
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        dx = fluid.dygraph.grad(out, [x, z], allow_unused=True)
        dout = np.ones_like(np_y)
        expected_dx = np.matmul(dout, np.transpose(np_y))
        np.testing.assert_allclose(dx[0].numpy(), expected_dx, rtol=1e-05)
        # stop_gradient = !create_graph, create_graph default false
        self.assertEqual(dx[0].stop_gradient, True)
        # z is unused input in the graph
        self.assertEqual(dx[1], None)

    def test_simple_example_eager_grad_allow_unused(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_allow_unused()
        self.func_simple_example_eager_grad_allow_unused()

    def func_simple_example_eager_grad_not_allow_unused(self):
        """A ValueError for an unused input must mention ``allow_unused``.

        NOTE(review): if no ValueError is raised this helper passes silently;
        confirm whether both execution modes are expected to raise.
        """
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        try:
            # allow_unused is false in default
            fluid.dygraph.grad(out, [x, z])
        except ValueError as e:
            # assertIn also accepts a match at index 0 and is not stripped
            # when Python runs with -O, unlike `assert msg.find(...) > 0`.
            self.assertIn("allow_unused", str(e))

    def test_simple_example_eager_grad_not_allow_unused(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_not_allow_unused()
        self.func_simple_example_eager_grad_not_allow_unused()

    def func_simple_example_eager_grad_duplicate_input(self):
        """A RuntimeError for duplicated inputs must mention ``duplicate``.

        NOTE(review): if no RuntimeError is raised this helper passes
        silently; confirm whether both execution modes are expected to raise.
        """
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        try:
            # duplicate input will arise RuntimeError errors
            fluid.dygraph.grad(out, [x, x])
        except RuntimeError as e:
            self.assertIn("duplicate", str(e))

    def test_simple_example_eager_grad_duplicate_input(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_duplicate_input()
        self.func_simple_example_eager_grad_duplicate_input()

    def func_simple_example_eager_grad_duplicate_output(self):
        """A RuntimeError for duplicated outputs must mention ``duplicate``.

        NOTE(review): if no RuntimeError is raised this helper passes
        silently; confirm whether both execution modes are expected to raise.
        """
        np.random.seed(2021)
        paddle.set_device('cpu')
        np_x = np.random.random((3, 3))
        np_y = np.random.random((3, 1))
        np_z = np.random.random((3, 1))
        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
        out_z = paddle.nn.functional.sigmoid(z)
        out = paddle.matmul(x, y)

        try:
            # duplicate output will arise RuntimeError errors
            fluid.dygraph.grad([out, out], [x])
        except RuntimeError as e:
            self.assertIn("duplicate", str(e))

    def test_simple_example_eager_grad_duplicate_output(self):
        with _test_eager_guard():
            self.func_simple_example_eager_grad_duplicate_output()
        self.func_simple_example_eager_grad_duplicate_output()

    def test_simple_example_eager_two_grad_output(self):
        """Grads w.r.t. two intermediates agree inside and outside the guard."""
        with _test_eager_guard():
            x1 = paddle.to_tensor([1.0, 2.0])
            x1.stop_gradient = False
            x2 = paddle.to_tensor([1.0, 2.0])
            x2.stop_gradient = False
            out1 = x1 * 2
            out2 = x2 * 2

            dout2_record_by_hook = []

            def record_hook(grad):
                dout2_record_by_hook.append(grad)

            out2.register_hook(record_hook)

            out3 = paddle.multiply(out1, out2)
            out4 = paddle.mean(out3)
            egr_dout2, egr_dout3 = paddle.grad([out4], [out2, out3])

            np.testing.assert_array_equal(dout2_record_by_hook[0].numpy(),
                                          np.array([1.0, 2.0]))

        # Same computation outside the eager guard, used as the reference.
        x1 = paddle.to_tensor([1.0, 2.0])
        x1.stop_gradient = False
        x2 = paddle.to_tensor([1.0, 2.0])
        x2.stop_gradient = False
        out1 = x1 * 2
        out2 = x2 * 2

        out3 = paddle.multiply(out1, out2)
        out4 = paddle.mean(out3)
        dout2, dout3 = paddle.grad([out4], [out2, out3])

        self.assertEqual(dout2.stop_gradient, egr_dout2.stop_gradient)
        self.assertEqual(dout3.stop_gradient, egr_dout3.stop_gradient)
        np.testing.assert_array_equal(dout2.numpy(), egr_dout2.numpy())
        np.testing.assert_array_equal(dout3.numpy(), egr_dout3.numpy())


class TestDygraphDoubleGrad(TestCase):
    """Tests of ``fluid.dygraph.grad`` including ``create_graph=True`` paths.

    ``setUp`` fixes ``sort_sum_gradient`` and the tensor ``shape``; subclasses
    may override ``setUp`` to run the same cases with other settings. Every
    ``func_*`` helper is executed inside ``_test_eager_guard()`` and again
    outside of it by its ``test_*`` driver.
    """

    def setUp(self):
        self.sort_sum_gradient = False
        self.shape = [5, 10]

    def grad(self,
             outputs,
             inputs,
             grad_outputs=None,
             no_grad_vars=None,
             retain_graph=None,
             create_graph=False,
             allow_unused=False):
        """Set ``FLAGS_sort_sum_gradient`` and forward to ``fluid.dygraph.grad``.

        All arguments are passed through unchanged as keyword arguments.

        Returns:
            Whatever ``fluid.dygraph.grad`` returns.
        """
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(outputs=outputs,
                                  inputs=inputs,
                                  grad_outputs=grad_outputs,
                                  no_grad_vars=no_grad_vars,
                                  retain_graph=retain_graph,
                                  create_graph=create_graph,
                                  allow_unused=allow_unused)

    @dygraph_guard
    def func_exception(self):
        """Invalid argument combinations must raise ``AssertionError``."""
        with self.assertRaises(AssertionError):
            self.grad(None, None)

        shape = self.shape

        with self.assertRaises(AssertionError):
            self.grad(1, random_var(shape))

        with self.assertRaises(AssertionError):
            self.grad(random_var(shape), 1)

        with self.assertRaises(AssertionError):
            self.grad([1], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape), random_var(shape)],
                      [random_var(shape)], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [random_var(shape)],
                      no_grad_vars=[1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)

    def test_exception(self):
        with _test_eager_guard():
            self.func_exception()
        self.func_exception()

    @dygraph_guard
    def func_simple_example(self):
        """Check shapes, values and ``stop_gradient`` for ``y = x + 1``."""
        x = random_var(self.shape)
        x.stop_gradient = False
        y = x + 1

        for create_graph in [False, True]:
            dx, = self.grad([x], [x],
                            create_graph=create_graph,
                            retain_graph=True)
            self.assertEqual(dx.shape, x.shape)
            self.assertTrue(np.all(dx.numpy() == 1))
            self.assertNotEqual(dx.stop_gradient, create_graph)

            dx_mul_2, = self.grad([y, x], [x],
                                  create_graph=create_graph,
                                  retain_graph=True)
            self.assertEqual(dx_mul_2.shape, x.shape)
            self.assertTrue(np.all(dx_mul_2.numpy() == 2))
            self.assertNotEqual(dx_mul_2.stop_gradient, create_graph)

            none_grad, = self.grad([x], [y],
                                   create_graph=create_graph,
                                   allow_unused=True)
            self.assertIsNone(none_grad)

            grad_with_none_and_not_none, = self.grad([x, y], [y],
                                                     create_graph=create_graph)
            # assertEqual, not assertTrue: assertTrue would treat x.shape as
            # the failure message and never compare the two shapes.
            self.assertEqual(grad_with_none_and_not_none.shape, x.shape)
            self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1))
            self.assertNotEqual(grad_with_none_and_not_none.stop_gradient,
                                create_graph)

    def test_simple_example(self):
        with _test_eager_guard():
            self.func_simple_example()
        self.func_simple_example()

    @dygraph_guard
    def func_example_no_grad_vars(self):
        """Gradient through two relu branches with ``y2`` in no_grad_vars."""
        x = random_var(self.shape)
        x_np = x.numpy()
        numel = x_np.size
        x.stop_gradient = False

        y1 = fluid.layers.relu(x)
        y2 = fluid.layers.relu(x)
        z = y1 + y2
        w = z * z

        w_mean = fluid.layers.reduce_mean(w)
        del y1, z, w

        dx_actual, = self.grad([w_mean], [x],
                               create_graph=True,
                               no_grad_vars=[y2])

        self.assertFalse(y2.stop_gradient)
        self.assertFalse(dx_actual.stop_gradient)

        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) *
                       (x_np > 0) * 2).astype('float32')

        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

    def test_example_no_grad_vars(self):
        with _test_eager_guard():
            self.func_example_no_grad_vars()
        self.func_example_no_grad_vars()

    @dygraph_guard
    def func_none_one_initial_gradient(self):
        """Check ``grad`` with explicit ``grad_outputs`` on a leaky_relu chain.

        NOTE(review): only the random initial gradients are iterated, so the
        ``is None`` branches below are never taken as written.
        """
        numel = 1
        for s in self.shape:
            numel *= s

        half_numel = int(numel / 2)
        half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel])
        half_x_negative = np.random.uniform(low=-2,
                                            high=-1,
                                            size=[numel - half_numel])
        x_np = np.array(list(half_x_positive) +
                        list(half_x_negative)).astype('float32')
        np.random.shuffle(x_np)

        x = fluid.dygraph.to_variable(x_np)
        x.stop_gradient = False

        alpha = 0.2
        y = fluid.layers.leaky_relu(x, alpha=alpha)
        y = y * y
        z = y * y

        x_np = x.numpy()
        relu_x_np = np.maximum(x_np, alpha * x_np).astype('float32')
        relu_x_grad_np = ((x_np > 0) + (x_np < 0) * alpha).astype('float32')
        dy_expected = (relu_x_np * relu_x_grad_np * 2).astype('float32')
        dz_expected = (np.power(relu_x_np, 3) * relu_x_grad_np *
                       4).astype('float32')

        random_grad_y = random_var(y.shape, low=1, high=2)
        random_grad_z = random_var(z.shape, low=1, high=2)
        ones_grad_y = np.ones(y.shape).astype('float32')
        ones_grad_z = np.ones(z.shape).astype('float32')

        # Snapshots used to verify that grad() leaves grad_outputs untouched.
        original_random_grad_y = random_grad_y.numpy()
        original_random_grad_z = random_grad_z.numpy()

        for grad_y in [random_grad_y]:
            for grad_z in [random_grad_z]:
                for create_graph in [False, True]:
                    dx_actual, = self.grad(outputs=[y, z],
                                           inputs=[x],
                                           grad_outputs=[grad_y, grad_z],
                                           create_graph=create_graph,
                                           retain_graph=True)

                    grad_y_np = (ones_grad_y
                                 if grad_y is None else grad_y.numpy())
                    grad_z_np = (ones_grad_z
                                 if grad_z is None else grad_z.numpy())

                    dx_expected = (dy_expected * grad_y_np +
                                   dz_expected * grad_z_np)
                    np.testing.assert_allclose(dx_actual.numpy(),
                                               dx_expected,
                                               rtol=1e-05)

                    if grad_y is not None:
                        self.assertTrue(grad_y.stop_gradient)
                        np.testing.assert_array_equal(grad_y.numpy(),
                                                      original_random_grad_y)

                    if grad_z is not None:
                        self.assertTrue(grad_z.stop_gradient)
                        np.testing.assert_array_equal(grad_z.numpy(),
                                                      original_random_grad_z)

    def test_none_one_initial_gradient(self):
        with _test_eager_guard():
            self.func_none_one_initial_gradient()
        self.func_none_one_initial_gradient()

    @dygraph_guard
    def func_example_with_gradient_accumulation_and_create_graph(self):
        """Backward through a ``create_graph=True`` grad, repeated 6 times."""
        x = random_var(self.shape)
        x_np = x.numpy()
        numel = x_np.size
        x.stop_gradient = False

        y = fluid.layers.relu(x)
        z = y + 1
        w = z * z

        w_mean = fluid.layers.reduce_mean(w)
        del y, z, w

        dx_actual, = self.grad([w_mean], [x], create_graph=True)
        del w_mean

        self.assertFalse(dx_actual.stop_gradient)

        # Theoretical result based on math calculation
        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
                       (x_np > 0) * 2).astype('float32')
        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
        loss.backward(retain_graph=True)

        x_grad_actual = x.gradient()
        x_grad_expected = (2.0 / float(numel) *
                           (x_np + dx_expected *
                            (x_np > 0) * 2 / float(numel))).astype('float32')
        np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05)

        for i in range(5):
            loss.backward(retain_graph=True)
            x_grad_actual = x.gradient()
            # One backward ran before the loop, so (i + 2) passes have been
            # accumulated into x's gradient by this point.
            x_grad_expected = (i + 2) * (
                2.0 / float(numel) *
                (x_np + dx_expected *
                 (x_np > 0) * 2 / float(numel))).astype('float32')
            np.testing.assert_allclose(x_grad_actual,
                                       x_grad_expected,
                                       rtol=1e-05)

    def test_example_with_gradient_accumulation_and_create_graph(self):
        with _test_eager_guard():
            self.func_example_with_gradient_accumulation_and_create_graph()
        self.func_example_with_gradient_accumulation_and_create_graph()

    @dygraph_guard
    def func_example_with_gradient_accumulation_and_no_grad_vars(self):
        """Backward through a ``create_graph=True`` grad using no_grad_vars."""
        x = random_var(self.shape)
        x_np = x.numpy()
        numel = x_np.size
        x.stop_gradient = False

        y1 = fluid.layers.relu(x)
        y2 = fluid.layers.relu(x)
        z = y1 + y2
        w = z * z

        w_mean = fluid.layers.reduce_mean(w)
        del y1, z, w

        dx_actual, = self.grad([w_mean], [x],
                               retain_graph=True,
                               create_graph=True,
                               no_grad_vars=[y2])

        self.assertFalse(y2.stop_gradient)
        self.assertFalse(dx_actual.stop_gradient)

        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) *
                       (x_np > 0) * 2).astype('float32')
        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
        loss.backward()

        x_grad_actual = x.gradient()
        x_grad_expected = (2.0 / float(numel) *
                           (x_np + dx_expected *
                            (x_np > 0) * 4 / float(numel))).astype('float32')
        np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05)

    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
        with _test_eager_guard():
            self.func_example_with_gradient_accumulation_and_no_grad_vars()
        self.func_example_with_gradient_accumulation_and_no_grad_vars()

    @dygraph_guard
    def func_example_with_gradient_accumulation_and_not_create_graph(self):
        """With ``create_graph=False`` only the ``x * x`` term reaches x."""
        x = random_var(self.shape)
        x_np = x.numpy()
        numel = x_np.size
        x.stop_gradient = False

        y = fluid.layers.relu(x)
        z = y + 1
        w = z * z

        w_mean = fluid.layers.reduce_mean(w)
        del y, z, w

        dx_actual, = self.grad([w_mean], [x], create_graph=False)
        del w_mean

        self.assertTrue(dx_actual.stop_gradient)

        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
                       (x_np > 0) * 2).astype('float32')

        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
        loss.backward()

        x_grad_actual = x.gradient()
        x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
        np.testing.assert_allclose(x_grad_actual, x_grad_expected, rtol=1e-05)

    def test_example_with_gradient_accumulation_and_not_create_graph(self):
        with _test_eager_guard():
            self.func_example_with_gradient_accumulation_and_not_create_graph()
        self.func_example_with_gradient_accumulation_and_not_create_graph()


class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
    """Runs the inherited double-grad cases with ``sort_sum_gradient=True``."""

    def setUp(self):
        self.sort_sum_gradient = True
        self.shape = [5, 10]


class TestDygraphDoubleGradVisitedUniq(TestCase):
    """Compares ``fluid.dygraph.grad`` with ``backward()`` on a reused layer."""

    def func_compare(self):
        """Both gradient paths must give identical input gradients.

        The same ``Linear`` layer is applied ten times to one input and the
        results are summed; the model is built twice under the same seeds.
        """
        value = np.random.uniform(-0.5, 0.5, 100).reshape(10, 2,
                                                          5).astype("float32")

        def model_f(inputs):
            linear = fluid.dygraph.Linear(5, 3, bias_attr=False)
            for i in range(10):
                if i == 0:
                    out = linear(inputs)
                else:
                    out = out + linear(inputs)
            return out

        fluid.set_flags({'FLAGS_sort_sum_gradient': True})

        # First run: gradient obtained through fluid.dygraph.grad.
        with fluid.dygraph.guard():
            paddle.seed(123)
            paddle.framework.random._manual_program_seed(123)
            a = fluid.dygraph.to_variable(value)
            a.stop_gradient = False

            out = model_f(a)

            dx = fluid.dygraph.grad(outputs=[out],
                                    inputs=[a],
                                    create_graph=False,
                                    only_inputs=True,
                                    allow_unused=False)

            grad_1 = dx[0].numpy()

        # Second run: gradient obtained through out.backward().
        with fluid.dygraph.guard():
            paddle.seed(123)
            paddle.framework.random._manual_program_seed(123)
            a = fluid.dygraph.to_variable(value)
            a.stop_gradient = False

            out = model_f(a)
            out.backward()

            grad_2 = a.gradient()

        np.testing.assert_array_equal(grad_1, grad_2)

    def test_compare(self):
        with _test_eager_guard():
            self.func_compare()
        self.func_compare()


class TestRaiseNoDoubleGradOp(TestCase):
    """Expects a RuntimeError when differentiating a group_norm gradient."""

    def raise_no_grad_op(self):
        """Take grad of ``group_norm`` with create_graph, then backward."""
        with fluid.dygraph.guard():
            x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32')
            x.stop_gradient = False
            y = paddle.fluid.layers.group_norm(x, groups=1)

            dx = fluid.dygraph.grad(outputs=[y],
                                    inputs=[x],
                                    create_graph=True,
                                    retain_graph=True)[0]

            loss = fluid.layers.reduce_mean(dx)
            loss.backward()

    def test_raise(self):
        self.assertRaises(RuntimeError, self.raise_no_grad_op)


class TestDoubleGradResNet(TestCase):
    """Runs ``paddle.grad`` on ResNet outputs inside and outside the guard.

    For each model the gradient of the top-class score with respect to the
    model output is computed in both modes and the results are compared.
    """

    def setUp(self):
        paddle.seed(123)
        paddle.framework.random._manual_program_seed(123)
        self.data = np.random.rand(1, 3, 224, 224).astype(np.float32)

    @dygraph_guard
    def test_resnet_resnet50(self):
        with _test_eager_guard():
            model = resnet50(pretrained=False)
            egr_data = paddle.to_tensor(self.data)
            egr_data.stop_gradient = False
            egr_out = model(egr_data)
            egr_preds = paddle.argmax(egr_out, axis=1)
            egr_label_onehot = paddle.nn.functional.one_hot(
                paddle.to_tensor(egr_preds), num_classes=egr_out.shape[1])
            egr_target = paddle.sum(egr_out * egr_label_onehot, axis=1)

            egr_g = paddle.grad(outputs=egr_target, inputs=egr_out)[0]
            egr_g_numpy = egr_g.numpy()
            self.assertEqual(list(egr_g_numpy.shape), list(egr_out.shape))

        model = resnet50(pretrained=False)
        data = paddle.to_tensor(self.data)
        data.stop_gradient = False
        out = model(data)
        preds = paddle.argmax(out, axis=1)
        label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds),
                                                    num_classes=out.shape[1])
        target = paddle.sum(out * label_onehot, axis=1)

        g = paddle.grad(outputs=target, inputs=out)[0]
        g_numpy = g.numpy()
        self.assertEqual(list(g_numpy.shape), list(out.shape))

        # NOTE(review): the first comparison is given the tensor objects, not
        # their .numpy() values -- confirm this is intended.
        np.testing.assert_array_equal(egr_out, out)
        np.testing.assert_array_equal(egr_g_numpy, g_numpy)

    @dygraph_guard
    def test_resnet_resnet101(self):
        with _test_eager_guard():
            model = resnet101(pretrained=False)
            egr_data = paddle.to_tensor(self.data)
            egr_data.stop_gradient = False
            egr_out = model(egr_data)
            egr_preds = paddle.argmax(egr_out, axis=1)
            egr_label_onehot = paddle.nn.functional.one_hot(
                paddle.to_tensor(egr_preds), num_classes=egr_out.shape[1])
            egr_target = paddle.sum(egr_out * egr_label_onehot, axis=1)

            egr_g = paddle.grad(outputs=egr_target, inputs=egr_out)[0]
            egr_g_numpy = egr_g.numpy()
            self.assertEqual(list(egr_g_numpy.shape), list(egr_out.shape))

        model = resnet101(pretrained=False)
        data = paddle.to_tensor(self.data)
        data.stop_gradient = False
        out = model(data)
        preds = paddle.argmax(out, axis=1)
        label_onehot = paddle.nn.functional.one_hot(paddle.to_tensor(preds),
                                                    num_classes=out.shape[1])
        target = paddle.sum(out * label_onehot, axis=1)

        g = paddle.grad(outputs=target, inputs=out)[0]
        g_numpy = g.numpy()
        self.assertEqual(list(g_numpy.shape), list(out.shape))

        # NOTE(review): the first comparison is given the tensor objects, not
        # their .numpy() values -- confirm this is intended.
        np.testing.assert_array_equal(egr_out, out)
        np.testing.assert_array_equal(egr_g_numpy, g_numpy)


class TestDoubleGradBasics(TestCase):
    """Checks first- and second-order gradient values for ``paddle.matmul``."""

    def test_matmul(self):
        """Verify matmul output, its grads, and grads after backward.

        Inputs are 3x3 matrices filled with 2 and the initial output gradient
        is a 3x3 matrix of ones; all reference values are constant matrices.
        """
        input_numpy = np.ones([3, 3]) * 2
        with _test_eager_guard():
            x = paddle.to_tensor(input_numpy,
                                 stop_gradient=False,
                                 dtype='float32')
            y = paddle.to_tensor(input_numpy,
                                 stop_gradient=False,
                                 dtype='float32')
            grad_out = paddle.to_tensor(np.ones([3, 3]),
                                        stop_gradient=False,
                                        dtype='float32')

            out = paddle.matmul(x, y, False, False)
            new_x_g, new_y_g = paddle.grad([out], [x, y], [grad_out],
                                           retain_graph=True,
                                           create_graph=True)
            new_x_g.backward()

            out_ref = np.ones([3, 3]) * 12.0
            np.testing.assert_array_equal(out.numpy(), out_ref)

            new_x_g_ref = np.ones([3, 3]) * 6.0
            new_y_g_ref = np.ones([3, 3]) * 6.0
            np.testing.assert_array_equal(new_x_g.numpy(), new_x_g_ref)
            np.testing.assert_array_equal(new_y_g.numpy(), new_y_g_ref)

            x_grad_ref = np.ones([3, 3]) * 0.0
            np.testing.assert_array_equal(x.grad.numpy(), x_grad_ref)

            y_grad_ref = np.ones([3, 3]) * 3.0
            np.testing.assert_array_equal(y.grad.numpy(), y_grad_ref)

            grad_out_grad_ref = np.ones([3, 3]) * 6.0
            np.testing.assert_array_equal(grad_out.grad.numpy(),
                                          grad_out_grad_ref)


# Run the unittest runner when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()