# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
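
"""Unit tests for triple (third-order) gradients computed through
``paddle.grad`` / ``fluid.dygraph.grad`` in imperative (dygraph) mode."""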

import unittest
from unittest import TestCase

import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.wrapped_decorator import wrap_decorator


def _dygraph_guard_(func):
    def __impl__(*args, **kwargs):
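        # Run `func` in dygraph mode, entering a guard only when the caller
        # is not already in dynamic-graph mode.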
        if fluid._non_static_mode():
            return func(*args, **kwargs)
        else:
            with fluid.dygraph.guard():
                return func(*args, **kwargs)

    return __impl__


dygraph_guard = wrap_decorator(_dygraph_guard_)


def random_var(size, low=-1, high=1, dtype='float32'):
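    # Helper: deterministic (seeded) uniform tensor wrapped as a dygraph Variable.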
    np.random.seed(2021)
    x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype)
    return fluid.dygraph.to_variable(x_np)


class TestDygraphTripleGradMatmul(TestCase):
    def test_matmul_triple_grad(self):
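        # Check first-, second- and third-order gradients of matmul on
        # 3x3 all-twos inputs under the eager guard.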
        input_numpy = np.ones([3, 3]) * 2
        with _test_eager_guard():
            x = paddle.to_tensor(
                input_numpy, stop_gradient=False, dtype='float32'
            )
            y = paddle.to_tensor(
                input_numpy, stop_gradient=False, dtype='float32'
            )
            out = paddle.matmul(x, y, transpose_x=False, transpose_y=False)

            new_out_g = paddle.to_tensor(
                np.ones([3, 3]), stop_gradient=False, dtype='float32'
            )
            new_x_g, new_y_g = paddle.grad(
                [out], [x, y], [new_out_g], retain_graph=True, create_graph=True
            )
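            # With x = y = 2 * ones(3, 3) and new_out_g = ones(3, 3):
            #   new_x_g = new_out_g @ y^T = 6 * ones(3, 3)
            #   new_y_g = x^T @ new_out_g = 6 * ones(3, 3)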

            new_x_g_g = paddle.to_tensor(
                np.ones([3, 3]), stop_gradient=False, dtype='float32'
            )
            new_y_g_g = paddle.to_tensor(
                np.ones([3, 3]), stop_gradient=False, dtype='float32'
            )
            new_a, new_b, new_c = paddle.grad(
                [new_x_g, new_y_g],
                [x, y, new_out_g],
                [new_x_g_g, new_y_g_g],
                retain_graph=True,
                create_graph=True,
            )
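            # Second-order grads of [new_x_g, new_y_g] w.r.t. x, y and the
            # first-order grad output new_out_g, seeded with all-ones cotangents.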

            new_a.backward()
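            # backward() on new_a populates .grad on the leaves it depends on;
            # the reference values below follow from the all-twos/all-ones inputs.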

            out_ref = np.ones([3, 3]) * 12.0
            np.testing.assert_array_equal(out.numpy(), out_ref)

            new_x_g_ref = np.ones([3, 3]) * 6.0
            new_y_g_ref = np.ones([3, 3]) * 6.0
            np.testing.assert_array_equal(new_x_g.numpy(), new_x_g_ref)
            np.testing.assert_array_equal(new_y_g.numpy(), new_y_g_ref)

            new_a_ref = np.ones([3, 3]) * 3.0
            new_b_ref = np.ones([3, 3]) * 3.0
            new_c_ref = np.ones([3, 3]) * 12.0

            np.testing.assert_array_equal(new_a.numpy(), new_a_ref)
            np.testing.assert_array_equal(new_b.numpy(), new_b_ref)
            np.testing.assert_array_equal(new_c.numpy(), new_c_ref)

            x_grad_ref = np.ones([3, 3]) * 0.0
            np.testing.assert_array_equal(x.grad.numpy(), x_grad_ref)

            y_grad_ref = np.ones([3, 3]) * 0.0
            np.testing.assert_array_equal(y.grad.numpy(), y_grad_ref)

            new_out_g_ref = np.ones([3, 3]) * 3.0
            np.testing.assert_array_equal(new_out_g.grad.numpy(), new_out_g_ref)

            new_x_g_g_ref = np.ones([3, 3]) * 0.0
            new_y_g_g_ref = np.ones([3, 3]) * 3.0
            np.testing.assert_array_equal(new_x_g_g.grad.numpy(), new_x_g_g_ref)
            np.testing.assert_array_equal(new_y_g_g.grad.numpy(), new_y_g_g_ref)


class TestDygraphTripleGrad(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.shape = [5, 5]

    def grad(
        self,
        outputs,
        inputs,
        grad_outputs=None,
        no_grad_vars=None,
        retain_graph=None,
        create_graph=False,
        allow_unused=False,
    ):
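        # Thin wrapper around fluid.dygraph.grad that also applies the
        # configured sort_sum_gradient flag.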
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused,
        )

    @dygraph_guard
    def func_exception(self):
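        # fluid.dygraph.grad should reject non-Variable outputs/inputs and
        # malformed grad_outputs / no_grad_vars arguments.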
        with self.assertRaises(AssertionError):
            self.grad(None, None)

        shape = self.shape

        with self.assertRaises(AssertionError):
            self.grad(1, random_var(shape))

        with self.assertRaises(AssertionError):
            self.grad(random_var(shape), 1)

        with self.assertRaises(AssertionError):
            self.grad([1], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [1])

        with self.assertRaises(AssertionError):
            self.grad(
                [random_var(shape), random_var(shape)],
                [random_var(shape)],
                [random_var(shape)],
            )

        with self.assertRaises(AssertionError):
            self.grad(
                [random_var(shape)], [random_var(shape)], no_grad_vars=[1]
            )

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)

    @dygraph_guard
    def func_example_with_gradient_and_create_graph(self):
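        # out = sigmoid(x @ y + z); check first-, second- and third-order
        # gradients w.r.t. x against analytic NumPy expressions.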
        x = random_var(self.shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = paddle.nn.functional.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        (dx_actual,) = self.grad([out], [x], create_graph=True)
        # Theoretical result based on analytical calculation
        dout = np.ones(self.shape).astype('float32')
        dx_expected = np.matmul(
            dout * out_np * (1 - out_np), np.transpose(y_np)
        )
        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

        (ddx_actual,) = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on analytical calculation
        DDY = np.zeros(self.shape).astype('float32')
        DDX = np.ones(self.shape).astype('float32')
        double_grad_tmp1 = np.matmul(
            dout * out_np * (1 - out_np), np.transpose(DDY)
        )
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            (1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        )
        ddx_expected = double_grad_tmp1 + np.matmul(
            double_grad_tmp3, np.transpose(y_np)
        )
        np.testing.assert_allclose(ddx_actual.numpy(), ddx_expected, rtol=1e-05)

        # Theoretical result based on analytical calculation
        d_ddout = np.zeros(self.shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = (
            tmp0 * (1 - 2 * out_np) * d_ddout
            - 2 * dout * (1 - out_np) * out_np * tmp0 * tmp0
        )
        dddx_expected = np.matmul(
            ((tmp1 + tmp2) * out_np * (1 - out_np)), np.transpose(y_np)
        )

        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)

    def test_all_cases(self):
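        # Run the checks in both legacy and eager dygraph modes while grads
        # are retained for all tensors.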
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        self.func_exception()
        self.func_example_with_gradient_and_create_graph()
        with _test_eager_guard():
            self.func_exception()
            self.func_example_with_gradient_and_create_graph()
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


class TestDygraphTripleGradBroadcastCase(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.x_shape = [3, 2, 2]
        self.y_shape = [1, 2, 2]
        self.z_shape = [2, 2]

    def grad(
        self,
        outputs,
        inputs,
        grad_outputs=None,
        no_grad_vars=None,
        retain_graph=None,
        create_graph=False,
        allow_unused=False,
    ):
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused,
        )

    @dygraph_guard
    def func_example_with_gradient_and_create_graph(self):
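        # Same check as TestDygraphTripleGrad, but with y and z broadcasting
        # against the batched x.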
        x = random_var(self.x_shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.y_shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.z_shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = paddle.nn.functional.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        (dx_actual,) = self.grad([out], [x], create_graph=True)
        # Theoretical result based on analytical calculation
        dout = np.ones(self.x_shape).astype('float32')
        dx_expected = np.matmul(
            dout * out_np * (1 - out_np), np.transpose(y_np, axes=(0, 2, 1))
        )
        np.testing.assert_allclose(dx_actual.numpy(), dx_expected, rtol=1e-05)

        (ddx_actual,) = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on analytical calculation
        DDY = np.zeros(self.y_shape).astype('float32')
        DDX = np.ones(self.x_shape).astype('float32')
        double_grad_tmp1 = np.matmul(
            dout * out_np * (1 - out_np), np.transpose(DDY, axes=(0, 2, 1))
        )
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            (1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        )
        ddx_expected = double_grad_tmp1 + np.matmul(
            double_grad_tmp3, np.transpose(y_np, axes=(0, 2, 1))
        )
        np.testing.assert_allclose(ddx_actual.numpy(), ddx_expected, rtol=1e-05)

        # Theoretical result based on analytical calculation
        d_ddout = np.zeros(self.x_shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = (
            tmp0 * (1 - 2 * out_np) * d_ddout
            - 2 * dout * (1 - out_np) * out_np * tmp0 * tmp0
        )
        dddx_expected = np.matmul(
            ((tmp1 + tmp2) * out_np * (1 - out_np)),
            np.transpose(y_np, axes=(0, 2, 1)),
        )

        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)

    def test_all_cases(self):
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        self.func_example_with_gradient_and_create_graph()
        with _test_eager_guard():
            self.func_example_with_gradient_and_create_graph()
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


if __name__ == '__main__':
    unittest.main()