# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import unittest
import numpy as np

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import Linear
from paddle.fluid.layer_helper import LayerHelper
from test_imperative_base import new_program_scope
import paddle.fluid.dygraph_utils as dygraph_utils
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
import paddle


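# A toy layer used across these tests: relu, then elementwise square, then reduce_sum.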
class MyLayer(fluid.Layer):
    def __init__(self):
        super(MyLayer, self).__init__()

    def forward(self, inputs):
        x = fluid.layers.relu(inputs)
        self._x_for_debug = x
        x = fluid.layers.elementwise_mul(x, x)
        x = fluid.layers.reduce_sum(x)
        return [x]


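# A two-Linear MLP with constant-initialized weights and biases, so dygraph and
# static-graph runs produce comparable results.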
class MLP(fluid.Layer):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self._linear1 = Linear(
            input_size,
            3,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        self._linear2 = Linear(
            3,
            4,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))

    def forward(self, inputs):
        x = self._linear1(inputs)
        x = self._linear2(x)
        x = fluid.layers.reduce_sum(x)
        return x


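# A hand-written RNN cell that builds its graph by appending raw ops
# (mul, elementwise_add, tanh, mul, softmax, reduce_sum) through the layer helper.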
class SimpleRNNCell(fluid.Layer):
    def __init__(self, step_input_size, hidden_size, output_size, param_attr):
        super(SimpleRNNCell, self).__init__()
        self.step_input_size = step_input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self._dtype = core.VarDesc.VarType.FP32
        self.param_attr = param_attr

        i2h_param_shape = [self.step_input_size, self.hidden_size]
        h2h_param_shape = [self.hidden_size, self.hidden_size]
        h2o_param_shape = [self.output_size, self.hidden_size]
        self._i2h_w = None
        self._i2h_w = self.create_parameter(
            attr=self.param_attr,
            shape=i2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2h_w = self.create_parameter(
            attr=self.param_attr,
            shape=h2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2o_w = self.create_parameter(
            attr=self.param_attr,
            shape=h2o_param_shape,
            dtype=self._dtype,
            is_bias=False)

    def forward(self, input, pre_hidden):
        tmp_i2h = self.create_variable(dtype=self._dtype)
        tmp_h2h = self.create_variable(dtype=self._dtype)
        hidden = self.create_variable(dtype=self._dtype)
        out = self.create_variable(dtype=self._dtype)
        softmax_out = self.create_variable(dtype=self._dtype)
        reduce_out = self.create_variable(dtype=self._dtype)
        self._helper.append_op(
            type="mul",
            inputs={"X": input,
                    "Y": self._i2h_w},
            outputs={"Out": tmp_i2h},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="mul",
            inputs={"X": pre_hidden,
                    "Y": self._h2h_w},
            outputs={"Out": tmp_h2h},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="elementwise_add",
            inputs={'X': tmp_h2h,
                    'Y': tmp_i2h},
            outputs={'Out': hidden},
            attrs={'axis': -1,
                   'use_mkldnn': False})
        hidden = self._helper.append_activation(hidden, act='tanh')

        self._helper.append_op(
            type="mul",
            inputs={"X": hidden,
                    "Y": self._h2o_w},
            outputs={"Out": out},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})

        self._helper.append_op(
            type="softmax",
            inputs={"X": out},
            outputs={"Out": softmax_out},
            attrs={"use_cudnn": False})

        self._helper.append_op(
            type='reduce_sum',
            inputs={'X': softmax_out},
            outputs={'Out': reduce_out},
            attrs={'keep_dim': False,
                   'reduce_all': True})

        return reduce_out, hidden


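# Unrolls SimpleRNNCell over a fixed sequence length of 4.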
class SimpleRNN(fluid.Layer):
    def __init__(self):
        super(SimpleRNN, self).__init__()
        self.seq_len = 4
        self._cell = SimpleRNNCell(
            3,
            3,
            3,
            fluid.ParamAttr(initializer=fluid.initializer.Constant(value=0.1)))

    def forward(self, inputs):
        outs = list()
        pre_hiddens = list()

        init_hidden = self.create_parameter(
            attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            shape=[1, 3],
            dtype='float32',
            is_bias=False)
        pre_hidden = init_hidden
        for i in range(self.seq_len):
            input = fluid.layers.slice(
                inputs, axes=[1], starts=[i], ends=[i + 1])
            input = fluid.layers.reshape(input, shape=[1, 3])
            out_softmax, pre_hidden = self._cell(input, pre_hidden)
            outs.append(out_softmax)

        return outs, pre_hiddens


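# Basic dygraph (imperative) mode tests: mode switching, VarBase creation,
# no_grad scopes, gradient accumulation, and dygraph vs. static-graph parity.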
class TestImperative(unittest.TestCase):
    def test_functional_dygraph_context(self):
        self.assertFalse(fluid.dygraph.enabled())
        fluid.enable_dygraph()
        self.assertTrue(fluid.dygraph.enabled())
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        var_inp = fluid.dygraph.base.to_variable(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out1 = out.numpy()
        out.backward()
        dy_grad1 = mlp._linear1.weight.gradient()
        fluid.disable_dygraph()
        self.assertFalse(fluid.dygraph.enabled())
        with fluid.dygraph.guard():
            self.assertTrue(fluid.dygraph.enabled())
            var_inp = fluid.dygraph.base.to_variable(np_inp)
            mlp = MLP(input_size=2)
            out = mlp(var_inp)
            dy_out2 = out.numpy()
            out.backward()
            dy_grad2 = mlp._linear1.weight.gradient()
        self.assertFalse(fluid.dygraph.enabled())
        self.assertTrue(np.array_equal(dy_out1, dy_out2))
        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))

    def test_functional_paddle_imperative_dygraph_context(self):
        self.assertFalse(paddle.in_dynamic_mode())
        paddle.disable_static()
        self.assertTrue(paddle.in_dynamic_mode())
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        var_inp = paddle.to_tensor(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out1 = out.numpy()
        out.backward()
        dy_grad1 = mlp._linear1.weight.gradient()
        paddle.enable_static()
        self.assertFalse(paddle.in_dynamic_mode())
        paddle.disable_static()
        self.assertTrue(paddle.in_dynamic_mode())
        var_inp = paddle.to_tensor(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out2 = out.numpy()
        out.backward()
        dy_grad2 = mlp._linear1.weight.gradient()
        paddle.enable_static()
        self.assertFalse(paddle.in_dynamic_mode())
        self.assertTrue(np.array_equal(dy_out1, dy_out2))
        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))

    def test_isinstance(self):
        var = fluid.layers.data(shape=[1], name='x', dtype='float32')
        self.assertTrue(isinstance(var, fluid.Variable))
        with fluid.dygraph.guard():
            var_base = fluid.dygraph.base.to_variable(np.array([3, 4, 5]))
            self.assertTrue(isinstance(var_base, core.VarBase))
            self.assertTrue(isinstance(var_base, fluid.Variable))

    def test_create_VarBase(self):
        x = np.ones([2, 2], np.float32)
        y = np.zeros([3, 3], np.float32)
        t = fluid.Tensor()
        t.set(x, fluid.CPUPlace())
        with fluid.dygraph.guard():
            tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
            tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
            tmp3 = fluid.dygraph.base.to_variable(x)
            tmp4 = fluid.core.VarBase(y)
            tmp5 = fluid.core.VarBase(value=x)
            tmp6 = fluid.core.VarBase(t)

            self.assertTrue(np.array_equal(x, tmp.numpy()))
            self.assertTrue(np.array_equal(y, tmp2.numpy()))
            self.assertTrue(np.array_equal(x, tmp3.numpy()))
            self.assertTrue(np.array_equal(y, tmp4.numpy()))
            self.assertTrue(np.array_equal(x, tmp5.numpy()))
            self.assertTrue(np.array_equal(x, tmp6.numpy()))

    def test_no_grad_guard(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with fluid.dygraph.no_grad():
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
            x = fluid.dygraph.to_variable(data)
            y = l0(x) + tmp
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_no_grad_guard(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with paddle.no_grad():
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
            x = fluid.dygraph.to_variable(data)
            y = l0(x) + tmp
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_set_grad_enabled(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with paddle.set_grad_enabled(False):
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                with paddle.set_grad_enabled(True):
                    tmp2 = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
                self.assertTrue(tmp2.stop_gradient is False)
            x = fluid.dygraph.to_variable(data)
            y = l0(x) + tmp2
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(tmp2._grad_ivar() is not None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_is_grad_enabled(self):
        with fluid.dygraph.guard():
            with paddle.set_grad_enabled(False):
                self.assertTrue(paddle.is_grad_enabled() is False)
                with paddle.set_grad_enabled(True):
                    self.assertTrue(paddle.is_grad_enabled())

    def test_sum_op(self):
        x = np.ones([2, 2], np.float32)
        with fluid.dygraph.guard():
            inputs = []
            for _ in range(10):
                tmp = fluid.dygraph.base.to_variable(x)
                tmp.stop_gradient = False
                inputs.append(tmp)
            ret = fluid.layers.sums(inputs)
            loss = fluid.layers.reduce_sum(ret)
            loss.backward()
        with fluid.dygraph.guard():
            inputs2 = []
            for _ in range(10):
                tmp = fluid.dygraph.base.to_variable(x)
                tmp.stop_gradient = False
                inputs2.append(tmp)
            ret2 = fluid.layers.sums(inputs2)
            loss2 = fluid.layers.reduce_sum(ret2)
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            loss2.backward()

            self.assertTrue(np.allclose(ret.numpy(), x * 10))
            self.assertTrue(np.allclose(inputs[0].gradient(), x))
            self.assertTrue(np.allclose(ret2.numpy(), x * 10))
            a = inputs2[0].gradient()
            self.assertTrue(np.allclose(inputs2[0].gradient(), x))

    def test_empty_var(self):
        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            new_variable = cur_block.create_var(
                name="X", shape=[-1, 23, 48], dtype='float32')
            try:
                new_variable.numpy()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_variable.backward()
            except Exception as e:
                assert type(e) == core.EnforceNotMet

            try:
                new_variable.clear_gradient()
            except Exception as e:
                assert type(e) == core.EnforceNotMet

    def test_empty_grad(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = fluid.dygraph.base.to_variable(x)
            try:
                new_var.gradient()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_var.clear_gradient()
            except Exception as e:
                assert type(e) == core.EnforceNotMet

        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            new_variable = cur_block.create_var(
                name="X", shape=[-1, 23, 48], dtype='float32')
            try:
                new_variable.gradient()
            except Exception as e:
                assert type(e) == ValueError

    def test_set_persistable(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = fluid.dygraph.base.to_variable(x)
            self.assertFalse(new_var.persistable)
            new_var.persistable = True
            self.assertTrue(new_var.persistable)

    def test_layer(self):
        with fluid.dygraph.guard():
            l = fluid.Layer("l")
            self.assertRaises(NotImplementedError, l.forward, [])

    def test_layer_in_out(self):
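        # Run MyLayer in dygraph mode (with and without sorted gradient sum) and in
        # static-graph mode, then compare the outputs and gradients.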
        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
        with fluid.dygraph.guard():
            var_inp = fluid.dygraph.base.to_variable(np_inp)
            var_inp.stop_gradient = False
            l = MyLayer()
            x = l(var_inp)[0]
            self.assertIsNotNone(x)
            dy_out = x.numpy()
            x.backward()
            dy_grad = l._x_for_debug.gradient()

        with fluid.dygraph.guard():
            var_inp2 = fluid.dygraph.base.to_variable(np_inp)
            var_inp2.stop_gradient = False
            l2 = MyLayer()
            x2 = l2(var_inp2)[0]
            self.assertIsNotNone(x2)
            dy_out2 = x2.numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            x2.backward()
            dy_grad2 = l2._x_for_debug.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[3], append_batch_size=False)
            l = MyLayer()
            x = l(inp)[0]
            param_grads = fluid.backward.append_backward(
                x, parameter_list=[l._x_for_debug.name])[0]
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            static_out, static_grad = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[x.name, param_grads[1].name])

        self.assertTrue(np.allclose(dy_out, static_out))
        self.assertTrue(np.allclose(dy_grad, static_grad))
        self.assertTrue(np.allclose(dy_out2, static_out))
        self.assertTrue(np.allclose(dy_grad2, static_grad))

    def test_mlp(self):
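        # Same comparison as test_layer_in_out, but for the MLP; also checks the
        # parameter and sublayer lists afterwards.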
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        with fluid.dygraph.guard():
            var_inp = fluid.dygraph.base.to_variable(np_inp)
            mlp = MLP(input_size=2)
            out = mlp(var_inp)
            dy_out = out.numpy()
            out.backward()
            dy_grad = mlp._linear1.weight.gradient()

        with fluid.dygraph.guard():
            var_inp2 = fluid.dygraph.base.to_variable(np_inp)
            mlp2 = MLP(input_size=2)
            out2 = mlp2(var_inp2)
            dy_out2 = out2.numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            out2.backward()
            dy_grad2 = mlp2._linear1.weight.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[2, 2], append_batch_size=False)
            mlp = MLP(input_size=2)
            out = mlp(inp)
            param_grads = fluid.backward.append_backward(
                out, parameter_list=[mlp._linear1.weight.name])[0]
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            exe.run(fluid.default_startup_program())

            static_out, static_grad = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[out.name, param_grads[1].name])

        self.assertTrue(np.allclose(dy_out, static_out))
        self.assertTrue(np.allclose(dy_grad, static_grad))
        self.assertTrue(np.allclose(dy_out2, static_out))
        self.assertTrue(np.allclose(dy_grad2, static_grad))

        params = mlp.parameters(True)
        self.assertEqual("linear_0.w_0", params[0].name)
        self.assertEqual("linear_0.b_0", params[1].name)
        self.assertEqual("linear_1.w_0", params[2].name)
        self.assertEqual("linear_1.b_0", params[3].name)
        self.assertEqual(len(params), 4)

        sublayers = mlp.sublayers()
        self.assertEqual(mlp._linear1, sublayers[0])
        self.assertEqual(mlp._linear2, sublayers[1])
        self.assertEqual(len(sublayers), 2)

    def test_gradient_accumulation(self):
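        # Each helper below is run with FLAGS_sort_sum_gradient both off and on.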
        def test_single_api(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            x = paddle.to_tensor(5., stop_gradient=False)
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_gradient()
            self.assertEqual(x.grad.numpy(), 0.)
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_grad()
            self.assertEqual(x.grad.numpy(), 0.)

        def test_simple_net(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            x = paddle.to_tensor(5., stop_gradient=False)
            y = paddle.to_tensor(2., stop_gradient=False)
            z = paddle.to_tensor(3., stop_gradient=False)

            def fun(x, y, z):
                loss1 = x * x * y
                loss2 = x * z
                loss1.backward(retain_graph=True)
                loss2.backward(retain_graph=True)
                self.assertTrue(np.array_equal(x.grad.numpy(), [23.]))
                self.assertTrue(np.array_equal(y.grad.numpy(), [25.]))
                self.assertTrue(np.array_equal(z.grad.numpy(), [5.]))
                x.clear_grad()
                y.clear_grad()
                z.clear_grad()

                dx = paddle.grad([loss1], x, create_graph=True)[0]
                loss = loss1 + loss2 + dx
                # loss = x*x*y + x*z + 2*x*y
                return loss

            loss = fun(x, y, z)
            loss.backward(retain_graph=True)
            # x.grad = 2*x*y + z + 2*y = 27 
            self.assertTrue(np.array_equal(x.grad.numpy(), [27]))

            loss.backward(retain_graph=True)
            self.assertTrue(np.array_equal(x.grad.numpy(), [54]))

            loss.backward()
            self.assertTrue(np.array_equal(x.grad.numpy(), [81]))

            with self.assertRaises(RuntimeError):
                loss.backward()

            loss1 = x * x * y
            loss2 = x * z
            dx = paddle.grad([loss1], x, create_graph=True)[0]
            loss = loss1 + loss2 + dx
            loss.backward()
            self.assertTrue(np.array_equal(dx.grad.numpy(), [1]))
            self.assertTrue(np.array_equal(x.grad.numpy(), [108]))

        def test_mlp(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            input_size = 5
            paddle.seed(1)
            mlp1 = MLP(input_size=input_size)
            # generate the gradient of each step
            mlp2 = MLP(input_size=input_size)

            expected_weight1_grad = 0.
            expected_bias1_grad = 0.
            expected_weight2_grad = 0.
            expected_bias2_grad = 0.

            for batch_id in range(100):
                x = paddle.uniform([10, input_size])
                detach_x = x.detach()
                clear_loss = mlp2(detach_x)
                clear_loss.backward()
                expected_weight1_grad = (
                    expected_weight1_grad + mlp2._linear1.weight.grad.numpy())
                expected_bias1_grad = (
                    expected_bias1_grad + mlp2._linear1.bias.grad.numpy())
                expected_weight2_grad = (
                    expected_weight2_grad + mlp2._linear2.weight.grad.numpy())
                expected_bias2_grad = (
                    expected_bias2_grad + mlp2._linear2.bias.grad.numpy())

                loss = mlp1(x)
                loss.backward()

                self.assertTrue(np.array_equal(loss.grad.numpy(), [1]))
                self.assertTrue(
                    np.allclose(mlp1._linear1.weight.grad.numpy(),
                                expected_weight1_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear1.bias.grad.numpy(),
                                expected_bias1_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear2.weight.grad.numpy(),
                                expected_weight2_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear2.bias.grad.numpy(),
                                expected_bias2_grad))

                mlp2.clear_gradients()
                self.assertTrue(np.array_equal(clear_loss.grad.numpy(), [1]))
                if ((batch_id + 1) % 10) % 2 == 0:
                    mlp1.clear_gradients()
                    expected_weight1_grad = 0.
                    expected_bias1_grad = 0.
                    expected_weight2_grad = 0.
                    expected_bias2_grad = 0.
                elif ((batch_id + 1) % 10) % 2 == 1:
                    mlp1.clear_gradients()
                    mlp1._linear1.weight._set_grad_ivar(
                        paddle.ones([input_size, 3]))
                    mlp1._linear2.weight._set_grad_ivar(paddle.ones([3, 4]))
                    expected_weight1_grad = 1.
                    expected_bias1_grad = 0.
                    expected_weight2_grad = 1.
                    expected_bias2_grad = 0.

        with fluid.dygraph.guard():
            test_single_api(False)
            test_single_api(True)
            test_simple_net(False)
            test_simple_net(True)
            test_mlp(False)
            test_mlp(True)

    def test_dygraph_vs_static(self):
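        # A data-dependent branch written imperatively should match the static-graph
        # IfElse construct built below.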
        np_inp1 = np.random.rand(4, 3, 3)
        np_inp2 = np.random.rand(4, 3, 3)

        # dynamic graph
        with fluid.dygraph.guard():
            inp1 = fluid.dygraph.to_variable(np_inp1)
            inp2 = fluid.dygraph.to_variable(np_inp2)
            if np.sum(np_inp1) < np.sum(np_inp2):
                x = fluid.layers.elementwise_add(inp1, inp2)
            else:
                x = fluid.layers.elementwise_sub(inp1, inp2)
            dygraph_result = x.numpy()

        # static graph
        with new_program_scope():
            inp_data1 = fluid.layers.data(
                name='inp1', shape=[3, 3], dtype=np.float32)
            inp_data2 = fluid.layers.data(
                name='inp2', shape=[3, 3], dtype=np.float32)

            a = fluid.layers.expand(
                fluid.layers.reshape(
                    fluid.layers.reduce_sum(inp_data1), [1, 1]), [4, 1])
            b = fluid.layers.expand(
                fluid.layers.reshape(
                    fluid.layers.reduce_sum(inp_data2), [1, 1]), [4, 1])
            cond = fluid.layers.less_than(x=a, y=b)

            ie = fluid.layers.IfElse(cond)
            with ie.true_block():
                d1 = ie.input(inp_data1)
                d2 = ie.input(inp_data2)
                d3 = fluid.layers.elementwise_add(d1, d2)
                ie.output(d3)

            with ie.false_block():
                d1 = ie.input(inp_data1)
                d2 = ie.input(inp_data2)
                d3 = fluid.layers.elementwise_sub(d1, d2)
                ie.output(d3)
            out = ie()

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            static_result = exe.run(fluid.default_main_program(),
                                    feed={'inp1': np_inp1,
                                          'inp2': np_inp2},
                                    fetch_list=out)[0]
        self.assertTrue(np.allclose(dygraph_result, static_result))

    def test_rnn(self):
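        # Compare SimpleRNN outputs and weight gradients between dygraph mode
        # (with and without sorted gradient sum) and static-graph mode.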
        np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                           [10.0, 11.0, 12.0]])
        np_inp = np_inp.reshape((1, 4, 3))
        np_inp = np_inp.astype(np.float32)
        with fluid.dygraph.guard():
            var_inp = fluid.dygraph.base.to_variable(np_inp)
            var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
            simple_rnn = SimpleRNN()
            outs, pre_hiddens = simple_rnn.forward(var_inp)
            dy_out = outs[3].numpy()
            outs[3].backward()
            dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
            dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
            dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()

        with fluid.dygraph.guard():
            var_inp2 = fluid.dygraph.base.to_variable(np_inp)
            var_inp2 = fluid.layers.reshape(var_inp2, shape=[1, 4, 3])
            simple_rnn2 = SimpleRNN()
            outs2, pre_hiddens2 = simple_rnn2.forward(var_inp2)
            dy_out2 = outs2[3].numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            outs2[3].backward()
            dy_grad_h2o2 = simple_rnn2._cell._h2o_w.gradient()
            dy_grad_h2h2 = simple_rnn2._cell._h2h_w.gradient()
            dy_grad_i2h2 = simple_rnn2._cell._i2h_w.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[1, 4, 3], append_batch_size=False)
            simple_rnn = SimpleRNN()
            outs, pre_hiddens = simple_rnn(inp)
            param_grads = fluid.backward.append_backward(outs[3])
            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(fluid.default_startup_program())
            static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[
                    outs[3].name, param_grads[0][1].name,
                    param_grads[1][1].name, param_grads[2][1].name
                ])

        self.assertTrue(np.allclose(dy_out, static_out))
        self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
        self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
        self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
        self.assertTrue(np.allclose(dy_out2, static_out))
        self.assertTrue(np.allclose(dy_grad_h2o2, static_grad_h2o))
        self.assertTrue(np.allclose(dy_grad_h2h2, static_grad_h2h))
        self.assertTrue(np.allclose(dy_grad_i2h2, static_grad_i2h))

    def test_layer_attrs(self):
        layer = fluid.dygraph.Layer("test")
        layer.test_attr = 1
        self.assertFalse(hasattr(layer, "whatever"))
        self.assertTrue(hasattr(layer, "test_attr"))
        self.assertEqual(layer.test_attr, 1)

        my_layer = MyLayer()
        my_layer.w1 = my_layer.create_parameter([3, 3])
        my_layer.add_parameter('w2', None)
        self.assertEqual(len(my_layer.parameters()), 1)
        self.assertRaises(TypeError, my_layer.__setattr__, 'w1', 'str')
        my_layer.w1 = None
        self.assertEqual(len(my_layer.parameters()), 0)
        my_layer.l1 = fluid.dygraph.Linear(3, 3)
        self.assertEqual(len(my_layer.sublayers()), 1)
        self.assertRaises(TypeError, my_layer.__setattr__, 'l1', 'str')
        my_layer.l1 = None
        self.assertEqual(len(my_layer.sublayers()), 0)


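# Tests for the internal dygraph_utils and layer-helper activation/bias helpers.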
class TestDygraphUtils(unittest.TestCase):
    def test_append_activation_in_dygraph_exception(self):
        with new_program_scope():
            np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
            a = fluid.layers.data("a", [10, 20])
            func = dygraph_utils._append_activation_in_dygraph
            self.assertRaises(AssertionError, func, a, act="sigmoid")

    def test_append_activation_in_dygraph1(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_activation_in_dygraph
        with fluid.dygraph.guard():
            a = fluid.dygraph.to_variable(a_np)
            res1 = func(a, act="hard_sigmoid")
            res2 = fluid.layers.hard_sigmoid(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph2(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_activation_in_dygraph
        with fluid.dygraph.guard():
            a = fluid.dygraph.to_variable(a_np)
            res1 = func(a, act="sigmoid", use_mkldnn=True, use_cudnn=True)
            res2 = fluid.layers.sigmoid(a)
            self.assertTrue(np.allclose(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph3(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        helper = LayerObjectHelper(fluid.unique_name.generate("test"))
        func = helper.append_activation
        with fluid.dygraph.guard():
            a = fluid.dygraph.to_variable(a_np)
            res1 = func(a, act="sigmoid", use_cudnn=True)
            res2 = fluid.layers.sigmoid(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph_use_mkldnn(self):
        a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
        helper = LayerHelper(
            fluid.unique_name.generate("test"), act="relu", use_mkldnn=True)
        func = helper.append_activation
        with fluid.dygraph.guard():
            a = fluid.dygraph.to_variable(a_np)
            res1 = func(a)
            res2 = fluid.layers.relu(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph_global_use_mkldnn(self):
        a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
        helper = LayerHelper(fluid.unique_name.generate("test"), act="relu")
        func = helper.append_activation
        with fluid.dygraph.guard(fluid.core.CPUPlace()):
            a = fluid.dygraph.to_variable(a_np)
            fluid.set_flags({'FLAGS_use_mkldnn': True})
            try:
                res1 = func(a)
            finally:
                fluid.set_flags({'FLAGS_use_mkldnn': False})
            res2 = fluid.layers.relu(a)
        self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_bias_in_dygraph_exception(self):
        with new_program_scope():
            np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
            a = fluid.layers.data("a", [10, 20])
            func = dygraph_utils._append_bias_in_dygraph
            self.assertRaises(AssertionError, func, a)

    def test_append_bias_in_dygraph(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_bias_in_dygraph
        with fluid.dygraph.guard():
            a = fluid.dygraph.to_variable(a_np)
            res1 = func(a, bias=a)
            res2 = a + a
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))


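# Using a dygraph VarBase outside of a dygraph guard should raise a TypeError.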
class TestDygraphGuardWithError(unittest.TestCase):
    def test_without_guard(self):
        with fluid.dygraph.guard():
            x = fluid.dygraph.to_variable(np.zeros([10, 10]))
        with self.assertRaisesRegexp(TypeError,
                                     "Please use `with fluid.dygraph.guard()"):
            y = fluid.layers.matmul(x, x)


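# Layer should keep the plain Python metaclass, while VarBase is a pybind11 type.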
class TestMetaclass(unittest.TestCase):
    def test_metaclass(self):
        self.assertEqual(type(MyLayer).__name__, 'type')
        self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type')
        self.assertEqual(
            type(paddle.fluid.core.VarBase).__name__, 'pybind11_type')


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()