# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import unittest
import numpy as np

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import Linear
from paddle.fluid.layer_helper import LayerHelper
from test_imperative_base import new_program_scope
import paddle.fluid.dygraph_utils as dygraph_utils
from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
import paddle
from paddle.fluid.framework import _test_eager_guard, _in_eager_mode, in_dygraph_mode


class MyLayer(fluid.Layer):
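    """Tiny layer used by several tests: relu, elementwise square, then
    reduce_sum. The relu output is kept in ``_x_for_debug`` so its gradient
    can be inspected."""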
    def __init__(self):
        super(MyLayer, self).__init__()

    def forward(self, inputs):
        x = fluid.layers.relu(inputs)
        self._x_for_debug = x
        x = fluid.layers.elementwise_mul(x, x)
        x = fluid.layers.reduce_sum(x)
        return [x]


class MLP(fluid.Layer):
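    """Two-layer MLP (Linear(input_size, 3) -> Linear(3, 4)) with all weights
    and biases initialized to the constant 0.1; forward reduces the output to
    a scalar with reduce_sum."""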
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self._linear1 = Linear(
            input_size,
            3,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))
        self._linear2 = Linear(
            3,
            4,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)))

    def forward(self, inputs):
        x = self._linear1(inputs)
        x = self._linear2(x)
        x = fluid.layers.reduce_sum(x)
        return x


class SimpleRNNCell(fluid.Layer):
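    """Hand-written RNN cell: hidden = tanh(input * W_i2h + pre_hidden * W_h2h),
    and the step output is reduce_sum(softmax(hidden * W_h2o))."""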
    def __init__(self, step_input_size, hidden_size, output_size, param_attr):
        super(SimpleRNNCell, self).__init__()
        self.step_input_size = step_input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self._dtype = core.VarDesc.VarType.FP32
        self.param_attr = param_attr

        i2h_param_shape = [self.step_input_size, self.hidden_size]
        h2h_param_shape = [self.hidden_size, self.hidden_size]
        h2o_param_shape = [self.output_size, self.hidden_size]
        self._i2h_w = None
        self._i2h_w = self.create_parameter(
            attr=self.param_attr,
            shape=i2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2h_w = self.create_parameter(
            attr=self.param_attr,
            shape=h2h_param_shape,
            dtype=self._dtype,
            is_bias=False)
        self._h2o_w = self.create_parameter(
            attr=self.param_attr,
            shape=h2o_param_shape,
            dtype=self._dtype,
            is_bias=False)

    def forward(self, input, pre_hidden):
        tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w)
        tmp_h2h = paddle.fluid.layers.nn.mul(pre_hidden, self._h2h_w)
        hidden = paddle.add(tmp_h2h, tmp_i2h)
        hidden = self._helper.append_activation(hidden, act='tanh')
        out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
        softmax_out = paddle.nn.functional.softmax(out)
        reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
        return reduce_out, hidden


class SimpleRNN(fluid.Layer):
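    """Unrolls SimpleRNNCell over a fixed sequence length of 4 steps, starting
    from a constant-initialized hidden state."""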
    def __init__(self):
        super(SimpleRNN, self).__init__()
        self.seq_len = 4
        self._cell = SimpleRNNCell(
            3,
            3,
            3,
            fluid.ParamAttr(initializer=fluid.initializer.Constant(value=0.1)))

    def forward(self, inputs):
        outs = list()
        pre_hiddens = list()

        init_hidden = self.create_parameter(
            attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.1)),
            shape=[1, 3],
            dtype='float32',
            is_bias=False)
        pre_hidden = init_hidden
        for i in range(self.seq_len):
            input = fluid.layers.slice(
                inputs, axes=[1], starts=[i], ends=[i + 1])
            input = fluid.layers.reshape(input, shape=[1, 3])
            out_softmax, pre_hidden = self._cell(input, pre_hidden)
            outs.append(out_softmax)

        return outs, pre_hiddens


class TestImperative(unittest.TestCase):
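    """Basic dygraph (imperative) behaviours. Most ``test_*`` wrappers below run
    their ``func_*`` body twice: once under ``_test_eager_guard()`` and once in
    the default dygraph mode."""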
    def functional_dygraph_context(self):
        self.assertFalse(fluid.dygraph.enabled())
        fluid.enable_dygraph()
        self.assertTrue(fluid.dygraph.enabled())
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        var_inp = paddle.to_tensor(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out1 = out.numpy()
        out.backward()
        dy_grad1 = mlp._linear1.weight.gradient()
        fluid.disable_dygraph()
        self.assertFalse(fluid.dygraph.enabled())
        with fluid.dygraph.guard():
            self.assertTrue(fluid.dygraph.enabled())
            var_inp = paddle.to_tensor(np_inp)
            mlp = MLP(input_size=2)
            out = mlp(var_inp)
            dy_out2 = out.numpy()
            out.backward()
            dy_grad2 = mlp._linear1.weight.gradient()
        self.assertFalse(fluid.dygraph.enabled())
        self.assertTrue(np.array_equal(dy_out1, dy_out2))
        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))

    def test_functional_dygraph_context(self):
        with _test_eager_guard():
            self.functional_dygraph_context()
        self.functional_dygraph_context()

    def functional_paddle_imperative_dygraph_context(self):
        self.assertFalse(paddle.in_dynamic_mode())
        paddle.disable_static()
        self.assertTrue(paddle.in_dynamic_mode())
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        var_inp = paddle.to_tensor(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out1 = out.numpy()
        out.backward()
        dy_grad1 = mlp._linear1.weight.gradient()
        paddle.enable_static()
        self.assertFalse(paddle.in_dynamic_mode())
        paddle.disable_static()
        self.assertTrue(paddle.in_dynamic_mode())
        var_inp = paddle.to_tensor(np_inp)
        mlp = MLP(input_size=2)
        out = mlp(var_inp)
        dy_out2 = out.numpy()
        out.backward()
        dy_grad2 = mlp._linear1.weight.gradient()
        paddle.enable_static()
        self.assertFalse(paddle.in_dynamic_mode())
        self.assertTrue(np.array_equal(dy_out1, dy_out2))
        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))

    def test_functional_paddle_imperative_dygraph_context(self):
        with _test_eager_guard():
            self.functional_paddle_imperative_dygraph_context()
        self.functional_paddle_imperative_dygraph_context()

    def func_isinstance(self):
        var = fluid.layers.data(shape=[1], name='x', dtype='float32')
        self.assertTrue(isinstance(var, fluid.Variable))
        with fluid.dygraph.guard():
            if fluid.framework._in_eager_mode():
                var_base = paddle.to_tensor(np.array([3, 4, 5]))
                self.assertTrue(isinstance(var_base, core.eager.EagerTensor))
            else:
                var_base = paddle.to_tensor(np.array([3, 4, 5]))
                self.assertTrue(isinstance(var_base, core.VarBase))
                self.assertTrue(isinstance(var_base, fluid.Variable))

    def test_isinstance(self):
        with _test_eager_guard():
            self.func_isinstance()
        self.func_isinstance()

    def func_create_varbase(self):
        x = np.ones([2, 2], np.float32)
        y = np.zeros([3, 3], np.float32)
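        # A LoDTensor filled from numpy, used below to check that a VarBase /
        # EagerTensor can be constructed from an existing tensor.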
        t = fluid.Tensor()
        t.set(x, fluid.CPUPlace())
        if _in_eager_mode():
            # TODO(jiabin): Support Kwargs and uncomment these tests
            # egr_tmp = fluid.core.eager.EagerTensor(value=x, place=fluid.core.CPUPlace())
            egr_tmp2 = fluid.core.eager.EagerTensor(y, fluid.core.CPUPlace())
            egr_tmp3 = paddle.to_tensor(x)
            egr_tmp4 = fluid.core.eager.EagerTensor(y)
            # egr_tmp5 = fluid.core.eager.EagerTensor(value=x)
            # TODO(jiabin): Support it when we merge LoDTensor with DenseTensor
            egr_tmp6 = fluid.core.eager.EagerTensor(t)

            # self.assertTrue(np.array_equal(x, egr_tmp.numpy()))
            self.assertTrue(np.array_equal(y, egr_tmp2.numpy()))
            self.assertTrue(np.array_equal(x, egr_tmp3.numpy()))
            self.assertTrue(np.array_equal(y, egr_tmp4.numpy()))
            # self.assertTrue(np.array_equal(x, egr_tmp5.numpy()))
            self.assertTrue(np.array_equal(x, egr_tmp6.numpy()))
        else:
            tmp = fluid.core.VarBase(value=x, place=fluid.core.CPUPlace())
            tmp2 = fluid.core.VarBase(y, fluid.core.CPUPlace())
            tmp3 = paddle.to_tensor(x)
            tmp4 = fluid.core.VarBase(y)
            tmp5 = fluid.core.VarBase(value=x)
            tmp6 = fluid.core.VarBase(t)

            self.assertTrue(np.array_equal(x, tmp.numpy()))
            self.assertTrue(np.array_equal(y, tmp2.numpy()))
            self.assertTrue(np.array_equal(x, tmp3.numpy()))
            self.assertTrue(np.array_equal(y, tmp4.numpy()))
            self.assertTrue(np.array_equal(x, tmp5.numpy()))
            self.assertTrue(np.array_equal(x, tmp6.numpy()))

    def test_create_varbase(self):
        with fluid.dygraph.guard():
            with _test_eager_guard():
                self.func_create_varbase()
            self.func_create_varbase()

    def test_no_grad_guard(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with fluid.dygraph.no_grad():
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
            x = paddle.to_tensor(data)
            y = paddle.add(l0(x), tmp)
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_no_grad_guard(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with paddle.no_grad():
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
            x = paddle.to_tensor(data)
            y = paddle.add(l0(x), tmp)
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_set_grad_enabled(self):
        data = np.array([[2, 3], [4, 5]]).astype('float32')
        with fluid.dygraph.guard():
            l0 = fluid.Linear(2, 2)
            self.assertTrue(l0.weight._grad_ivar() is None)
            l1 = fluid.Linear(2, 2)
            with paddle.set_grad_enabled(False):
                self.assertTrue(l1.weight.stop_gradient is False)
                tmp = l1.weight * 2
                with paddle.set_grad_enabled(True):
                    tmp2 = l1.weight * 2
                self.assertTrue(tmp.stop_gradient)
                self.assertTrue(tmp2.stop_gradient is False)
            x = paddle.to_tensor(data)
            y = paddle.add(l0(x), tmp2)
            o = l1(y)
            o.backward()

            self.assertTrue(tmp._grad_ivar() is None)
            self.assertTrue(tmp2._grad_ivar() is not None)
            self.assertTrue(l0.weight._grad_ivar() is not None)

    def test_paddle_imperative_is_grad_enabled(self):
        with fluid.dygraph.guard():
            with paddle.set_grad_enabled(False):
                self.assertTrue(paddle.is_grad_enabled() is False)
                with paddle.set_grad_enabled(True):
                    self.assertTrue(paddle.is_grad_enabled())

    def func_sum_op(self):
        x = np.ones([2, 2], np.float32)
        with fluid.dygraph.guard():
            inputs = []
            for _ in range(10):
                tmp = paddle.to_tensor(x)
                tmp.stop_gradient = False
                inputs.append(tmp)
            ret = paddle.add_n(inputs)
            loss = fluid.layers.reduce_sum(ret)
            loss.backward()
        with fluid.dygraph.guard():
            inputs2 = []
            for _ in range(10):
                tmp = paddle.to_tensor(x)
                tmp.stop_gradient = False
                inputs2.append(tmp)
            ret2 = paddle.add_n(inputs2)
            loss2 = fluid.layers.reduce_sum(ret2)
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            loss2.backward()

            self.assertTrue(np.allclose(ret.numpy(), x * 10))
            self.assertTrue(np.allclose(inputs[0].gradient(), x))
            self.assertTrue(np.allclose(ret2.numpy(), x * 10))
            a = inputs2[0].gradient()
            self.assertTrue(np.allclose(inputs2[0].gradient(), x))

    def test_sum_op(self):
        with _test_eager_guard():
            self.func_sum_op()
        self.func_sum_op()

    def func_empty_var(self):
        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            # Normally, a tensor with a -1 shape cannot be created in dygraph mode, so this test is not ideal.
            if not _in_eager_mode():
                new_variable = cur_block.create_var(
                    name="X", shape=[-1, 23, 48], dtype='float32')
            else:
                new_variable = cur_block.create_var(
                    name="X", shape=[1, 23, 48], dtype='float32')
            try:
                new_variable.numpy()
            except Exception as e:
                assert type(e) == ValueError

            try:
                new_variable.backward()
            except Exception as e:
                assert type(e) == core.EnforceNotMet
            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
            if not _in_eager_mode():
                try:
                    new_variable.clear_gradient()
                except Exception as e:
                    assert type(e) == core.EnforceNotMet

    def test_empty_var(self):
        with _test_eager_guard():
            self.func_empty_var()
        self.func_empty_var()

    def func_empty_grad(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = paddle.to_tensor(x)
            self.assertIsNone(new_var.gradient())
            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
            if not _in_eager_mode():
                try:
                    new_var.clear_gradient()
                except Exception as e:
                    assert type(e) == core.EnforceNotMet

        with fluid.dygraph.guard():
            cur_program = fluid.Program()
            cur_block = cur_program.current_block()
            # Normally, a tensor with a -1 shape cannot be created in dygraph mode, so this test is not ideal.
            if not _in_eager_mode():
                new_variable = cur_block.create_var(
                    name="X", shape=[-1, 23, 48], dtype='float32')
            else:
                new_variable = cur_block.create_var(
                    name="X", shape=[1, 23, 48], dtype='float32')
            try:
                new_variable.gradient()
            except Exception as e:
                assert type(e) == ValueError

    def test_empty_grad(self):
        with _test_eager_guard():
            self.func_empty_grad()
        self.func_empty_grad()

    def func_set_persistable(self):
        with fluid.dygraph.guard():
            x = np.ones([2, 2], np.float32)
            new_var = paddle.to_tensor(x)
            self.assertFalse(new_var.persistable)
            new_var.persistable = True
            self.assertTrue(new_var.persistable)

    def test_set_persistable(self):
        with _test_eager_guard():
            self.func_set_persistable()
        self.func_set_persistable()

    def func_layer(self):
        with fluid.dygraph.guard():
            l = fluid.Layer("l")
            self.assertRaises(NotImplementedError, l.forward, [])

    def test_layer(self):
        with _test_eager_guard():
            self.func_layer()
        self.func_layer()

    def func_layer_in_out(self):
        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
        with fluid.dygraph.guard():
            var_inp = paddle.to_tensor(np_inp)
            var_inp.stop_gradient = False
            l = MyLayer()
            x = l(var_inp)[0]
            self.assertIsNotNone(x)
            dy_out = x.numpy()
            x.backward()
            dy_grad = l._x_for_debug.gradient()

        with fluid.dygraph.guard():
            var_inp2 = paddle.to_tensor(np_inp)
            var_inp2.stop_gradient = False
            l2 = MyLayer()
            x2 = l2(var_inp2)[0]
            self.assertIsNotNone(x2)
            dy_out2 = x2.numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            x2.backward()
            dy_grad2 = l2._x_for_debug.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[3], append_batch_size=False)
            l = MyLayer()
            x = l(inp)[0]
            param_grads = fluid.backward.append_backward(
                x, parameter_list=[l._x_for_debug.name])[0]
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

            static_out, static_grad = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[x.name, param_grads[1].name])

        self.assertTrue(np.array_equal(dy_out, static_out))
        self.assertTrue(np.array_equal(dy_grad, static_grad))
        self.assertTrue(np.array_equal(dy_out2, static_out))
        self.assertTrue(np.array_equal(dy_grad2, static_grad))

    def test_layer_in_out(self):
        with _test_eager_guard():
            self.func_layer_in_out()
        self.func_layer_in_out()

    def func_mlp(self):
        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
        with fluid.dygraph.guard():
            var_inp = paddle.to_tensor(np_inp)
            mlp = MLP(input_size=2)
            out = mlp(var_inp)
            dy_out = out.numpy()
            out.backward()
            dy_grad = mlp._linear1.weight.gradient()

        with fluid.dygraph.guard():
            var_inp2 = paddle.to_tensor(np_inp)
            mlp2 = MLP(input_size=2)
            out2 = mlp2(var_inp2)
            dy_out2 = out2.numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            out2.backward()
            dy_grad2 = mlp2._linear1.weight.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[2, 2], append_batch_size=False)
            mlp = MLP(input_size=2)
            out = mlp(inp)
            param_grads = fluid.backward.append_backward(
                out, parameter_list=[mlp._linear1.weight.name])[0]
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            exe.run(fluid.default_startup_program())

            static_out, static_grad = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[out.name, param_grads[1].name])

        self.assertTrue(np.allclose(dy_out, static_out))
        self.assertTrue(np.allclose(dy_grad, static_grad))
        self.assertTrue(np.allclose(dy_out2, static_out))
        self.assertTrue(np.allclose(dy_grad2, static_grad))

        params = mlp.parameters(True)
        self.assertEqual("linear_0.w_0", params[0].name)
        self.assertEqual("linear_0.b_0", params[1].name)
        self.assertEqual("linear_1.w_0", params[2].name)
        self.assertEqual("linear_1.b_0", params[3].name)
        self.assertEqual(len(params), 4)

        sublayers = mlp.sublayers()
        self.assertEqual(mlp._linear1, sublayers[0])
        self.assertEqual(mlp._linear2, sublayers[1])
        self.assertEqual(len(sublayers), 2)

    def test_mlp(self):
        with _test_eager_guard():
            self.func_mlp()
        self.func_mlp()

    def test_gradient_accumulation(self):
        def test_single_api(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            x = paddle.to_tensor(5., stop_gradient=False)
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
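                # d(x**4)/dx = 4 * x**3 = 500 at x = 5; the gradient accumulates
                # across iterations because it is never cleared inside this loop.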
                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_gradient()
            self.assertEqual(x.grad.numpy(), 0.)
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_grad()
            self.assertEqual(x.grad.numpy(), 0.)

        def test_simple_net(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            x = paddle.to_tensor(5., stop_gradient=False)
            y = paddle.to_tensor(2., stop_gradient=False)
            z = paddle.to_tensor(3., stop_gradient=False)

            def fun(x, y, z):
                loss1 = x * x * y
                loss2 = x * z
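                # With x=5, y=2, z=3 the two backward passes below give
                # x.grad = 2*x*y + z = 23, y.grad = x*x = 25, z.grad = x = 5.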
                loss1.backward(retain_graph=True)
                loss2.backward(retain_graph=True)
                self.assertTrue(np.array_equal(x.grad.numpy(), [23.]))
                self.assertTrue(np.array_equal(y.grad.numpy(), [25.]))
                self.assertTrue(np.array_equal(z.grad.numpy(), [5.]))
                x.clear_grad()
                y.clear_grad()
                z.clear_grad()

                dx = paddle.grad([loss1], x, create_graph=True)[0]
                loss = loss1 + loss2 + dx
                # loss = x*x*y + x*z + 2*x*y
                return loss

            loss = fun(x, y, z)
            loss.backward(retain_graph=True)
            # x.grad = 2*x*y + z + 2*y = 27
            self.assertTrue(np.array_equal(x.grad.numpy(), [27]))
591 592

            loss.backward(retain_graph=True)
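            # x.grad accumulates across backward calls since it is not cleared:
            # 27 -> 54 -> 81.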
            self.assertTrue(np.array_equal(x.grad.numpy(), [54]))

            loss.backward()
            self.assertTrue(np.array_equal(x.grad.numpy(), [81]))

            with self.assertRaises(RuntimeError):
                loss.backward()

            loss1 = x * x * y
            loss2 = x * z
            dx = paddle.grad([loss1], x, create_graph=True)[0]
            loss = loss1 + loss2 + dx
            loss.backward()
            self.assertTrue(np.array_equal(dx.grad.numpy(), [1]))
            self.assertTrue(np.array_equal(x.grad.numpy(), [108]))

        def test_mlp(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
            input_size = 5
            paddle.seed(1)
            mlp1 = MLP(input_size=input_size)
            # generate the gradient of each step
            mlp2 = MLP(input_size=input_size)

            expected_weight1_grad = 0.
            expected_bias1_grad = 0.
            expected_weight2_grad = 0.
            expected_bias2_grad = 0.

            for batch_id in range(100):
                x = paddle.uniform([10, input_size])
                detach_x = x.detach()
                clear_loss = mlp2(detach_x)
                clear_loss.backward()
                expected_weight1_grad = (
                    expected_weight1_grad + mlp2._linear1.weight.grad.numpy())
                expected_bias1_grad = (
                    expected_bias1_grad + mlp2._linear1.bias.grad.numpy())
                expected_weight2_grad = (
                    expected_weight2_grad + mlp2._linear2.weight.grad.numpy())
                expected_bias2_grad = (
                    expected_bias2_grad + mlp2._linear2.bias.grad.numpy())

                loss = mlp1(x)
                loss.backward()

                self.assertTrue(np.array_equal(loss.grad.numpy(), [1]))
                self.assertTrue(
                    np.allclose(mlp1._linear1.weight.grad.numpy(),
                                expected_weight1_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear1.bias.grad.numpy(),
                                expected_bias1_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear2.weight.grad.numpy(),
                                expected_weight2_grad))
                self.assertTrue(
                    np.allclose(mlp1._linear2.bias.grad.numpy(),
                                expected_bias2_grad))

                mlp2.clear_gradients()
                self.assertTrue(np.array_equal(clear_loss.grad.numpy(), [1]))
                if ((batch_id + 1) % 10) % 2 == 0:
                    mlp1.clear_gradients()
                    expected_weight1_grad = 0.
                    expected_bias1_grad = 0.
                    expected_weight2_grad = 0.
                    expected_bias2_grad = 0.
                elif ((batch_id + 1) % 10) % 2 == 1:
                    mlp1.clear_gradients()
                    mlp1._linear1.weight._set_grad_ivar(
                        paddle.ones([input_size, 3]))
                    mlp1._linear2.weight._set_grad_ivar(paddle.ones([3, 4]))
                    expected_weight1_grad = 1.
                    expected_bias1_grad = 0.
                    expected_weight2_grad = 1.
                    expected_bias2_grad = 0.

        with fluid.dygraph.guard():
            test_single_api(False)
            test_single_api(True)
            test_simple_net(False)
            test_simple_net(True)
            test_mlp(False)
            test_mlp(True)

    def func_dygraph_vs_static(self):
        np_inp1 = np.random.rand(4, 3, 3)
        np_inp2 = np.random.rand(4, 3, 3)

        # dynamic graph
        with fluid.dygraph.guard():
            inp1 = paddle.to_tensor(np_inp1)
            inp2 = paddle.to_tensor(np_inp2)
            if np.sum(np_inp1) < np.sum(np_inp2):
                x = fluid.layers.elementwise_add(inp1, inp2)
            else:
                x = fluid.layers.elementwise_sub(inp1, inp2)
            dygraph_result = x.numpy()

        # static graph
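        # less_than + layers.IfElse express the same data-dependent branch as
        # the Python-level if/else used in the dygraph section above.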
        with new_program_scope():
            inp_data1 = fluid.layers.data(
                name='inp1', shape=[3, 3], dtype=np.float32)
            inp_data2 = fluid.layers.data(
                name='inp2', shape=[3, 3], dtype=np.float32)

            a = fluid.layers.expand(
                fluid.layers.reshape(
                    fluid.layers.reduce_sum(inp_data1), [1, 1]), [4, 1])
            b = fluid.layers.expand(
                fluid.layers.reshape(
                    fluid.layers.reduce_sum(inp_data2), [1, 1]), [4, 1])
            cond = fluid.layers.less_than(x=a, y=b)

            ie = fluid.layers.IfElse(cond)
            with ie.true_block():
                d1 = ie.input(inp_data1)
                d2 = ie.input(inp_data2)
                d3 = fluid.layers.elementwise_add(d1, d2)
                ie.output(d3)

            with ie.false_block():
                d1 = ie.input(inp_data1)
                d2 = ie.input(inp_data2)
                d3 = fluid.layers.elementwise_sub(d1, d2)
                ie.output(d3)
            out = ie()

            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            static_result = exe.run(fluid.default_main_program(),
                                    feed={'inp1': np_inp1,
                                          'inp2': np_inp2},
                                    fetch_list=out)[0]
        self.assertTrue(np.allclose(dygraph_result, static_result))

    def test_dygraph_vs_static(self):
        with _test_eager_guard():
            self.func_dygraph_vs_static()
        self.func_dygraph_vs_static()

    def func_rnn(self):
        np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                           [10.0, 11.0, 12.0]])
        np_inp = np_inp.reshape((1, 4, 3))
        np_inp = np_inp.astype(np.float32)
        with fluid.dygraph.guard():
            var_inp = paddle.to_tensor(np_inp)
            var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
            simple_rnn = SimpleRNN()
            outs, pre_hiddens = simple_rnn.forward(var_inp)
            dy_out = outs[3].numpy()
            outs[3].backward()
            dy_grad_h2o = simple_rnn._cell._h2o_w.gradient()
            dy_grad_h2h = simple_rnn._cell._h2h_w.gradient()
            dy_grad_i2h = simple_rnn._cell._i2h_w.gradient()

        with fluid.dygraph.guard():
            var_inp2 = paddle.to_tensor(np_inp)
            var_inp2 = fluid.layers.reshape(var_inp2, shape=[1, 4, 3])
            simple_rnn2 = SimpleRNN()
            outs2, pre_hiddens2 = simple_rnn2.forward(var_inp2)
            dy_out2 = outs2[3].numpy()
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            outs2[3].backward()
            dy_grad_h2o2 = simple_rnn2._cell._h2o_w.gradient()
            dy_grad_h2h2 = simple_rnn2._cell._h2h_w.gradient()
            dy_grad_i2h2 = simple_rnn2._cell._i2h_w.gradient()

        with new_program_scope():
            inp = fluid.layers.data(
                name="inp", shape=[1, 4, 3], append_batch_size=False)
            simple_rnn = SimpleRNN()
            outs, pre_hiddens = simple_rnn(inp)
            param_grads = fluid.backward.append_backward(outs[3])
            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(fluid.default_startup_program())
            static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
                feed={inp.name: np_inp},
                fetch_list=[
                    outs[3].name, param_grads[0][1].name,
                    param_grads[1][1].name, param_grads[2][1].name
                ])

        self.assertTrue(np.array_equal(dy_out, static_out))
        self.assertTrue(np.array_equal(dy_grad_h2o, static_grad_h2o))
        self.assertTrue(np.array_equal(dy_grad_h2h, static_grad_h2h))
        self.assertTrue(np.array_equal(dy_grad_i2h, static_grad_i2h))
        self.assertTrue(np.array_equal(dy_out2, static_out))
        self.assertTrue(np.array_equal(dy_grad_h2o2, static_grad_h2o))
        self.assertTrue(np.array_equal(dy_grad_h2h2, static_grad_h2h))
        self.assertTrue(np.array_equal(dy_grad_i2h2, static_grad_i2h))

    def test_rnn(self):
        with _test_eager_guard():
            self.func_rnn()
        self.func_rnn()

    def func_layer_attrs(self):
        layer = fluid.dygraph.Layer("test")
        layer.test_attr = 1
        self.assertFalse(hasattr(layer, "whatever"))
        self.assertTrue(hasattr(layer, "test_attr"))
        self.assertEqual(layer.test_attr, 1)

        my_layer = MyLayer()
        my_layer.w1 = my_layer.create_parameter([3, 3])
        my_layer.add_parameter('w2', None)
        self.assertEqual(len(my_layer.parameters()), 1)
        self.assertRaises(TypeError, my_layer.__setattr__, 'w1', 'str')
        my_layer.w1 = None
        self.assertEqual(len(my_layer.parameters()), 0)
        my_layer.l1 = fluid.dygraph.Linear(3, 3)
        self.assertEqual(len(my_layer.sublayers()), 1)
        self.assertRaises(TypeError, my_layer.__setattr__, 'l1', 'str')
        my_layer.l1 = None
        self.assertEqual(len(my_layer.sublayers()), 0)

    def test_layer_attrs(self):
        with _test_eager_guard():
            self.func_layer_attrs()
        self.func_layer_attrs()


class TestDygraphUtils(unittest.TestCase):
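    """Checks the dygraph_utils helpers (``_append_activation_in_dygraph`` and
    ``_append_bias_in_dygraph``) against the equivalent fluid.layers ops."""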
    def func_append_activation_in_dygraph_exception(self):
        with new_program_scope():
            np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
            a = fluid.layers.data("a", [10, 20])
            func = dygraph_utils._append_activation_in_dygraph
            self.assertRaises(AssertionError, func, a, act="sigmoid")

    def test_append_activation_in_dygraph_exception(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph_exception()
        self.func_append_activation_in_dygraph_exception()

    def func_append_activation_in_dygraph1(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_activation_in_dygraph
        with fluid.dygraph.guard():
            a = paddle.to_tensor(a_np)
            res1 = func(a, act="hard_sigmoid")
            res2 = fluid.layers.hard_sigmoid(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph1(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph1()
        self.func_append_activation_in_dygraph1()

    def func_append_activation_in_dygraph2(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_activation_in_dygraph
        with fluid.dygraph.guard():
            a = paddle.to_tensor(a_np)
            res1 = func(a, act="sigmoid", use_mkldnn=True, use_cudnn=True)
            res2 = fluid.layers.sigmoid(a)
            self.assertTrue(np.allclose(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph2(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph2()
        self.func_append_activation_in_dygraph2()

    def func_append_activation_in_dygraph3(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        helper = LayerObjectHelper(fluid.unique_name.generate("test"))
        func = helper.append_activation
        with fluid.dygraph.guard():
            a = paddle.to_tensor(a_np)
            res1 = func(a, act="sigmoid", use_cudnn=True)
            res2 = fluid.layers.sigmoid(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph3(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph3()
        self.func_append_activation_in_dygraph3()

    def func_append_activation_in_dygraph_use_mkldnn(self):
        a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
        helper = LayerHelper(
            fluid.unique_name.generate("test"), act="relu", use_mkldnn=True)
        func = helper.append_activation
        with fluid.dygraph.guard():
            a = paddle.to_tensor(a_np)
            res1 = func(a)
            res2 = fluid.layers.relu(a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph_use_mkldnn(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph_use_mkldnn()
        self.func_append_activation_in_dygraph_use_mkldnn()

    def func_append_activation_in_dygraph_global_use_mkldnn(self):
        a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32)
        helper = LayerHelper(fluid.unique_name.generate("test"), act="relu")
        func = helper.append_activation
        with fluid.dygraph.guard(fluid.core.CPUPlace()):
            a = paddle.to_tensor(a_np)
            fluid.set_flags({'FLAGS_use_mkldnn': True})
            try:
                res1 = func(a)
            finally:
                fluid.set_flags({'FLAGS_use_mkldnn': False})
            res2 = fluid.layers.relu(a)
        self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_activation_in_dygraph_global_use_mkldnn(self):
        with _test_eager_guard():
            self.func_append_activation_in_dygraph_global_use_mkldnn()
        self.func_append_activation_in_dygraph_global_use_mkldnn()

    def func_append_bias_in_dygraph_exception(self):
        with new_program_scope():
            np_inp = np.random.random(size=(10, 20, 30)).astype(np.float32)
            a = fluid.layers.data("a", [10, 20])
            func = dygraph_utils._append_bias_in_dygraph
            self.assertRaises(AssertionError, func, a)

    def test_append_bias_in_dygraph_exception(self):
        with _test_eager_guard():
            self.func_append_bias_in_dygraph_exception()
        self.func_append_bias_in_dygraph_exception()

    def func_append_bias_in_dygraph(self):
        a_np = np.random.random(size=(10, 20, 30)).astype(np.float32)
        func = dygraph_utils._append_bias_in_dygraph
        with fluid.dygraph.guard():
            a = paddle.to_tensor(a_np)
            res1 = func(a, bias=a)
            res2 = paddle.add(a, a)
            self.assertTrue(np.array_equal(res1.numpy(), res2.numpy()))

    def test_append_bias_in_dygraph(self):
        with _test_eager_guard():
            self.func_append_bias_in_dygraph()
        self.func_append_bias_in_dygraph()


class TestDygraphGuardWithError(unittest.TestCase):
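    """Using a dygraph Tensor outside of any dygraph guard should raise a
    TypeError that points the user at ``with fluid.dygraph.guard()``."""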
    def func_without_guard(self):
        with fluid.dygraph.guard():
            x = paddle.to_tensor(np.zeros([10, 10]))
        with self.assertRaisesRegexp(TypeError,
                                     "Please use `with fluid.dygraph.guard()"):
            y = fluid.layers.matmul(x, x)

    def test_without_guard(self):
        with _test_eager_guard():
            self.func_without_guard()
        self.func_without_guard()


class TestMetaclass(unittest.TestCase):
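    """The Layer metaclass should be a plain Python ``type``; VarBase is still
    a pybind11 type when eager mode is off."""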
    def func_metaclass(self):
        self.assertEqual(type(MyLayer).__name__, 'type')
        self.assertNotEqual(type(MyLayer).__name__, 'pybind11_type')
        if core._in_eager_mode():
            self.assertEqual(
                type(paddle.fluid.core.eager.EagerTensor).__name__, 'type')
        else:
            self.assertEqual(
                type(paddle.fluid.core.VarBase).__name__, 'pybind11_type')

    def test_metaclass(self):
        with _test_eager_guard():
            self.func_metaclass()
        self.func_metaclass()


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()