From a851b97a581e5dcb389e7a8817d838320e85ebe5 Mon Sep 17 00:00:00 2001
From: zhongpu <2013000149@qq.com>
Date: Sat, 9 May 2020 15:00:58 +0800
Subject: [PATCH] copy dygraph api to paddle.imperative (#24085)

* copy dygraph api to paddle.imperative, test=develop

* polish the code, test=develop

* polish code, test=develop

* polish code, test=develop

* move paddle.imperative.Layer to paddle.nn.Layer, test=develop
---
 python/paddle/__init__.py                     |   6 +
 .../tests/unittests/test_imperative_basic.py  |  25 ++
 .../unittests/test_imperative_save_load.py    |  15 +-
 .../test_imperative_selected_rows.py          |   2 +-
 .../test_paddle_imperative_double_grad.py     | 300 ++++++++++++++++++
 python/paddle/imperative/__init__.py          |  12 +-
 python/paddle/nn/__init__.py                  |   1 +
 7 files changed, 347 insertions(+), 14 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py

diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 07073bfbfd..ad4a6d64d1 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -202,3 +202,9 @@ from .tensor.stat import var  #DEFINE_ALIAS
 # from .tensor.tensor import Tensor  #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensor  #DEFINE_ALIAS
 # from .tensor.tensor import LoDTensorArray  #DEFINE_ALIAS
+from .fluid.dygraph.base import enable_dygraph  #DEFINE_ALIAS
+from .fluid.dygraph.base import disable_dygraph  #DEFINE_ALIAS
+from .fluid.framework import in_dygraph_mode  #DEFINE_ALIAS
+enable_imperative = enable_dygraph  #DEFINE_ALIAS
+disable_imperative = disable_dygraph  #DEFINE_ALIAS
+in_imperative_mode = in_dygraph_mode
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index a8f621fa62..d52860396a 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -204,6 +204,31 @@ class TestImperative(unittest.TestCase):
         self.assertTrue(np.array_equal(dy_out1, dy_out2))
         self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
 
+    def test_functional_paddle_imperative_dygraph_context(self):
+        self.assertFalse(paddle.imperative.enabled())
+        paddle.enable_imperative()
+        self.assertTrue(paddle.imperative.enabled())
+        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+        var_inp = paddle.imperative.to_variable(np_inp)
+        mlp = MLP(input_size=2)
+        out = mlp(var_inp)
+        dy_out1 = out.numpy()
+        out.backward()
+        dy_grad1 = mlp._linear1.weight.gradient()
+        paddle.disable_imperative()
+        self.assertFalse(paddle.imperative.enabled())
+        with paddle.imperative.guard():
+            self.assertTrue(paddle.imperative.enabled())
+            var_inp = paddle.imperative.to_variable(np_inp)
+            mlp = MLP(input_size=2)
+            out = mlp(var_inp)
+            dy_out2 = out.numpy()
+            out.backward()
+            dy_grad2 = mlp._linear1.weight.gradient()
+        self.assertFalse(paddle.imperative.enabled())
+        self.assertTrue(np.array_equal(dy_out1, dy_out2))
+        self.assertTrue(np.array_equal(dy_grad1, dy_grad2))
+
     def test_isinstance(self):
         var = fluid.layers.data(shape=[1], name='x', dtype='float32')
         self.assertTrue(isinstance(var, fluid.Variable))
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 734856ec0d..3dac9324e4 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -289,7 +289,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
                 np_t = v.numpy()
                 self.model_base[k] = np_t
 
-            fluid.save_dygraph(self.state_dict, "./test_dy")
+            paddle.imperative.save(self.state_dict, "./test_dy")
 
     def testLoadAndSetVarBase(self):
         seed = 90
@@ -369,7 +369,8 @@
             if isinstance(adam._learning_rate, LearningRateDecay):
                 adam._learning_rate.step_num = 0
 
-            para_state_dict, opti_state_dict = fluid.load_dygraph("./test_dy")
+            para_state_dict, opti_state_dict = paddle.imperative.load(
+                "./test_dy")
             adam.set_dict(opti_state_dict)
 
             opti_dict = adam.state_dict()
@@ -881,18 +882,18 @@
         with fluid.dygraph.guard():
             emb = fluid.dygraph.Embedding([10, 10])
             state_dict = emb.state_dict()
-            paddle.imperative.save_dygraph(state_dict,
-                                           os.path.join('saved_dy', 'emb_dy'))
+            paddle.imperative.save(state_dict,
+                                   os.path.join('saved_dy', 'emb_dy'))
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy'))
 
             self.assertTrue(opti_state_dict == None)
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy.pdparams'))
 
-            para_state_dict, opti_state_dict = paddle.imperative.load_dygraph(
+            para_state_dict, opti_state_dict = paddle.imperative.load(
                 os.path.join('saved_dy', 'emb_dy.pdopt'))
 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
index cfaca5a565..2789174ba7 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py
@@ -24,7 +24,7 @@ import paddle.fluid.core as core
 import paddle
 
 
-class SimpleNet(paddle.imperative.Layer):
+class SimpleNet(paddle.nn.Layer):
     def __init__(self, vocab_size, hidden_size, dtype):
         super(SimpleNet, self).__init__()
         self.emb = fluid.dygraph.Embedding(
diff --git a/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
new file mode 100644
index 0000000000..50e5874789
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_paddle_imperative_double_grad.py
@@ -0,0 +1,300 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+from paddle.fluid.wrapped_decorator import wrap_decorator
+import unittest
+from unittest import TestCase
+import numpy as np
+import paddle
+
+
+def _dygraph_guard_(func):
+    def __impl__(*args, **kwargs):
+        if paddle.in_imperative_mode():
+            return func(*args, **kwargs)
+        else:
+            with fluid.dygraph.guard():
+                return func(*args, **kwargs)
+
+    return __impl__
+
+
+dygraph_guard = wrap_decorator(_dygraph_guard_)
+
+
+def random_var(size, low=-1, high=1, dtype='float32'):
+    x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype)
+    return fluid.dygraph.to_variable(x_np)
+
+
+class TestDygraphDoubleGrad(TestCase):
+    def setUp(self):
+        self.sort_sum_gradient = False
+        self.shape = [5, 10]
+
+    def grad(self,
+             outputs,
+             inputs,
+             grad_outputs=None,
+             no_grad_vars=None,
+             retain_graph=None,
+             create_graph=False,
+             allow_unused=False):
+        backward_strategy = fluid.dygraph.BackwardStrategy()
+        backward_strategy.sort_sum_gradient = self.sort_sum_gradient
+        return paddle.imperative.grad(
+            outputs=outputs,
+            inputs=inputs,
+            grad_outputs=grad_outputs,
+            no_grad_vars=no_grad_vars,
+            retain_graph=retain_graph,
+            create_graph=create_graph,
+            allow_unused=allow_unused,
+            backward_strategy=backward_strategy)
+
+    @dygraph_guard
+    def test_exception(self):
+        with self.assertRaises(AssertionError):
+            self.grad(None, None)
+
+        shape = self.shape
+
+        with self.assertRaises(AssertionError):
+            self.grad(1, random_var(shape))
+
+        with self.assertRaises(AssertionError):
+            self.grad(random_var(shape), 1)
+
+        with self.assertRaises(AssertionError):
+            self.grad([1], [random_var(shape)])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape)], [1])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape), random_var(shape)],
+                      [random_var(shape)], [random_var(shape)])
+
+        with self.assertRaises(AssertionError):
+            self.grad(
+                [random_var(shape)], [random_var(shape)], no_grad_vars=[1])
+
+        with self.assertRaises(AssertionError):
+            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)
+
+    @dygraph_guard
+    def test_simple_example(self):
+        x = random_var(self.shape)
+        x.stop_gradient = False
+        y = x + 1
+
+        for create_graph in [False, True]:
+            dx, = self.grad(
+                [x], [x], create_graph=create_graph, retain_graph=True)
+            self.assertEqual(dx.shape, x.shape)
+            self.assertTrue(np.all(dx.numpy() == 1))
+            self.assertNotEqual(dx.stop_gradient, create_graph)
+
+            dx_mul_2, = self.grad(
+                [y, x], [x], create_graph=create_graph, retain_graph=True)
+            self.assertEqual(dx_mul_2.shape, x.shape)
+            self.assertTrue(np.all(dx_mul_2.numpy() == 2))
+            self.assertNotEqual(dx_mul_2.stop_gradient, create_graph)
+
+            none_grad, = self.grad(
+                [x], [y], create_graph=create_graph, allow_unused=True)
+            self.assertTrue(none_grad is None)
+
+            grad_with_none_and_not_none, = self.grad(
+                [x, y], [y], create_graph=create_graph)
+            self.assertEqual(grad_with_none_and_not_none.shape, x.shape)
+            self.assertTrue(np.all(grad_with_none_and_not_none.numpy() == 1))
+            self.assertNotEqual(grad_with_none_and_not_none.stop_gradient,
+                                create_graph)
+
+    @dygraph_guard
+    def test_none_one_initial_gradient(self):
+        numel = 1
+        for s in self.shape:
+            numel *= s
+
+        half_numel = int(numel / 2)
+        half_x_positive = np.random.uniform(low=1, high=2, size=[half_numel])
+        half_x_negative = np.random.uniform(
+            low=-2, high=-1, size=[numel - half_numel])
+        x_np = np.array(list(half_x_positive) + list(half_x_negative)).astype(
+            'float32')
+        np.random.shuffle(x_np)
+
+        x = fluid.dygraph.to_variable(x_np)
+        x.stop_gradient = False
+
+        alpha = 0.2
+        y = fluid.layers.leaky_relu(x, alpha=alpha)
+        y = y * y
+        z = y * y
+
+        x_np = x.numpy()
+        relu_x_np = np.maximum(x_np, alpha * x_np).astype('float32')
+        relu_x_grad_np = ((x_np > 0) + (x_np < 0) * alpha).astype('float32')
+        dy_expected = (relu_x_np * relu_x_grad_np * 2).astype('float32')
+        dz_expected = (np.power(relu_x_np, 3) * relu_x_grad_np *
+                       4).astype('float32')
+
+        random_grad_y = random_var(y.shape, low=1, high=2)
+        random_grad_z = random_var(z.shape, low=1, high=2)
+        ones_grad_y = np.ones(y.shape).astype('float32')
+        ones_grad_z = np.ones(z.shape).astype('float32')
+
+        original_random_grad_y = random_grad_y.numpy()
+        original_random_grad_z = random_grad_z.numpy()
+
+        for grad_y in [random_grad_y]:
+            for grad_z in [random_grad_z]:
+                for create_graph in [False, True]:
+                    dx_actual, = self.grad(
+                        outputs=[y, z],
+                        inputs=[x],
+                        grad_outputs=[grad_y, grad_z],
+                        create_graph=create_graph,
+                        retain_graph=True)
+
+                    grad_y_np = ones_grad_y if grad_y is None else grad_y.numpy(
+                    )
+                    grad_z_np = ones_grad_z if grad_z is None else grad_z.numpy(
+                    )
+
+                    dx_expected = dy_expected * grad_y_np + dz_expected * grad_z_np
+                    self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+                    if grad_y is not None:
+                        self.assertTrue(grad_y.stop_gradient)
+                        self.assertTrue(
+                            np.array_equal(grad_y.numpy(),
+                                           original_random_grad_y))
+
+                    if grad_z is not None:
+                        self.assertTrue(grad_z.stop_gradient)
+                        self.assertTrue(
+                            np.array_equal(grad_z.numpy(),
+                                           original_random_grad_z))
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_create_graph(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y = fluid.layers.relu(x)
+        z = y + 1
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y, z, w
+
+        dx_actual, = self.grad([w_mean], [x], create_graph=True)
+        del w_mean
+
+        self.assertFalse(dx_actual.stop_gradient)
+
+        # Theoretical result based on math calculation
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
+                       (x_np > 0) * 2).astype('float32')
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 / float(numel) *
+                           (x_np + dx_expected *
+                            (x_np > 0) * 2 / float(numel))).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_no_grad_vars(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y1 = fluid.layers.relu(x)
+        y2 = fluid.layers.relu(x)
+        z = y1 + y2
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y1, z, w
+
+        dx_actual, = self.grad(
+            [w_mean], [x], create_graph=True, no_grad_vars=[y2])
+
+        self.assertFalse(y2.stop_gradient)
+        self.assertFalse(dx_actual.stop_gradient)
+
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + y2.numpy()) *
+                       (x_np > 0) * 2).astype('float32')
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 / float(numel) *
+                           (x_np + dx_expected *
+                            (x_np > 0) * 4 / float(numel))).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+    @dygraph_guard
+    def test_example_with_gradient_accumulation_and_not_create_graph(self):
+        x = random_var(self.shape)
+        x_np = x.numpy()
+        numel = x_np.size
+        x.stop_gradient = False
+
+        y = fluid.layers.relu(x)
+        z = y + 1
+        w = z * z
+
+        w_mean = fluid.layers.reduce_mean(w)
+        del y, z, w
+
+        dx_actual, = self.grad([w_mean], [x], create_graph=False)
+        del w_mean
+
+        self.assertTrue(dx_actual.stop_gradient)
+
+        dx_expected = (1.0 / float(numel) * (np.maximum(x_np, 0) + 1) *
+                       (x_np > 0) * 2).astype('float32')
+
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
+        loss.backward()
+
+        x_grad_actual = x.gradient()
+        x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
+        self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
+
+
+class TestDygraphDoubleGradSortGradient(TestDygraphDoubleGrad):
+    def setUp(self):
+        self.sort_sum_gradient = True
+        self.shape = [5, 10]
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/imperative/__init__.py b/python/paddle/imperative/__init__.py
index 6feef89a2d..dcaf23a765 100644
--- a/python/paddle/imperative/__init__.py
+++ b/python/paddle/imperative/__init__.py
@@ -14,16 +14,16 @@
 
 # define api used to run in imperative mode
 __all__ = [
-    'BackwardStrategy', 'guard', 'Layer', 'LayerList', 'load_dygraph',
-    'save_dygraph', 'prepare_context', 'to_variable', 'TracedLayer', 'no_grad',
-    'ParameterList', 'Sequential'
+    'BackwardStrategy', 'enabled', 'grad', 'guard', 'LayerList', 'load', 'save',
+    'prepare_context', 'to_variable', 'TracedLayer', 'no_grad', 'ParameterList',
+    'Sequential'
 ]
 
 from paddle.fluid import core
-from ..fluid.dygraph.base import guard, no_grad, to_variable
-from ..fluid.dygraph.layers import Layer
+from ..fluid.dygraph.base import enabled, guard, no_grad, to_variable, grad
 from ..fluid.dygraph.container import LayerList, ParameterList, Sequential
-from ..fluid.dygraph.checkpoint import load_dygraph, save_dygraph
+from ..fluid.dygraph.checkpoint import load_dygraph as load
+from ..fluid.dygraph.checkpoint import save_dygraph as save
 from ..fluid.dygraph.parallel import prepare_context
 from ..fluid.dygraph.jit import TracedLayer
 
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index e20d902305..c164bb7829 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -95,3 +95,4 @@ from .layer.norm import InstanceNorm  #DEFINE_ALIAS
 # from .layer.rnn import LSTMCell  #DEFINE_ALIAS
 from .layer import loss  #DEFINE_ALIAS
 from .layer import conv  #DEFINE_ALIAS
+from ..fluid.dygraph.layers import Layer  #DEFINE_ALIAS
-- 
GitLab
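
Not part of the patch: the snippet below is a minimal usage sketch of the API surface this change exposes (paddle.enable_imperative / disable_imperative / in_imperative_mode, paddle.imperative.to_variable / grad / save / load, and paddle.nn.Layer), assuming a build that carries this diff (2.0-alpha era package layout). Names and call patterns are taken from the tests above; the snippet itself is illustrative, not code from the repository.

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    paddle.enable_imperative()          # alias of fluid.dygraph's enable_dygraph
    assert paddle.in_imperative_mode()

    # Gradients through the imperative engine, as exercised by the new double-grad test.
    x = paddle.imperative.to_variable(np.ones([2, 2], dtype='float32'))
    x.stop_gradient = False
    y = x * x
    dx, = paddle.imperative.grad(outputs=[y], inputs=[x], create_graph=False)

    # Checkpointing via the shortened names (formerly save_dygraph / load_dygraph).
    emb = fluid.dygraph.Embedding([10, 10])
    assert isinstance(emb, paddle.nn.Layer)   # Layer is now re-exported under paddle.nn
    paddle.imperative.save(emb.state_dict(), "./emb_dy")
    para_state_dict, opti_state_dict = paddle.imperative.load("./emb_dy")

    paddle.disable_imperative()

As in the new test_functional_paddle_imperative_dygraph_context test, enable_imperative/disable_imperative bracket the imperative mode programmatically, while paddle.imperative.guard() remains available as the context-manager form.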