Unverified commit 8fb6e77b, authored by Weilong Wu, committed by GitHub

Add dygraph triple grad test (#36814)

* native commit for triple grad of sigmoid

* Updated unittests files

* init functional jacobian api

* Updated triple_test func

* Updated gradient_checker & test_script

* finish test with dtype float32

* add float64 test case

* polish code

* use atol=1e-5 with dtype float64

* fix for ci

* set timeout for test_jacobian

* fix dygraph grad to support high differential

* polish API docstring

* Updated gradient checker and some related files

* fix double grad strip error for high differential

* fix double grad strip error for high differential

* Add Sigmoid triple grad tests

* fix dygraph double grad dtype error when calling for high differential scenario

* Updated triple grad tests func

* Use np.random to initialize ddx

* Updated triple_grad_check func

* add todo for gradient checker and refine some comments

* remove additional code

* add test for warning in backward.py

* format python code

* support multi input in triple gradient checker

* Add matmul triple grad kernel

* Updated comments of TODO

* Supported some special tests

* Change code-format to follow CI std

* Updated gradient_checker.py

* Fix conflicts

* Removed unnecessary printing log

* Change code style to follow CI std

* Add Dygraph Triple Grad test
Co-authored-by: levi131 <limaolin01@baidu.com>
Co-authored-by: Jiabin Yang <360788950@qq.com>
Parent: 65fd59e2
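
For reference, the expected values hand-computed in the test below all follow from the standard sigmoid derivative chain, writing s = matmul(x, y) + z and sigma = sigmoid:

\sigma'(s) = \sigma(s)\,(1 - \sigma(s)), \qquad
\sigma''(s) = \sigma'(s)\,(1 - 2\sigma(s)), \qquad
\sigma'''(s) = \sigma'(s)\,\bigl(1 - 6\sigma(s) + 6\sigma(s)^{2}\bigr)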
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import paddle
from paddle.fluid.wrapped_decorator import wrap_decorator
from paddle.vision.models import resnet50, resnet101
import unittest
from unittest import TestCase
import numpy as np


def _dygraph_guard_(func):
    def __impl__(*args, **kwargs):
        if fluid.in_dygraph_mode():
            return func(*args, **kwargs)
        else:
            with fluid.dygraph.guard():
                return func(*args, **kwargs)

    return __impl__


dygraph_guard = wrap_decorator(_dygraph_guard_)


def random_var(size, low=-1, high=1, dtype='float32'):
    # Fixed seed: every call returns the same deterministic sample.
    np.random.seed(2021)
    x_np = np.random.uniform(low=low, high=high, size=size).astype(dtype)
    return fluid.dygraph.to_variable(x_np)


class TestDygraphTripleGrad(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.shape = [5, 5]

    def grad(self,
             outputs,
             inputs,
             grad_outputs=None,
             no_grad_vars=None,
             retain_graph=None,
             create_graph=False,
             allow_unused=False):
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused)

    @dygraph_guard
    def test_exception(self):
        with self.assertRaises(AssertionError):
            self.grad(None, None)

        shape = self.shape

        with self.assertRaises(AssertionError):
            self.grad(1, random_var(shape))

        with self.assertRaises(AssertionError):
            self.grad(random_var(shape), 1)

        with self.assertRaises(AssertionError):
            self.grad([1], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape), random_var(shape)],
                      [random_var(shape)], [random_var(shape)])

        with self.assertRaises(AssertionError):
            self.grad(
                [random_var(shape)], [random_var(shape)], no_grad_vars=[1])

        with self.assertRaises(AssertionError):
            self.grad([random_var(shape)], [random_var(shape)], no_grad_vars=1)

    @dygraph_guard
    def test_example_with_gradient_and_create_graph(self):
        x = random_var(self.shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = fluid.layers.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        dx_actual, = self.grad([out], [x], create_graph=True)
        # Theoretical result based on math calculation
        dout = np.ones(self.shape).astype('float32')
        dx_expected = np.matmul(dout * out_np * (1 - out_np),
                                np.transpose(y_np))
        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
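
        # With s = matmul(x, y) + z and out = sigmoid(s), the chain rule gives
        # d(out)/dx = (dout * sigmoid'(s)) @ y^T, and sigmoid'(s) equals
        # out * (1 - out); this is exactly dx_expected above.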

        ddx_actual, = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on math calculation
        DDY = np.zeros(self.shape).astype('float32')
        DDX = np.ones(self.shape).astype('float32')
        double_grad_tmp1 = np.matmul(dout * out_np * (1 - out_np),
                                     np.transpose(DDY))
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        ddx_expected = double_grad_tmp1 + np.matmul(double_grad_tmp3,
                                                    np.transpose(y_np))
        self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected))
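
        # DDX/DDY seed the second differentiation: double_grad_tmp2 =
        # DDX @ y + x @ DDY is the induced perturbation of s, so
        # ddx = (dout * sigmoid'(s)) @ DDY^T
        #     + (dout * sigmoid''(s) * double_grad_tmp2) @ y^T,
        # where sigmoid''(s) = out * (1 - out) * (1 - 2 * out).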

        # Theoretical result based on math calculation
        d_ddout = np.zeros(self.shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * (
            1 - out_np) * out_np * tmp0 * tmp0
        dddx_expected = np.matmul(((tmp1 + tmp2) * out_np * (1 - out_np)),
                                  np.transpose(y_np))

        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))
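
        # With DDY = 0 and d_ddout = 0 the expression reduces to
        # dddx = (dout * sigmoid'''(s) * tmp0**2) @ y^T, where tmp0 = DDX @ y
        # and sigmoid'''(s) = out * (1 - out) * (1 - 6 * out + 6 * out**2).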


if __name__ == '__main__':
    unittest.main()
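
For readers who want to poke at the feature outside the unittest harness, here is a minimal standalone sketch of the behavior the test exercises: three nested paddle.grad calls on an elementwise sigmoid, checked against the closed-form third derivative. It assumes a Paddle build that includes the sigmoid triple-grad support this change tests; the paddle.uniform input and the tolerance are illustrative choices, not part of the committed file.

# Standalone sketch (illustrative, not part of this commit): triple
# differentiation of an elementwise sigmoid via nested paddle.grad calls.
import numpy as np
import paddle

x = paddle.uniform([5, 5], min=-1.0, max=1.0)  # illustrative input
x.stop_gradient = False
out = paddle.nn.functional.sigmoid(x)

# Each grad call differentiates the (implicitly summed) previous result;
# create_graph=True keeps the graph alive for the next differentiation.
dx, = paddle.grad([out], [x], create_graph=True)
ddx, = paddle.grad([dx], [x], create_graph=True)
dddx, = paddle.grad([ddx], [x])

s = out.numpy()
# Closed form: sigmoid'''(s) = sigmoid'(s) * (1 - 6*sigmoid(s) + 6*sigmoid(s)**2)
expected = s * (1 - s) * (1 - 6 * s + 6 * s * s)
assert np.allclose(dddx.numpy(), expected, atol=1e-5)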