Unverified · Commit bb2733fa authored by W Weilong Wu, committed by GitHub

Add dygraph triple grad test, broadcast case (#37377)

Parent b505ff96
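
Reviewer note (not part of the commit): the `*_expected` arrays in the new test come from differentiating the sigmoid by hand. Writing $o = \sigma(XY + Z)$ with upstream gradient $\bar{o}$ (all ones in the test) and using $\sigma'(s) = \sigma(s)\,(1 - \sigma(s))$, the first-order gradient with respect to $X$ is

$$\frac{\partial L}{\partial X} = \big(\bar{o} \odot o \odot (1 - o)\big)\, Y^{\top},$$

which is exactly what `dx_expected = np.matmul(dout * out_np * (1 - out_np), np.transpose(y_np, axes=(0, 2, 1)))` computes below. The second- and third-order expectations reapply the product and chain rules to this same expression.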
@@ -146,5 +146,87 @@ class TestDygraphTripleGrad(TestCase):
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))


class TestDygraphTripleGradBroadcastCase(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.x_shape = [3, 2, 2]
        self.y_shape = [1, 2, 2]
        self.z_shape = [2, 2]

    def grad(self,
             outputs,
             inputs,
             grad_outputs=None,
             no_grad_vars=None,
             retain_graph=None,
             create_graph=False,
             allow_unused=False):
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused)

    @dygraph_guard
    def test_example_with_gradient_and_create_graph(self):
        x = random_var(self.x_shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.y_shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.z_shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = fluid.layers.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        dx_actual, = self.grad([out], [x], create_graph=True)
        # Theoretical result based on math calculation
        dout = np.ones(self.x_shape).astype('float32')
        dx_expected = np.matmul(dout * out_np * (1 - out_np),
                                np.transpose(y_np, axes=(0, 2, 1)))
        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))

        ddx_actual, = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on math calculation
        DDY = np.zeros(self.y_shape).astype('float32')
        DDX = np.ones(self.x_shape).astype('float32')
        double_grad_tmp1 = np.matmul(dout * out_np * (1 - out_np),
                                     np.transpose(DDY, axes=(0, 2, 1)))
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        ddx_expected = double_grad_tmp1 + np.matmul(
            double_grad_tmp3, np.transpose(y_np, axes=(0, 2, 1)))
        self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected))

        # Theoretical result based on math calculation
        d_ddout = np.zeros(self.x_shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * (
            1 - out_np) * out_np * tmp0 * tmp0
        dddx_expected = np.matmul((tmp1 + tmp2) * out_np * (1 - out_np),
                                  np.transpose(y_np, axes=(0, 2, 1)))

        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))


if __name__ == '__main__':
    unittest.main()
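
For context, here is a minimal standalone sketch of the triple-grad pattern the test exercises, using the public `paddle.grad` API rather than the test's `fluid.dygraph.grad` helper (shapes mirror the test; variable names are illustrative, not from the commit):

import paddle

# Inputs with broadcastable shapes, tracked for gradients.
x = paddle.rand([3, 2, 2])
x.stop_gradient = False
y = paddle.rand([1, 2, 2])  # broadcasts against x in matmul
y.stop_gradient = False
z = paddle.rand([2, 2])     # broadcasts against the matmul result
z.stop_gradient = False

out = paddle.nn.functional.sigmoid(paddle.matmul(x, y) + z)

# First order: create_graph=True retains the graph for higher orders.
dx, = paddle.grad([out], [x], create_graph=True)
# Second order: differentiate dx w.r.t. x, again keeping the graph.
ddx, = paddle.grad([dx], [x], create_graph=True)
# Third order: backprop through ddx accumulates into x's gradient buffer.
ddx.backward()
dddx = x.gradient()  # numpy array holding the triple gradient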