From bb2733fa2f399187017e967f506816ab4a99d3b3 Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Fri, 19 Nov 2021 21:01:24 +0800
Subject: [PATCH] Add dygraph triple grad test, broadcast case (#37377)

---
 .../unittests/test_imperative_triple_grad.py  | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
index d61a083083..ea8e5cfd75 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_triple_grad.py
@@ -146,5 +146,87 @@ class TestDygraphTripleGrad(TestCase):
         self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))
 
 
+class TestDygraphTripleGradBroadcastCase(TestCase):
+    def setUp(self):
+        self.sort_sum_gradient = False
+        self.x_shape = [3, 2, 2]
+        self.y_shape = [1, 2, 2]
+        self.z_shape = [2, 2]
+
+    def grad(self,
+             outputs,
+             inputs,
+             grad_outputs=None,
+             no_grad_vars=None,
+             retain_graph=None,
+             create_graph=False,
+             allow_unused=False):
+        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
+        return fluid.dygraph.grad(
+            outputs=outputs,
+            inputs=inputs,
+            grad_outputs=grad_outputs,
+            no_grad_vars=no_grad_vars,
+            retain_graph=retain_graph,
+            create_graph=create_graph,
+            allow_unused=allow_unused)
+
+    @dygraph_guard
+    def test_example_with_gradient_and_create_graph(self):
+        x = random_var(self.x_shape)
+        x_np = x.numpy()
+        x.stop_gradient = False
+
+        y = random_var(self.y_shape)
+        y_np = y.numpy()
+        y.stop_gradient = False
+
+        z = random_var(self.z_shape)
+        z_np = z.numpy()
+        numel = z_np.size
+        z.stop_gradient = False
+
+        out = fluid.layers.sigmoid(paddle.matmul(x, y) + z)
+        out_np = out.numpy()
+
+        dx_actual, = self.grad([out], [x], create_graph=True)
+        # Theoretical result based on math calculation
+        dout = np.ones(self.x_shape).astype('float32')
+        dx_expected = np.matmul(
+            dout * out_np * (1 - out_np), np.transpose(
+                y_np, axes=(0, 2, 1)))
+        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
+
+        ddx_actual, = self.grad([dx_actual], [x], create_graph=True)
+        # Theoretical result based on math calculation
+        DDY = np.zeros(self.y_shape).astype('float32')
+        DDX = np.ones(self.x_shape).astype('float32')
+        double_grad_tmp1 = np.matmul(
+            dout * out_np * (1 - out_np), np.transpose(
+                DDY, axes=(0, 2, 1)))
+        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
+        double_grad_tmp3 = (
+            1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
+        ddx_expected = double_grad_tmp1 + np.matmul(
+            double_grad_tmp3, np.transpose(
+                y_np, axes=(0, 2, 1)))
+        self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected))
+
+        # Theoretical result based on math calculation
+        d_ddout = np.zeros(self.x_shape).astype('float32')
+        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
+        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
+        tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * (
+            1 - out_np) * out_np * tmp0 * tmp0
+        dddx_expected = np.matmul(
+            ((tmp1 + tmp2) * out_np * (1 - out_np)),
+            np.transpose(
+                y_np, axes=(0, 2, 1)))
+
+        ddx_actual.backward()
+        dddx_grad_actual = x.gradient()
+        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))
+
+
 if __name__ == '__main__':
     unittest.main()
--
GitLab
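
Reviewer note (not part of the patch): the dx_expected / ddx_expected / dddx_expected values above are hand-derived for out = sigmoid(matmul(x, y) + z), with y broadcast over x's batch dimension. The standalone NumPy sketch below is one way to cross-check those closed forms against central finite differences, under the same simplifications the test bakes in (dout = ones, DDY = 0, d_ddout = 0, so DDX @ y collapses to per-column sums of y that are constant in x). Helper names such as dx_of and num_grad are illustrative, not Paddle or test APIs; float64 inputs keep the finite-difference error well below the tolerances.

import numpy as np

def sigmoid(s):
    return 1.0 / (1.0 + np.exp(-s))

rng = np.random.default_rng(0)
x = rng.standard_normal((3, 2, 2))  # mirrors self.x_shape
y = rng.standard_normal((1, 2, 2))  # mirrors self.y_shape; batch dim broadcasts
z = rng.standard_normal((2, 2))     # mirrors self.z_shape
yT = np.transpose(y, axes=(0, 2, 1))
# With DDX = ones and DDY = 0, tmp0 = DDX @ y: every entry is a column sum
# of y, independent of x.
c = np.matmul(np.ones((3, 2, 2)), y)

def out_of(t):
    return sigmoid(np.matmul(t, y) + z)

def dx_of(t):
    # d sum(out) / dx: the closed form behind dx_expected.
    o = out_of(t)
    return np.matmul(o * (1 - o), yT)

def ddx_of(t):
    # d sum(dx) / dx: the closed form behind ddx_expected
    # (double_grad_tmp1 vanishes because DDY = 0).
    o = out_of(t)
    return np.matmul((1 - 2 * o) * o * (1 - o) * c, yT)

def dddx_of(t):
    # d sum(ddx) / dx: the closed form behind dddx_expected; the bracket
    # is sigma'''(s) / sigma'(s) = (1 - 2*o)**2 - 2*o*(1 - o).
    o = out_of(t)
    sig3 = ((1 - 2 * o) ** 2 - 2 * o * (1 - o)) * o * (1 - o)
    return np.matmul(sig3 * c * c, yT)

def num_grad(f, t, eps=1e-6):
    # Central finite differences of the scalar function f at t.
    g = np.zeros_like(t)
    for idx in np.ndindex(t.shape):
        tp, tm = t.copy(), t.copy()
        tp[idx] += eps
        tm[idx] -= eps
        g[idx] = (f(tp) - f(tm)) / (2 * eps)
    return g

assert np.allclose(dx_of(x), num_grad(lambda t: out_of(t).sum(), x), atol=1e-6)
assert np.allclose(ddx_of(x), num_grad(lambda t: dx_of(t).sum(), x), atol=1e-6)
assert np.allclose(dddx_of(x), num_grad(lambda t: ddx_of(t).sum(), x), atol=1e-6)
print("analytic first/second/third gradients match finite differences")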