Unverified commit bb2733fa, authored by Weilong Wu, committed by GitHub

Add dygraph triple grad test, broadcast case (#37377)

Parent commit: b505ff96
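For reference (an editorial note, not part of the commit), the closed-form expressions the new test compares against follow from the chain rule for out = sigmoid(matmul(x, y) + z). A sketch of the first two orders, writing σ for the sigmoid and using the test's all-ones grad outputs dout and DDX and all-zeros DDY:

\sigma'(u) = \text{out}\,(1-\text{out}), \qquad \sigma''(u) = \text{out}\,(1-\text{out})(1-2\,\text{out}), \qquad u = x y + z,\ \text{out} = \sigma(u)

dx = \bigl(\text{dout} \odot \text{out}(1-\text{out})\bigr)\, y^{\mathsf T}, \qquad ddx = \bigl((1-2\,\text{out}) \odot \text{dout} \odot (\text{DDX}\, y) \odot \text{out}(1-\text{out})\bigr)\, y^{\mathsf T} \quad (\text{DDY} = 0)

A standalone finite-difference check of the first-order formula appears after the diff.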
@@ -146,5 +146,87 @@ class TestDygraphTripleGrad(TestCase):
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))

class TestDygraphTripleGradBroadcastCase(TestCase):
    def setUp(self):
        self.sort_sum_gradient = False
        self.x_shape = [3, 2, 2]
        self.y_shape = [1, 2, 2]
        self.z_shape = [2, 2]
    def grad(self,
             outputs,
             inputs,
             grad_outputs=None,
             no_grad_vars=None,
             retain_graph=None,
             create_graph=False,
             allow_unused=False):
        fluid.set_flags({'FLAGS_sort_sum_gradient': self.sort_sum_gradient})
        return fluid.dygraph.grad(
            outputs=outputs,
            inputs=inputs,
            grad_outputs=grad_outputs,
            no_grad_vars=no_grad_vars,
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=allow_unused)
    @dygraph_guard
    def test_example_with_gradient_and_create_graph(self):
        x = random_var(self.x_shape)
        x_np = x.numpy()
        x.stop_gradient = False

        y = random_var(self.y_shape)
        y_np = y.numpy()
        y.stop_gradient = False

        z = random_var(self.z_shape)
        z_np = z.numpy()
        numel = z_np.size
        z.stop_gradient = False

        out = fluid.layers.sigmoid(paddle.matmul(x, y) + z)
        out_np = out.numpy()

        dx_actual, = self.grad([out], [x], create_graph=True)
        # Theoretical result based on math calculation (sigmoid'(u) = out * (1 - out))
        dout = np.ones(self.x_shape).astype('float32')
        dx_expected = np.matmul(dout * out_np * (1 - out_np),
                                np.transpose(y_np, axes=(0, 2, 1)))
        self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
        ddx_actual, = self.grad([dx_actual], [x], create_graph=True)
        # Theoretical result based on math calculation
        # (sigmoid''(u) = out * (1 - out) * (1 - 2 * out))
        DDY = np.zeros(self.y_shape).astype('float32')
        DDX = np.ones(self.x_shape).astype('float32')
        double_grad_tmp1 = np.matmul(dout * out_np * (1 - out_np),
                                     np.transpose(DDY, axes=(0, 2, 1)))
        double_grad_tmp2 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        double_grad_tmp3 = (
            1 - 2 * out_np) * dout * double_grad_tmp2 * out_np * (1 - out_np)
        ddx_expected = double_grad_tmp1 + np.matmul(
            double_grad_tmp3, np.transpose(y_np, axes=(0, 2, 1)))
        self.assertTrue(np.allclose(ddx_actual.numpy(), ddx_expected))
        # Theoretical result based on math calculation
        d_ddout = np.zeros(self.x_shape).astype('float32')
        tmp0 = np.matmul(DDX, y_np) + np.matmul(x_np, DDY)
        tmp1 = (1 - 2 * out_np) * ((1 - 2 * out_np) * dout * tmp0 * tmp0)
        tmp2 = tmp0 * (1 - 2 * out_np) * d_ddout - 2 * dout * (
            1 - out_np) * out_np * tmp0 * tmp0
        dddx_expected = np.matmul(
            ((tmp1 + tmp2) * out_np * (1 - out_np)),
            np.transpose(y_np, axes=(0, 2, 1)))

        # backward() on ddx accumulates the third-order gradient into x.gradient()
        ddx_actual.backward()
        dddx_grad_actual = x.gradient()
        self.assertTrue(np.allclose(dddx_grad_actual, dddx_expected))
if __name__ == '__main__':
    unittest.main()
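As a standalone sanity check (an editorial addition, not part of this commit), the first-order closed form used in the test can be verified against finite differences with nothing but numpy. The shapes mirror the broadcast case above; the array names and seed are illustrative only.

# Editorial sketch: finite-difference check of dx = (dout * out * (1 - out)) @ y^T
# for out = sigmoid(x @ y + z) with broadcast shapes [3, 2, 2], [1, 2, 2], [2, 2].
import numpy as np

def sigmoid(u):
    return 1.0 / (1.0 + np.exp(-u))

rng = np.random.default_rng(0)
x = rng.standard_normal((3, 2, 2))
y = rng.standard_normal((1, 2, 2))
z = rng.standard_normal((2, 2))

def loss(x_):
    # Scalar objective matching grad_outputs = ones in the test.
    return sigmoid(np.matmul(x_, y) + z).sum()

out = sigmoid(np.matmul(x, y) + z)
dout = np.ones_like(out)
# Closed form: (dout * out * (1 - out)) @ y^T, broadcast over the batch dim.
dx_closed = np.matmul(dout * out * (1 - out), np.transpose(y, (0, 2, 1)))

# Central finite differences, element by element.
eps = 1e-6
dx_fd = np.zeros_like(x)
for idx in np.ndindex(x.shape):
    xp, xm = x.copy(), x.copy()
    xp[idx] += eps
    xm[idx] -= eps
    dx_fd[idx] = (loss(xp) - loss(xm)) / (2 * eps)

print(np.allclose(dx_closed, dx_fd, atol=1e-4))  # expected: True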