Unverified commit 934934d8, authored by wawltor, committed by GitHub

fix the backward bug of cumsum (#50997)

Parent 753fa844
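This commit changes the cumsum gradient kernels so that, when the `flatten` attribute is true, `x_grad` is first given the flattened shape of `out_grad` for the cumsum computation and only resized back to `x`'s shape at the end; previously it was resized to `x.dims()` unconditionally. A minimal dygraph sketch of the path this exercises, assuming the usual `paddle.cumsum` behaviour of flattening the input when no `axis` is given (the shapes are illustrative, not taken from the commit):

import paddle

# Illustrative sketch, not part of the commit: cumsum without an axis flattens
# the input, which drives the flatten=True branch in the grad kernel below.
x = paddle.rand([5, 6, 5])
x.stop_gradient = False
y = paddle.cumsum(x)                 # 1-D result over the flattened input
y.backward(paddle.ones_like(y))
print(x.grad.shape)                  # the gradient should come back as [5, 6, 5]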
@@ -30,9 +30,20 @@ void CumsumGradKernel(const Context& dev_ctx,
                       bool exclusive,
                       bool reverse,
                       DenseTensor* x_grad) {
-  x_grad->Resize(x.dims());
+  auto x_dims = x.dims();
+  // If the attribute `flatten` is `True`, the cumsum kernel is composed of a
+  // flatten operation followed by cumsum, so the incoming gradient tensor has
+  // to be flattened first and the result unflattened in the last step.
+  if (flatten) {
+    x_grad->Resize(out_grad.dims());
+  } else {
+    x_grad->Resize(x_dims);
+  }
   CumsumKernel<T, Context>(
       dev_ctx, out_grad, axis, flatten, exclusive, !reverse, x_grad);
+  if (flatten) {
+    x_grad->Resize(x_dims);
+  }
 }
 
 }  // namespace phi
......
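The grad kernel reuses the forward CumsumKernel with `!reverse` because the gradient of a (non-exclusive, non-reversed) cumulative sum is the reversed cumulative sum of the upstream gradient; with `flatten` set, that reversed cumsum runs over the flattened gradient and the result is reshaped back to the input's dims afterwards. A minimal NumPy sketch of that math for the default case (the helper name is made up for illustration):

import numpy as np

def cumsum_grad_flat(x, dy):
    # dL/dx_i = sum over k >= i of dL/dy_k, i.e. a reverse cumsum of dy
    g = np.cumsum(dy.ravel()[::-1])[::-1]   # cumsum over the flattened gradient
    return g.reshape(x.shape)               # "unflatten the tensor" in the last step

x = np.random.rand(5, 6, 5)
dy = np.ones(x.size)                        # upstream gradient of y = cumsum(x.ravel())
dx = cumsum_grad_flat(x, dy)
assert dx.shape == x.shape                  # x_grad is reported with x's shape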
@@ -44,9 +44,20 @@ void CumsumGradKernel(const Context& dev_ctx,
                       bool exclusive,
                       bool reverse,
                       DenseTensor* x_grad) {
-  x_grad->Resize(x.dims());
+  auto x_dims = x.dims();
+  // If the attribute `flatten` is `True`, the cumsum kernel is composed of a
+  // flatten operation followed by cumsum, so the incoming gradient tensor has
+  // to be flattened first and the result unflattened in the last step.
+  if (flatten) {
+    x_grad->Resize(out_grad.dims());
+  } else {
+    x_grad->Resize(x_dims);
+  }
   CumsumKernel<T, Context>(
       dev_ctx, out_grad, axis, flatten, exclusive, !reverse, x_grad);
+  if (flatten) {
+    x_grad->Resize(x_dims);
+  }
 }
 
 }  // namespace phi
......
@@ -27,9 +27,20 @@ void CumsumGradKernel(const Context& dev_ctx,
                       bool exclusive,
                       bool reverse,
                       DenseTensor* x_grad) {
-  x_grad->Resize(x.dims());
+  auto x_dims = x.dims();
+  // If the attribute `flatten` is `True`, the cumsum kernel is composed of a
+  // flatten operation followed by cumsum, so the incoming gradient tensor has
+  // to be flattened first and the result unflattened in the last step.
+  if (flatten) {
+    x_grad->Resize(out_grad.dims());
+  } else {
+    x_grad->Resize(x_dims);
+  }
   CumsumKernel<T, Context>(
       dev_ctx, out_grad, axis, flatten, exclusive, !reverse, x_grad);
+  if (flatten) {
+    x_grad->Resize(x_dims);
+  }
 }
 
 }  // namespace phi
......
@@ -200,6 +200,20 @@ class TestSumOp5(OpTest):
         self.check_grad(['X'], 'Out')
 
 
+class TestSumOp6(OpTest):
+    def setUp(self):
+        self.op_type = "cumsum"
+        self.attrs = {'axis': -1, 'flatten': True}
+        self.inputs = {'X': np.random.random((5, 6, 5)).astype("float64")}
+        self.outputs = {'Out': self.inputs['X'].cumsum()}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
 class TestSumOp7(OpTest):
     def setUp(self):
         self.op_type = "cumsum"
......
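The new TestSumOp6 covers the flatten=True path that the kernel change above touches: NumPy's cumsum() without an axis also flattens its input, so the reference output is 1-D even though the input is 3-D, and check_grad now exercises the reshaped backward pass. A standalone check of that reference computation (not part of the test file):

import numpy as np

x = np.random.random((5, 6, 5)).astype("float64")
out = x.cumsum()              # no axis: NumPy flattens, matching flatten=True
assert out.shape == (150,)    # 5 * 6 * 5 elements in a 1-D cumulative sum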