diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc index 505e48054e75689e95b03e9ceb82de6fdd9a529d..e2cd7ca353ccfd64edd68c83208aa0ee2e459d24 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc @@ -83,6 +83,11 @@ class ReduceSumGradNPUKernel : public framework::OpKernel { Tensor out_grad_tmp(out_grad->type()); out_grad_tmp.Resize(out_dims); out_grad_tmp.mutable_data(ctx.GetPlace()); + framework::TensorCopy( + *out_grad, ctx.GetPlace(), + ctx.template device_context(), + &out_grad_tmp); + out_grad_tmp.Resize(out_dims); auto runner = NpuOpRunner("BroadcastToD", {out_grad_tmp}, {*x_grad}, {{"shape", framework::vectorize(x->dims())}}); diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py index caae0507c2645dda6faaeab52a07d7134721cb98..dea5141a024c84655d672eb44c2a2c4e9b9c0501 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py @@ -102,7 +102,9 @@ class TestReduceSumNet(unittest.TestCase): label = paddle.static.data( name="label", shape=[2, 1], dtype='int64') - z = paddle.add(a, b) + a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None) + b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None) + z = paddle.add(a_1, b_1) z_1 = self.set_reduce_sum_function(z) prediction = fluid.layers.fc(input=z_1, size=2, act='softmax')