未验证 提交 444c2852 作者: pangyoki 提交者: GitHub

【NPU】Add TensorCopy to NPU kernel for reduce_sum op (#31667)

* update unittest

* add TensorCopy in npu grad kernel
上级 8f08f160
......@@ -83,6 +83,11 @@ class ReduceSumGradNPUKernel : public framework::OpKernel<T> {
Tensor out_grad_tmp(out_grad->type());
out_grad_tmp.Resize(out_dims);
out_grad_tmp.mutable_data<T>(ctx.GetPlace());
+ framework::TensorCopy(
+ *out_grad, ctx.GetPlace(),
+ ctx.template device_context<platform::DeviceContext>(),
+ &out_grad_tmp);
+ out_grad_tmp.Resize(out_dims);
auto runner = NpuOpRunner("BroadcastToD", {out_grad_tmp}, {*x_grad},
{{"shape", framework::vectorize(x->dims())}});
......
......@@ -102,7 +102,9 @@ class TestReduceSumNet(unittest.TestCase):
label = paddle.static.data(
name="label", shape=[2, 1], dtype='int64')
- z = paddle.add(a, b)
+ a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None)
+ b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None)
+ z = paddle.add(a_1, b_1)
z_1 = self.set_reduce_sum_function(z)
prediction = fluid.layers.fc(input=z_1, size=2, act='softmax')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册