未验证 提交 1de6daff 编写于 作者: L Leo Chen 提交者: GitHub

[NPU] fix shape of dx in mul_grad (#31675)

* fix shape of dx

* refine code
上级 3dd992e2
...@@ -140,19 +140,15 @@ class MulGradNPUKernel : public framework::OpKernel<T> { ...@@ -140,19 +140,15 @@ class MulGradNPUKernel : public framework::OpKernel<T> {
// matmul // matmul
if (dx) { if (dx) {
// matmul [2, 5] * [12, 5] => [2, 12] // matmul [2, 5] * [12, 5] => [2, 12]
Tensor tmp_matmul(y->type()); dx->mutable_data<T>(ctx.GetPlace());
tmp_matmul.Resize( auto dx_dims = dx->dims();
framework::make_ddim({dout->dims()[0], y->dims()[0]})); dx->Resize(framework::make_ddim({dout->dims()[0], y->dims()[0]}));
tmp_matmul.mutable_data<T>(ctx.GetPlace());
auto runner_matmul = auto runner_matmul =
NpuOpRunner("MatMul", {*dout, *y}, {tmp_matmul}, NpuOpRunner("MatMul", {*dout, *y}, {*dx},
{{"transpose_x1", false}, {"transpose_x2", true}}); {{"transpose_x1", false}, {"transpose_x2", true}});
runner_matmul.Run(stream); runner_matmul.Run(stream);
// reshape [2, 12] => [2, 3, 4] // reshape [2, 12] => [2, 3, 4]
dx->mutable_data(ctx.GetPlace(), x->type()); dx->Resize(dx_dims);
framework::TensorCopy(
tmp_matmul, ctx.GetPlace(),
ctx.template device_context<platform::DeviceContext>(), dx);
} }
if (dy) { if (dy) {
...@@ -193,18 +189,15 @@ class MulGradNPUKernel : public framework::OpKernel<T> { ...@@ -193,18 +189,15 @@ class MulGradNPUKernel : public framework::OpKernel<T> {
if (dx) { if (dx) {
// tmp_dout * y [6,5] * [4,5] => [6, 4] // tmp_dout * y [6,5] * [4,5] => [6, 4]
Tensor tmp_matmul(y->type()); dx->mutable_data<T>(ctx.GetPlace());
tmp_matmul.Resize(framework::make_ddim({dout_first_dim, y->dims()[0]})); auto dx_dims = dx->dims();
tmp_matmul.mutable_data<T>(ctx.GetPlace()); dx->Resize(framework::make_ddim({dout_first_dim, y->dims()[0]}));
auto runner_matmul = auto runner_matmul =
NpuOpRunner("MatMul", {tmp_dout, *y}, {tmp_matmul}, NpuOpRunner("MatMul", {tmp_dout, *y}, {*dx},
{{"transpose_x1", false}, {"transpose_x2", true}}); {{"transpose_x1", false}, {"transpose_x2", true}});
runner_matmul.Run(stream); runner_matmul.Run(stream);
// reshape [6,4] => [2, 3, 4] // reshape [6, 4] => [2, 3, 4]
dx->mutable_data(ctx.GetPlace(), x->type()); dx->Resize(dx_dims);
framework::TensorCopy(
tmp_matmul, ctx.GetPlace(),
ctx.template device_context<platform::DeviceContext>(), dx);
} }
if (dy) { if (dy) {
// flatten x.shape [2,3,4] => [6, 4] // flatten x.shape [2,3,4] => [6, 4]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册