From fb7590d487431ba3b7b26bd3e0267c7194a127ff Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Sun, 25 Apr 2021 10:46:03 +0800
Subject: [PATCH] [NPU] refine lookup_table_v2_grad npu_kernel (#32497)

* use ZerosLike instead of NPUMemsetAsync

* fix compile
---
 paddle/fluid/operators/lookup_table_v2_op_npu.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc
index 320b498156..87618b954d 100644
--- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc
+++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc
@@ -55,19 +55,19 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
     auto *ids_t = ctx.Input<framework::LoDTensor>("Ids");
-
     auto *output_grad_t =
         ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"));
     auto *table_grad_t =
         ctx.Output<framework::LoDTensor>(framework::GradVarName("W"));
-    auto *p = table_grad_t->mutable_data<T>(ctx.GetPlace());
+    table_grad_t->mutable_data<T>(ctx.GetPlace());
 
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
 
-    platform::NPUMemsetAsync(static_cast<void *>(p), 0,
-                             table_grad_t->numel() * sizeof(T), stream);
+    auto runner_zeros =
+        NpuOpRunner("ZerosLike", {*table_grad_t}, {*table_grad_t});
+    runner_zeros.Run(stream);
 
     // NOTE(zhiqiu): It seems in cann 20.1, the first input and output
     // can be different tensor, but in cann 20.2+, it does inplace operation.
-- 
GitLab