未验证 提交 792d3d76 编写于 作者: Aganlengzi 提交者: GitHub

[NPU] fix lookup_table_v2_grad ACL error for model BoW (#36864)

* [NPU] fix lookup_table_v2_grad ACL error for model BoW

* add more unit tests
上级 0a963ee9
...@@ -101,6 +101,7 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> { ...@@ -101,6 +101,7 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> {
auto stream = auto stream =
ctx.template device_context<paddle::platform::NPUDeviceContext>() ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream(); .stream();
int64_t padding_idx = ctx.Attr<int64_t>("padding_idx");
/* EmbeddingDenseGrad has bug on large shape, temporarily disable it. /* EmbeddingDenseGrad has bug on large shape, temporarily disable it.
...@@ -123,13 +124,34 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> { ...@@ -123,13 +124,34 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> {
NpuOpRunner("ZerosLike", {*table_grad_t}, {*table_grad_t}); NpuOpRunner("ZerosLike", {*table_grad_t}, {*table_grad_t});
runner_zeros.Run(stream); runner_zeros.Run(stream);
// NOTE(zhiqiu): It seems in cann 20.1, the first input and output if (padding_idx == kNoPadding) {
// can be different tensor, but in cann 20.2+, it does inplace operation. // NOTE(zhiqiu): It seems in cann 20.1, the first input and output
// Thus, the first input and output should be same tensor. // can be different tensor, but in cann 20.2+, it does inplace operation.
const auto &runner_scatter = // Thus, the first input and output should be same tensor.
NpuOpRunner("ScatterAdd", {*table_grad_t, *ids_t, *output_grad_t}, const auto &runner_scatter =
{*table_grad_t}, {{"use_locking", true}}); NpuOpRunner("ScatterAdd", {*table_grad_t, *ids_t, *output_grad_t},
runner_scatter.Run(stream); {*table_grad_t}, {{"use_locking", true}});
runner_scatter.Run(stream);
} else {
Tensor casted_ids_t;
if (ids_t->type() != framework::proto::VarType::INT32) {
casted_ids_t.mutable_data<int32_t>(ids_t->dims(), ctx.GetPlace());
const auto &cast_runner = NpuOpRunner("Cast", {*ids_t}, {casted_ids_t},
{{"dst_type", ACL_INT32}});
cast_runner.Run(stream);
} else {
casted_ids_t.ShareDataWith(*ids_t);
}
auto table_grad_dims = table_grad_t->dims();
NpuOpRunner runner;
runner.SetType("UnsortedSegmentSum")
.AddInput(*output_grad_t)
.AddInput(casted_ids_t)
.AddInput(std::vector<int64_t>{table_grad_dims[0]})
.AddOutput(*table_grad_t);
runner.Run(stream);
}
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -38,7 +38,7 @@ class TestLookupTableV2(OpTest): ...@@ -38,7 +38,7 @@ class TestLookupTableV2(OpTest):
np.random.seed(SEED) np.random.seed(SEED)
w = np.random.random([self.vocab, self.dim]).astype(self.dtype) w = np.random.random([self.vocab, self.dim]).astype(self.dtype)
x = np.random.randint( x = np.random.randint(
0, self.vocab, size=(self.bsz, self.seqlen)).astype(np.int32) 0, self.vocab, size=(self.bsz, self.seqlen)).astype(self.ids_dtype)
out = w[x] out = w[x]
if self.padding_idx != -1: if self.padding_idx != -1:
out[np.squeeze(x == self.padding_idx)] = np.zeros(self.dim) out[np.squeeze(x == self.padding_idx)] = np.zeros(self.dim)
...@@ -60,6 +60,7 @@ class TestLookupTableV2(OpTest): ...@@ -60,6 +60,7 @@ class TestLookupTableV2(OpTest):
def init_dtype(self): def init_dtype(self):
self.dtype = np.float32 self.dtype = np.float32
self.ids_dtype = np.int32
def init_dims(self): def init_dims(self):
self.bsz = 6 self.bsz = 6
...@@ -85,6 +86,7 @@ class TestLookupTableV2FP16(TestLookupTableV2): ...@@ -85,6 +86,7 @@ class TestLookupTableV2FP16(TestLookupTableV2):
def init_dtype(self): def init_dtype(self):
self.dtype = np.float16 self.dtype = np.float16
self.ids_dtype = np.int32
def set_npu(self): def set_npu(self):
self.__class__.use_npu = True self.__class__.use_npu = True
...@@ -105,6 +107,7 @@ class TestLookupTableV2Dim32FP16(TestLookupTableV2): ...@@ -105,6 +107,7 @@ class TestLookupTableV2Dim32FP16(TestLookupTableV2):
def init_dtype(self): def init_dtype(self):
self.dtype = np.float16 self.dtype = np.float16
self.ids_dtype = np.int64
def init_dims(self): def init_dims(self):
self.bsz = 6 self.bsz = 6
...@@ -122,5 +125,14 @@ class TestLookupTableV2WithPadding(TestLookupTableV2): ...@@ -122,5 +125,14 @@ class TestLookupTableV2WithPadding(TestLookupTableV2):
self.padding_idx = np.random.randint(0, self.vocab) self.padding_idx = np.random.randint(0, self.vocab)
class TestLookupTableV2WithPadding1(TestLookupTableV2):
    """TestLookupTableV2 variant with a random padding index, float32 data and int64 ids."""

    def init_dtype(self):
        # float32 for the table values, int64 for the lookup ids.
        self.dtype, self.ids_dtype = np.float32, np.int64

    def init_padding_idx(self):
        # Draw the padding index uniformly from [0, vocab).
        self.padding_idx = np.random.randint(low=0, high=self.vocab)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册