Unverified commit 792d3d76 authored by Aganlengzi, committed by GitHub

[NPU] fix lookup_table_v2_grad ACL error for model BoW (#36864)

* [NPU] fix lookup_table_v2_grad ACL error for model BoW

* add more unit tests
Parent commit: 0a963ee9
......@@ -101,6 +101,7 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> {
auto stream =
ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
int64_t padding_idx = ctx.Attr<int64_t>("padding_idx");
/* EmbeddingDenseGrad has bug on large shape, temporarily disable it.
......@@ -123,13 +124,34 @@ class LookupTableV2GradNPUKernel : public framework::OpKernel<T> {
NpuOpRunner("ZerosLike", {*table_grad_t}, {*table_grad_t});
runner_zeros.Run(stream);
// NOTE(zhiqiu): It seems in cann 20.1, the first input and output
// can be different tensor, but in cann 20.2+, it does inplace operation.
// Thus, the first input and output should be same tensor.
const auto &runner_scatter =
NpuOpRunner("ScatterAdd", {*table_grad_t, *ids_t, *output_grad_t},
{*table_grad_t}, {{"use_locking", true}});
runner_scatter.Run(stream);
if (padding_idx == kNoPadding) {
// NOTE(zhiqiu): It seems in cann 20.1, the first input and output
// can be different tensor, but in cann 20.2+, it does inplace operation.
// Thus, the first input and output should be same tensor.
const auto &runner_scatter =
NpuOpRunner("ScatterAdd", {*table_grad_t, *ids_t, *output_grad_t},
{*table_grad_t}, {{"use_locking", true}});
runner_scatter.Run(stream);
} else {
Tensor casted_ids_t;
if (ids_t->type() != framework::proto::VarType::INT32) {
casted_ids_t.mutable_data<int32_t>(ids_t->dims(), ctx.GetPlace());
const auto &cast_runner = NpuOpRunner("Cast", {*ids_t}, {casted_ids_t},
{{"dst_type", ACL_INT32}});
cast_runner.Run(stream);
} else {
casted_ids_t.ShareDataWith(*ids_t);
}
auto table_grad_dims = table_grad_t->dims();
NpuOpRunner runner;
runner.SetType("UnsortedSegmentSum")
.AddInput(*output_grad_t)
.AddInput(casted_ids_t)
.AddInput(std::vector<int64_t>{table_grad_dims[0]})
.AddOutput(*table_grad_t);
runner.Run(stream);
}
}
};
} // namespace operators
......
......@@ -38,7 +38,7 @@ class TestLookupTableV2(OpTest):
np.random.seed(SEED)
w = np.random.random([self.vocab, self.dim]).astype(self.dtype)
x = np.random.randint(
0, self.vocab, size=(self.bsz, self.seqlen)).astype(np.int32)
0, self.vocab, size=(self.bsz, self.seqlen)).astype(self.ids_dtype)
out = w[x]
if self.padding_idx != -1:
out[np.squeeze(x == self.padding_idx)] = np.zeros(self.dim)
......@@ -60,6 +60,7 @@ class TestLookupTableV2(OpTest):
def init_dtype(self):
    # Baseline configuration: float32 embedding table looked up with
    # int32 ids (the default dtype pairing for lookup_table_v2).
    self.ids_dtype = np.int32
    self.dtype = np.float32
def init_dims(self):
self.bsz = 6
......@@ -85,6 +86,7 @@ class TestLookupTableV2FP16(TestLookupTableV2):
def init_dtype(self):
    # FP16 variant: half-precision embedding table, ids stay int32.
    self.ids_dtype = np.int32
    self.dtype = np.float16
def set_npu(self):
self.__class__.use_npu = True
......@@ -105,6 +107,7 @@ class TestLookupTableV2Dim32FP16(TestLookupTableV2):
def init_dtype(self):
    # FP16 table combined with int64 ids — exercises the id-dtype
    # cast path in the NPU kernel.
    self.ids_dtype = np.int64
    self.dtype = np.float16
def init_dims(self):
self.bsz = 6
......@@ -122,5 +125,14 @@ class TestLookupTableV2WithPadding(TestLookupTableV2):
self.padding_idx = np.random.randint(0, self.vocab)
class TestLookupTableV2WithPadding1(TestLookupTableV2):
    """Padding test with int64 ids.

    With a valid padding_idx set, the NPU grad kernel takes the
    non-ScatterAdd branch (the UnsortedSegmentSum path added by this
    change), so this case covers that code path with 64-bit ids.
    """

    def init_padding_idx(self):
        # Choose a random in-range row so padding handling is exercised.
        self.padding_idx = np.random.randint(0, self.vocab)

    def init_dtype(self):
        self.ids_dtype = np.int64
        self.dtype = np.float32
# Entry point: run all lookup_table_v2 NPU unit tests in this module.
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.