未验证 提交 fa5ddfd9 编写于 作者: P pangyoki 提交者: GitHub

[NPU] change Add to AddN in sum npu op (#33957)

* change Add to AddN in sum npu op

* add AddInputNames

* change fp16 to fp32 because numpy loses accuracy when adding in fp16

* delete check

* fix runner error
上级 a84e48b9
...@@ -35,23 +35,28 @@ class SumNPUKernel : public framework::OpKernel<T> { ...@@ -35,23 +35,28 @@ class SumNPUKernel : public framework::OpKernel<T> {
auto place = ctx.GetPlace(); auto place = ctx.GetPlace();
int n = static_cast<int>(x.size()); int n = static_cast<int>(x.size());
if (n == 1) { if (n == 1) {
TensorCopy(*x[0], place, out); TensorCopy(*x[0], place, out);
return; return;
} }
std::vector<framework::Tensor> inputs;
std::vector<std::string> names;
for (int i = 0; i < n; ++i) {
if (x[i] && x[i]->numel() > 0) {
inputs.push_back(*x[i]);
names.push_back("x" + std::to_string(i));
} else {
continue;
}
}
auto stream = auto stream =
ctx.template device_context<paddle::platform::NPUDeviceContext>() ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream(); .stream();
NpuOpRunner runner{"AddN", {inputs}, {*out}, {{"N", n}}};
const auto& runner = NpuOpRunner("Add", {*x[0], *x[1]}, {*out}, {}); runner.AddInputNames(names);
runner.Run(stream); runner.Run(stream);
for (int i = 2; i < n; i++) {
const auto& runner1 = NpuOpRunner("Add", {*out, *x[i]}, {*out}, {});
runner1.Run(stream);
}
} }
}; };
......
...@@ -67,7 +67,14 @@ class TestSum2(OpTest): ...@@ -67,7 +67,14 @@ class TestSum2(OpTest):
x2 = np.random.random((3, 3)).astype(self.dtype) x2 = np.random.random((3, 3)).astype(self.dtype)
x3 = np.random.random((3, 3)).astype(self.dtype) x3 = np.random.random((3, 3)).astype(self.dtype)
self.inputs = {'X': [("x0", x0), ("x1", x1), ("x2", x2), ("x3", x3)]} self.inputs = {'X': [("x0", x0), ("x1", x1), ("x2", x2), ("x3", x3)]}
y = x0 + x1 + x2 + x3 # There will be a problem if just using `y=x0+x1+x2+x3` to calculate the
# summation result as the reference standard result. The reason is that
# numpy's fp16 data has precision loss when doing `add` operation.
# For example, the results of `x0+x1+x2+x3` are different from those of
# `x3+x2+x1+x0` if the dtype is fp16.
# Therefore, converting the input to fp32 for calculation.
y = (x0.astype(np.float32) + x1.astype(np.float32) +
x2.astype(np.float32) + x3.astype(np.float32)).astype(self.dtype)
self.outputs = {'Out': y} self.outputs = {'Out': y}
self.attrs = {'use_mkldnn': False} self.attrs = {'use_mkldnn': False}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册