Unverified · Commit c66586b4 · authored by pangyoki · committed by GitHub

[NPU] fix accuracy npu op bug and change top_k's output to int64 (#32935)

* Change output indices of top_k npu op to int64

* fix accuracy npu bug

* fix errors

* change cast method to FillNpuTensorWithConstant

* change cast method to FillNpuTensorWithConstant
Parent 5d627488
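The core of the fix is which tensors the accuracy kernel compares. The old NPU kernel cast the raw predictions in `Out` to int and compared them elementwise with `Label`; the metric actually asks whether any of a sample's top-k `Indices` equals its `Label`. A minimal NumPy sketch of the difference (illustrative values only, not the Paddle API):

import numpy as np

out = np.array([[0.9], [0.3]], dtype=np.float32)  # raw top-1 scores
indices = np.array([[0], [1]], dtype=np.int64)    # top-1 class ids
label = np.array([[0], [1]], dtype=np.int64)      # ground-truth classes

# old (buggy): cast the float scores to int, compare with labels
buggy_acc = (out.astype(np.int32) == label).mean()   # 0.5, meaningless
# fixed: a row is correct iff one of its top-k indices hits the label
fixed_acc = (indices == label).any(axis=1).mean()    # 1.0
print(buggy_acc, fixed_acc)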
@@ -23,91 +23,82 @@ template <typename DeviceContext, typename T>
 class AccuracyNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* pred = ctx.Input<Tensor>("Out");
+    auto* inference = ctx.Input<Tensor>("Out");
     auto* label = ctx.Input<Tensor>("Label");
-    // auto* logits = ctx.Input<Tensor>("Indices");
-    auto* acc = ctx.Output<Tensor>("Accuracy");
+    auto* indices = ctx.Input<Tensor>("Indices");
+    auto* accuracy = ctx.Output<Tensor>("Accuracy");
     auto* correct = ctx.Output<Tensor>("Correct");
     auto* total = ctx.Output<Tensor>("Total");
 
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
 
-    // cast pred
-    Tensor tmp_pred(pred->type());
-    tmp_pred.Resize(pred->dims());
-    tmp_pred.mutable_data<int>(ctx.GetPlace());
-    auto runner_cast_pred =
-        NpuOpRunner("Cast", {*pred}, {tmp_pred},
-                    {{"dst_type", static_cast<int>(ACL_INT32)}});
-    runner_cast_pred.Run(stream);
-
-    // cast label
-    Tensor tmp_label(label->type());
-    tmp_label.Resize(label->dims());
-    tmp_label.mutable_data<int>(ctx.GetPlace());
-    auto runner_cast_label =
-        NpuOpRunner("Cast", {*label}, {tmp_label},
-                    {{"dst_type", static_cast<int>(ACL_INT32)}});
-    runner_cast_label.Run(stream);
+    int num_samples = inference->dims()[0];
+    if (num_samples == 0) {
+      return;
+    }
 
     // equal
-    Tensor tmp_equal(label->type());
-    tmp_equal.Resize(label->dims());
+    Tensor tmp_equal(framework::proto::VarType::BOOL);
+    tmp_equal.Resize(inference->dims());
     tmp_equal.mutable_data<bool>(ctx.GetPlace());
     auto runner_equal =
-        NpuOpRunner("Equal", {tmp_pred, tmp_label}, {tmp_equal}, {});
+        NpuOpRunner("Equal", {*indices, *label}, {tmp_equal}, {});
     runner_equal.Run(stream);
 
     // cast equal
-    Tensor tmp_equal_cast(label->type());
-    tmp_equal_cast.Resize(label->dims());
+    Tensor tmp_equal_cast(framework::proto::VarType::FP32);
+    tmp_equal_cast.Resize(inference->dims());
     tmp_equal_cast.mutable_data<float>(ctx.GetPlace());
-    auto runner_cast_equal =
-        NpuOpRunner("Cast", {tmp_equal}, {tmp_equal_cast},
-                    {{"dst_type", static_cast<float>(ACL_FLOAT)}});
+    auto runner_cast_equal = NpuOpRunner(
+        "Cast", {tmp_equal}, {tmp_equal_cast},
+        {{"dst_type",
+          static_cast<int>(ConvertToNpuDtype(tmp_equal_cast.type()))}});
     runner_cast_equal.Run(stream);
 
-    // acc
-    acc->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_1;
-    auto runner_acc = NpuOpRunner("ReduceMeanD", {tmp_equal_cast}, {*acc},
-                                  {{"keep_dims", false}, {"axes", axes_vec_1}});
-    runner_acc.Run(stream);
-
-    // correct
-    correct->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_2;
-    auto runner_correct =
-        NpuOpRunner("ReduceSumD", {tmp_equal_cast}, {*correct},
-                    {{"keep_dims", false}, {"axes", axes_vec_2}});
-    runner_correct.Run(stream);
-
-    // ones_tensor
-    Tensor ones_tensor(label->type());
-    ones_tensor.Resize(label->dims());
-    ones_tensor.mutable_data<int>(ctx.GetPlace());
-    auto runner_oneslike =
-        NpuOpRunner("OnesLike", {tmp_label}, {ones_tensor}, {});
-    runner_oneslike.Run(stream);
-
-    // ones_tensor_cast
-    Tensor ones_tensor_cast(label->type());
-    ones_tensor_cast.Resize(label->dims());
-    ones_tensor_cast.mutable_data<float>(ctx.GetPlace());
-    auto runner_ones_cast =
-        NpuOpRunner("Cast", {ones_tensor}, {ones_tensor_cast},
-                    {{"dst_type", static_cast<float>(ACL_FLOAT)}});
-    runner_ones_cast.Run(stream);
-
-    // total
-    total->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_3;
-    auto runner_total =
-        NpuOpRunner("ReduceSumD", {ones_tensor_cast}, {*total},
-                    {{"keep_dims", false}, {"axes", axes_vec_3}});
-    runner_total.Run(stream);
+    // [correct]
+    // reduce_max
+    Tensor tmp_correct_max(framework::proto::VarType::FP32);
+    tmp_correct_max.Resize(framework::make_ddim({num_samples}));
+    tmp_correct_max.mutable_data<float>(ctx.GetPlace());
+    auto runner_reduce_max =
+        NpuOpRunner("ReduceMaxD", {tmp_equal_cast}, {tmp_correct_max},
+                    {{"axes", std::vector<int>{1}}, {"keep_dims", false}});
+    runner_reduce_max.Run(stream);
+
+    // reduce_sum
+    Tensor tmp_correct(framework::proto::VarType::FP32);
+    tmp_correct.Resize(correct->dims());
+    tmp_correct.mutable_data<float>(ctx.GetPlace());
+    auto runner_reduce_sum =
+        NpuOpRunner("ReduceSumD", {tmp_correct_max}, {tmp_correct},
+                    {{"axes", std::vector<int>{0}}, {"keep_dims", false}});
+    runner_reduce_sum.Run(stream);
+
+    // cast to int
+    correct->mutable_data<int>(ctx.GetPlace());
+    auto runner_cast_correct = NpuOpRunner(
+        "Cast", {tmp_correct}, {*correct},
+        {{"dst_type", static_cast<int>(ConvertToNpuDtype(correct->type()))}});
+    runner_cast_correct.Run(stream);
+
+    // [total]
+    total->mutable_data<int>(ctx.GetPlace());
+    FillNpuTensorWithConstant<int>(total, static_cast<int>(num_samples));
+
+    // use `total` of type `float32` for calculating accuracy
+    Tensor tmp_total(framework::proto::VarType::FP32);
+    tmp_total.Resize(total->dims());
+    tmp_total.mutable_data<float>(ctx.GetPlace());
+    FillNpuTensorWithConstant<float>(&tmp_total,
+                                     static_cast<float>(num_samples));
+
+    // [accuracy]
+    accuracy->mutable_data<float>(ctx.GetPlace());
+    auto runner_accuracy =
+        NpuOpRunner("Div", {tmp_correct, tmp_total}, {*accuracy}, {});
+    runner_accuracy.Run(stream);
   }
 };
...
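For reference, the rewritten kernel's dataflow maps directly onto plain array operations: `Equal` over `Indices`/`Label`, a cast to fp32, `ReduceMaxD` over axis 1 for a per-sample hit flag, `ReduceSumD` to count hits, and `Div` by the sample count. A NumPy emulation of that pipeline (a sketch with a made-up 3x2 `Indices` tensor; variable names mirror the kernel's temporaries):

import numpy as np

indices = np.array([[1, 3], [2, 0], [5, 5]], dtype=np.int64)  # (n, k)
label = np.array([[3], [7], [5]], dtype=np.int64)             # (n, 1)

tmp_equal = indices == label                   # Equal      -> bool (n, k)
tmp_equal_cast = tmp_equal.astype(np.float32)  # Cast       -> fp32
tmp_correct_max = tmp_equal_cast.max(axis=1)   # ReduceMaxD, axes={1} -> (n,)
tmp_correct = tmp_correct_max.sum(axis=0)      # ReduceSumD, axes={0} -> scalar
correct = int(tmp_correct)                     # Cast to int
total = indices.shape[0]                       # FillNpuTensorWithConstant
accuracy = tmp_correct / np.float32(total)     # Div
print(correct, total, accuracy)                # 2 3 0.6666667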
@@ -48,7 +48,7 @@ class TopkNPUKernel : public framework::OpKernel<T> {
     size_t k = static_cast<int>(ctx.Attr<int>("k"));
 
     output->mutable_data<T>(ctx.GetPlace());
-    indices->mutable_data<int>(ctx.GetPlace());
+    indices->mutable_data<int64_t>(ctx.GetPlace());
 
     // prepare assist
     auto dim = input->dims().size();
@@ -62,15 +62,24 @@ class TopkNPUKernel : public framework::OpKernel<T> {
                               {"dim", -1},
                               {"largest", true}};
 
+    Tensor tmp_indices(framework::proto::VarType::INT32);
+    tmp_indices.Resize(indices->dims());
+    tmp_indices.mutable_data<int>(ctx.GetPlace());
+
     // run ascend
     auto runner = NpuOpRunner("TopKD", {*input, assist_seq_tensor},
-                              {*output, *indices}, attr_input);
+                              {*output, tmp_indices}, attr_input);
 
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
     runner.Run(stream);
+
+    // cast indices from INT32 to INT64
+    auto dst_dtype = ConvertToNpuDtype(indices->type());
+    auto runner_cast_indices =
+        NpuOpRunner("Cast", {tmp_indices}, {*indices},
+                    {{"dst_type", static_cast<int>(dst_dtype)}});
+    runner_cast_indices.Run(stream);
   }
 };
...
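The pattern above works around the TopKD op emitting INT32 indices: the kernel now runs TopKD into a temporary INT32 tensor and then casts into the int64 `indices` output the framework expects. The same compute-in-int32, publish-as-int64 flow in NumPy terms (hypothetical helper `topk_int64`, not the Paddle API):

import numpy as np

def topk_int64(x, k):
    # argsort descending along the last axis; indices start life as int32,
    # mirroring what the TopKD op produces
    tmp_indices = np.argsort(-x, axis=-1)[..., :k].astype(np.int32)
    values = np.take_along_axis(x, tmp_indices, axis=-1)
    return values, tmp_indices.astype(np.int64)  # the final Cast to INT64

x = np.array([[0.1, 0.9, 0.4]], dtype=np.float32)
values, indices = topk_int64(x, 2)
print(values, indices, indices.dtype)  # [[0.9 0.4]] [[1 2]] int64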
@@ -35,21 +35,21 @@ class TestAccuracy(OpTest):
         self.set_npu()
         self.init_dtype()
         np.random.seed(SEED)
-        pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype)
-        label = pred.copy()
-        accuracy = np.array([1]).astype(self.dtype)
-        correct = np.array([11 * 1]).astype(self.dtype)
-        total = np.array([11 * 1]).astype(self.dtype)
-
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
+        n = 8192
+        infer = np.random.random((n, 1)).astype(self.dtype)
+        indices = np.random.randint(0, 2, (n, 1)).astype('int64')
+        label = np.random.randint(0, 2, (n, 1)).astype('int64')
+        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
+
         self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
+            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
+            'Correct': np.array([num_correct]).astype("int32"),
+            'Total': np.array([n]).astype("int32")
         }
 
     def set_npu(self):
@@ -69,54 +69,23 @@ class TestAccuracy2(TestAccuracy):
         self.set_npu()
         self.init_dtype()
         np.random.seed(SEED)
-        pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype)
-        label = np.random.uniform(4, 5, [11, 1]).astype(self.dtype)
-        accuracy = np.array([0]).astype(self.dtype)
-        correct = np.array([11 * 0]).astype(self.dtype)
-        total = np.array([11 * 1]).astype(self.dtype)
-
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
-        self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
-        }
-
-
-class TestAccuracy3(TestAccuracy):
-    def setUp(self):
-        self.op_type = "accuracy"
-        self.set_npu()
-        self.init_dtype()
-        np.random.seed(SEED)
-        a = np.random.randint(1, 2, [5, 1])
-        b = np.random.randint(0, 1, [5, 1])
-        pred = np.row_stack((a, b)).astype(self.dtype)
-        label = np.random.randint(1, 2, [10, 1]).astype(self.dtype)
-        accuracy = np.array([0.5]).astype(self.dtype)
-        correct = np.array([5]).astype(self.dtype)
-        total = np.array([10 * 1]).astype(self.dtype)
-
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
+        n = 8192
+        infer = np.random.random((n, 100)).astype(self.dtype)
+        indices = np.random.randint(0, 1000, (n, 100)).astype('int64')
+        label = np.random.randint(0, 1000, (n, 1)).astype('int64')
+        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
+
         self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
+            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
+            'Correct': np.array([num_correct]).astype("int32"),
+            'Total': np.array([n]).astype("int32")
         }
 
-
-class TestAccuracyInt(TestAccuracy):
-    def init_dtype(self):
-        self.dtype = np.int
-
 
 if __name__ == '__main__':
     unittest.main()
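As a side note, the per-row `num_correct` loop used in both tests can be collapsed into a single vectorized expression; assuming the same `indices` of shape (n, k) and `label` of shape (n, 1), the following is equivalent:

import numpy as np

n = 8192
indices = np.random.randint(0, 1000, (n, 100)).astype('int64')
label = np.random.randint(0, 1000, (n, 1)).astype('int64')

# a row counts once if any of its k indices matches the row's label,
# exactly what the for/break loop in the tests computes
num_correct = int((indices == label).any(axis=1).sum())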