Unverified commit c66586b4, authored by pangyoki and committed by GitHub

[NPU] fix accuracy npu op bug and change top_k's output to int64 (#32935)

* Output indices of top_k npu op change to int64

* fix accuracy npu bug

* fix errors

* change cast method to FillNpuTensorWithConstant
Parent 5d627488
@@ -23,91 +23,82 @@ template <typename DeviceContext, typename T>
 class AccuracyNPUKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* pred = ctx.Input<Tensor>("Out");
+    auto* inference = ctx.Input<Tensor>("Out");
     auto* label = ctx.Input<Tensor>("Label");
-    // auto* logits = ctx.Input<Tensor>("Indices");
-    auto* acc = ctx.Output<Tensor>("Accuracy");
+    auto* indices = ctx.Input<Tensor>("Indices");
+    auto* accuracy = ctx.Output<Tensor>("Accuracy");
     auto* correct = ctx.Output<Tensor>("Correct");
     auto* total = ctx.Output<Tensor>("Total");
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
-    // cast pred
-    Tensor tmp_pred(pred->type());
-    tmp_pred.Resize(pred->dims());
-    tmp_pred.mutable_data<int>(ctx.GetPlace());
-    auto runner_cast_pred =
-        NpuOpRunner("Cast", {*pred}, {tmp_pred},
-                    {{"dst_type", static_cast<int>(ACL_INT32)}});
-    runner_cast_pred.Run(stream);
-    // cast label
-    Tensor tmp_label(label->type());
-    tmp_label.Resize(label->dims());
-    tmp_label.mutable_data<int>(ctx.GetPlace());
-    auto runner_cast_label =
-        NpuOpRunner("Cast", {*label}, {tmp_label},
-                    {{"dst_type", static_cast<int>(ACL_INT32)}});
-    runner_cast_label.Run(stream);
+    int num_samples = inference->dims()[0];
+    if (num_samples == 0) {
+      return;
+    }
     // equal
-    Tensor tmp_equal(label->type());
-    tmp_equal.Resize(label->dims());
+    Tensor tmp_equal(framework::proto::VarType::BOOL);
+    tmp_equal.Resize(inference->dims());
     tmp_equal.mutable_data<bool>(ctx.GetPlace());
     auto runner_equal =
-        NpuOpRunner("Equal", {tmp_pred, tmp_label}, {tmp_equal}, {});
+        NpuOpRunner("Equal", {*indices, *label}, {tmp_equal}, {});
     runner_equal.Run(stream);
     // cast equal
-    Tensor tmp_equal_cast(label->type());
-    tmp_equal_cast.Resize(label->dims());
+    Tensor tmp_equal_cast(framework::proto::VarType::FP32);
+    tmp_equal_cast.Resize(inference->dims());
     tmp_equal_cast.mutable_data<float>(ctx.GetPlace());
-    auto runner_cast_equal =
-        NpuOpRunner("Cast", {tmp_equal}, {tmp_equal_cast},
-                    {{"dst_type", static_cast<float>(ACL_FLOAT)}});
+    auto runner_cast_equal = NpuOpRunner(
+        "Cast", {tmp_equal}, {tmp_equal_cast},
+        {{"dst_type",
+          static_cast<int>(ConvertToNpuDtype(tmp_equal_cast.type()))}});
     runner_cast_equal.Run(stream);
-    // acc
-    acc->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_1;
-    auto runner_acc = NpuOpRunner("ReduceMeanD", {tmp_equal_cast}, {*acc},
-                                  {{"keep_dims", false}, {"axes", axes_vec_1}});
-    runner_acc.Run(stream);
-    // correct
-    correct->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_2;
-    auto runner_correct =
-        NpuOpRunner("ReduceSumD", {tmp_equal_cast}, {*correct},
-                    {{"keep_dims", false}, {"axes", axes_vec_2}});
-    runner_correct.Run(stream);
-    // ones_tensor
-    Tensor ones_tensor(label->type());
-    ones_tensor.Resize(label->dims());
-    ones_tensor.mutable_data<int>(ctx.GetPlace());
-    auto runner_oneslike =
-        NpuOpRunner("OnesLike", {tmp_label}, {ones_tensor}, {});
-    runner_oneslike.Run(stream);
-    // ones_tensor_cast
-    Tensor ones_tensor_cast(label->type());
-    ones_tensor_cast.Resize(label->dims());
-    ones_tensor_cast.mutable_data<float>(ctx.GetPlace());
-    auto runner_ones_cast =
-        NpuOpRunner("Cast", {ones_tensor}, {ones_tensor_cast},
-                    {{"dst_type", static_cast<float>(ACL_FLOAT)}});
-    runner_ones_cast.Run(stream);
-    // total
-    total->mutable_data<float>(ctx.GetPlace());
-    std::vector<int> axes_vec_3;
-    auto runner_total =
-        NpuOpRunner("ReduceSumD", {ones_tensor_cast}, {*total},
-                    {{"keep_dims", false}, {"axes", axes_vec_3}});
-    runner_total.Run(stream);
+    // [correct]
+    // reduce_max
+    Tensor tmp_correct_max(framework::proto::VarType::FP32);
+    tmp_correct_max.Resize(framework::make_ddim({num_samples}));
+    tmp_correct_max.mutable_data<float>(ctx.GetPlace());
+    auto runner_reduce_max =
+        NpuOpRunner("ReduceMaxD", {tmp_equal_cast}, {tmp_correct_max},
+                    {{"axes", std::vector<int>{1}}, {"keep_dims", false}});
+    runner_reduce_max.Run(stream);
+    // reduce_sum
+    Tensor tmp_correct(framework::proto::VarType::FP32);
+    tmp_correct.Resize(correct->dims());
+    tmp_correct.mutable_data<float>(ctx.GetPlace());
+    auto runner_reduce_sum =
+        NpuOpRunner("ReduceSumD", {tmp_correct_max}, {tmp_correct},
+                    {{"axes", std::vector<int>{0}}, {"keep_dims", false}});
+    runner_reduce_sum.Run(stream);
+    // cast to int
+    correct->mutable_data<int>(ctx.GetPlace());
+    auto runner_cast_correct = NpuOpRunner(
+        "Cast", {tmp_correct}, {*correct},
+        {{"dst_type", static_cast<int>(ConvertToNpuDtype(correct->type()))}});
+    runner_cast_correct.Run(stream);
+    // [total]
+    total->mutable_data<int>(ctx.GetPlace());
+    FillNpuTensorWithConstant<int>(total, static_cast<int>(num_samples));
+    // use `total` of type `float32` for calculating accuracy
+    Tensor tmp_total(framework::proto::VarType::FP32);
+    tmp_total.Resize(total->dims());
+    tmp_total.mutable_data<float>(ctx.GetPlace());
+    FillNpuTensorWithConstant<float>(&tmp_total,
+                                     static_cast<float>(num_samples));
+    // [accuracy]
+    accuracy->mutable_data<float>(ctx.GetPlace());
+    auto runner_accuracy =
+        NpuOpRunner("Div", {tmp_correct, tmp_total}, {*accuracy}, {});
+    runner_accuracy.Run(stream);
   }
 };
......
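For intuition, the rewritten accuracy kernel above is equivalent to the following NumPy sketch: a row of Indices counts as correct when any of its k entries equals the label (Equal → Cast → ReduceMaxD over axis 1), Correct is the sum of the per-row hits (ReduceSumD over axis 0), Total is a constant filled with the batch size (FillNpuTensorWithConstant), and Accuracy is their quotient (Div). This is an illustrative reference only, not PaddlePaddle API:

```python
import numpy as np

def accuracy_reference(indices, label):
    # Equal + Cast: 1.0 where a top-k index matches the label, else 0.0
    equal = (indices == label).astype(np.float32)
    # ReduceMaxD over axis 1: did any of the k entries in this row match?
    row_hit = equal.max(axis=1)
    # ReduceSumD over axis 0: number of correct rows
    correct = row_hit.sum()
    # FillNpuTensorWithConstant: total is just the batch size
    total = float(indices.shape[0])
    # Div: accuracy = correct / total
    return correct / total, int(correct), int(total)

# tiny usage example: rows 0 and 2 contain their labels, row 1 does not
idx = np.array([[1, 3], [0, 2], [5, 4]], dtype=np.int64)
lab = np.array([[3], [9], [4]], dtype=np.int64)
print(accuracy_reference(idx, lab))  # (0.666..., 2, 3)
```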
@@ -48,7 +48,7 @@ class TopkNPUKernel : public framework::OpKernel<T> {
     size_t k = static_cast<int>(ctx.Attr<int>("k"));
 
     output->mutable_data<T>(ctx.GetPlace());
-    indices->mutable_data<int>(ctx.GetPlace());
+    indices->mutable_data<int64_t>(ctx.GetPlace());
 
     // prepare assit
     auto dim = input->dims().size();
@@ -62,15 +62,24 @@ class TopkNPUKernel : public framework::OpKernel<T> {
                                  {"dim", -1},
                                  {"largest", true}};
 
+    Tensor tmp_indices(framework::proto::VarType::INT32);
+    tmp_indices.Resize(indices->dims());
+    tmp_indices.mutable_data<int>(ctx.GetPlace());
+
     // run ascend
     auto runner = NpuOpRunner("TopKD", {*input, assist_seq_tensor},
-                              {*output, *indices}, attr_input);
+                              {*output, tmp_indices}, attr_input);
     auto stream =
         ctx.template device_context<paddle::platform::NPUDeviceContext>()
             .stream();
     runner.Run(stream);
+
+    // cast indices from INT32 to INT64
+    auto dst_dtype = ConvertToNpuDtype(indices->type());
+    auto runner_cast_indices =
+        NpuOpRunner("Cast", {tmp_indices}, {*indices},
+                    {{"dst_type", static_cast<int>(dst_dtype)}});
+    runner_cast_indices.Run(stream);
   }
 };
......
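The top_k change follows a scratch-and-cast pattern: the indices are first written into a temporary int32 tensor (apparently because TopKD emits int32 on Ascend) and then Cast into the op's int64 Indices output. A minimal NumPy sketch of the resulting semantics; the helper name and the argsort-based selection are illustrative assumptions, not the Ascend implementation:

```python
import numpy as np

def top_k_int64(x, k):
    # sort descending along the last axis and keep the first k positions
    order = np.argsort(-x, axis=-1, kind='stable')[..., :k]
    tmp_indices = order.astype(np.int32)    # stand-in for TopKD's int32 output
    values = np.take_along_axis(x, order, axis=-1)
    indices = tmp_indices.astype(np.int64)  # the Cast from INT32 to INT64
    return values, indices

vals, idx = top_k_int64(np.array([[0.1, 0.9, 0.5]], dtype=np.float32), k=2)
print(vals, idx, idx.dtype)  # [[0.9 0.5]] [[1 2]] int64
```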
@@ -35,21 +35,21 @@ class TestAccuracy(OpTest):
         self.set_npu()
         self.init_dtype()
         np.random.seed(SEED)
-        pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype)
-        label = pred.copy()
-        accuracy = np.array([1]).astype(self.dtype)
-        correct = np.array([11 * 1]).astype(self.dtype)
-        total = np.array([11 * 1]).astype(self.dtype)
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
+        n = 8192
+        infer = np.random.random((n, 1)).astype(self.dtype)
+        indices = np.random.randint(0, 2, (n, 1)).astype('int64')
+        label = np.random.randint(0, 2, (n, 1)).astype('int64')
+        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
         self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
+            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
+            'Correct': np.array([num_correct]).astype("int32"),
+            'Total': np.array([n]).astype("int32")
         }
 
     def set_npu(self):
@@ -69,54 +69,23 @@ class TestAccuracy2(TestAccuracy):
         self.set_npu()
         self.init_dtype()
         np.random.seed(SEED)
-        pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype)
-        label = np.random.uniform(4, 5, [11, 1]).astype(self.dtype)
-        accuracy = np.array([0]).astype(self.dtype)
-        correct = np.array([11 * 0]).astype(self.dtype)
-        total = np.array([11 * 1]).astype(self.dtype)
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
-        self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
-        }
-
-
-class TestAccuracy3(TestAccuracy):
-    def setUp(self):
-        self.op_type = "accuracy"
-        self.set_npu()
-        self.init_dtype()
-        np.random.seed(SEED)
-        a = np.random.randint(1, 2, [5, 1])
-        b = np.random.randint(0, 1, [5, 1])
-        pred = np.row_stack((a, b)).astype(self.dtype)
-        label = np.random.randint(1, 2, [10, 1]).astype(self.dtype)
-        accuracy = np.array([0.5]).astype(self.dtype)
-        correct = np.array([5]).astype(self.dtype)
-        total = np.array([10 * 1]).astype(self.dtype)
-        self.inputs = {
-            "Out": OpTest.np_dtype_to_fluid_dtype(pred),
-            "Label": OpTest.np_dtype_to_fluid_dtype(label),
-            "Indices": OpTest.np_dtype_to_fluid_dtype(pred)
-        }
+        n = 8192
+        infer = np.random.random((n, 100)).astype(self.dtype)
+        indices = np.random.randint(0, 1000, (n, 100)).astype('int64')
+        label = np.random.randint(0, 1000, (n, 1)).astype('int64')
+        self.inputs = {'Out': infer, 'Indices': indices, "Label": label}
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
         self.outputs = {
-            "Accuracy": accuracy,
-            "Correct": correct,
-            "Total": total
+            'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype),
+            'Correct': np.array([num_correct]).astype("int32"),
+            'Total': np.array([n]).astype("int32")
         }
 
 
 class TestAccuracyInt(TestAccuracy):
     def init_dtype(self):
         self.dtype = np.int
 
 
 if __name__ == '__main__':
     unittest.main()
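Both rewritten tests compute the expected Correct with the same double loop: a row counts as correct as soon as any element of its Indices row equals that row's label. A vectorized equivalent, shown only as a standalone sanity check and not part of the test file:

```python
import numpy as np

np.random.seed(0)
n = 8192
indices = np.random.randint(0, 1000, (n, 100)).astype('int64')
label = np.random.randint(0, 1000, (n, 1)).astype('int64')

# reference loop, as written in the tests above
num_correct = 0
for rowid in range(n):
    for ele in indices[rowid]:
        if ele == label[rowid]:
            num_correct += 1
            break

# vectorized form: a row is correct if any of its indices equals its label
assert int(np.any(indices == label, axis=1).sum()) == num_correct
```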