diff --git a/paddle/fluid/operators/metrics/accuracy_op_npu.cc b/paddle/fluid/operators/metrics/accuracy_op_npu.cc index 4ffcbaf55314a46888e15572e8477054b23ae2bb..9c5e157a97706b0c8a3034b05f7531277593152e 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_npu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_npu.cc @@ -23,91 +23,82 @@ template class AccuracyNPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* pred = ctx.Input("Out"); + auto* inference = ctx.Input("Out"); auto* label = ctx.Input("Label"); - // auto* logits = ctx.Input("Indices"); + auto* indices = ctx.Input("Indices"); - auto* acc = ctx.Output("Accuracy"); + auto* accuracy = ctx.Output("Accuracy"); auto* correct = ctx.Output("Correct"); auto* total = ctx.Output("Total"); auto stream = ctx.template device_context() .stream(); - // cast pred - Tensor tmp_pred(pred->type()); - tmp_pred.Resize(pred->dims()); - tmp_pred.mutable_data(ctx.GetPlace()); - auto runner_cast_pred = - NpuOpRunner("Cast", {*pred}, {tmp_pred}, - {{"dst_type", static_cast(ACL_INT32)}}); - runner_cast_pred.Run(stream); - - // cast label - Tensor tmp_label(label->type()); - tmp_label.Resize(label->dims()); - tmp_label.mutable_data(ctx.GetPlace()); - auto runner_cast_label = - NpuOpRunner("Cast", {*label}, {tmp_label}, - {{"dst_type", static_cast(ACL_INT32)}}); - runner_cast_label.Run(stream); + int num_samples = inference->dims()[0]; + if (num_samples == 0) { + return; + } // equal - Tensor tmp_equal(label->type()); - tmp_equal.Resize(label->dims()); + Tensor tmp_equal(framework::proto::VarType::BOOL); + tmp_equal.Resize(inference->dims()); tmp_equal.mutable_data(ctx.GetPlace()); auto runner_equal = - NpuOpRunner("Equal", {tmp_pred, tmp_label}, {tmp_equal}, {}); + NpuOpRunner("Equal", {*indices, *label}, {tmp_equal}, {}); runner_equal.Run(stream); // cast equal - Tensor tmp_equal_cast(label->type()); - tmp_equal_cast.Resize(label->dims()); + Tensor tmp_equal_cast(framework::proto::VarType::FP32); + tmp_equal_cast.Resize(inference->dims()); tmp_equal_cast.mutable_data(ctx.GetPlace()); - auto runner_cast_equal = - NpuOpRunner("Cast", {tmp_equal}, {tmp_equal_cast}, - {{"dst_type", static_cast(ACL_FLOAT)}}); + auto runner_cast_equal = NpuOpRunner( + "Cast", {tmp_equal}, {tmp_equal_cast}, + {{"dst_type", + static_cast(ConvertToNpuDtype(tmp_equal_cast.type()))}}); runner_cast_equal.Run(stream); - // acc - acc->mutable_data(ctx.GetPlace()); - std::vector axes_vec_1; - auto runner_acc = NpuOpRunner("ReduceMeanD", {tmp_equal_cast}, {*acc}, - {{"keep_dims", false}, {"axes", axes_vec_1}}); - runner_acc.Run(stream); - - // correct - correct->mutable_data(ctx.GetPlace()); - std::vector axes_vec_2; - auto runner_correct = - NpuOpRunner("ReduceSumD", {tmp_equal_cast}, {*correct}, - {{"keep_dims", false}, {"axes", axes_vec_2}}); - runner_correct.Run(stream); - - // ones_tensor - Tensor ones_tensor(label->type()); - ones_tensor.Resize(label->dims()); - ones_tensor.mutable_data(ctx.GetPlace()); - auto runner_oneslike = - NpuOpRunner("OnesLike", {tmp_label}, {ones_tensor}, {}); - runner_oneslike.Run(stream); - - // ones_tensor_cast - Tensor ones_tensor_cast(label->type()); - ones_tensor_cast.Resize(label->dims()); - ones_tensor_cast.mutable_data(ctx.GetPlace()); - auto runner_ones_cast = - NpuOpRunner("Cast", {ones_tensor}, {ones_tensor_cast}, - {{"dst_type", static_cast(ACL_FLOAT)}}); - runner_ones_cast.Run(stream); - - // total - total->mutable_data(ctx.GetPlace()); - std::vector axes_vec_3; - auto runner_total = - NpuOpRunner("ReduceSumD", {ones_tensor_cast}, {*total}, - {{"keep_dims", false}, {"axes", axes_vec_3}}); - runner_total.Run(stream); + // [correct] + // reduce_max + Tensor tmp_correct_max(framework::proto::VarType::FP32); + tmp_correct_max.Resize(framework::make_ddim({num_samples})); + tmp_correct_max.mutable_data(ctx.GetPlace()); + auto runner_reduce_max = + NpuOpRunner("ReduceMaxD", {tmp_equal_cast}, {tmp_correct_max}, + {{"axes", std::vector{1}}, {"keep_dims", false}}); + runner_reduce_max.Run(stream); + + // reduce_sum + Tensor tmp_correct(framework::proto::VarType::FP32); + tmp_correct.Resize(correct->dims()); + tmp_correct.mutable_data(ctx.GetPlace()); + auto runner_reduce_sum = + NpuOpRunner("ReduceSumD", {tmp_correct_max}, {tmp_correct}, + {{"axes", std::vector{0}}, {"keep_dims", false}}); + runner_reduce_sum.Run(stream); + + // cast to int + correct->mutable_data(ctx.GetPlace()); + auto runner_cast_correct = NpuOpRunner( + "Cast", {tmp_correct}, {*correct}, + {{"dst_type", static_cast(ConvertToNpuDtype(correct->type()))}}); + runner_cast_correct.Run(stream); + + // [total] + total->mutable_data(ctx.GetPlace()); + FillNpuTensorWithConstant(total, static_cast(num_samples)); + + // use `total` of type `float32` for calculating accuracy + Tensor tmp_total(framework::proto::VarType::FP32); + tmp_total.Resize(total->dims()); + tmp_total.mutable_data(ctx.GetPlace()); + FillNpuTensorWithConstant(&tmp_total, + static_cast(num_samples)); + + // [accuracy] + accuracy->mutable_data(ctx.GetPlace()); + auto runner_accuracy = + NpuOpRunner("Div", {tmp_correct, tmp_total}, {*accuracy}, {}); + runner_accuracy.Run(stream); } }; diff --git a/paddle/fluid/operators/top_k_op_npu.cc b/paddle/fluid/operators/top_k_op_npu.cc index 684bd476b6ef21bf58a990c36b1ee6f820d82caf..9785e73a4044ebb345a442dd71ae04b42e55cad7 100644 --- a/paddle/fluid/operators/top_k_op_npu.cc +++ b/paddle/fluid/operators/top_k_op_npu.cc @@ -48,7 +48,7 @@ class TopkNPUKernel : public framework::OpKernel { size_t k = static_cast(ctx.Attr("k")); output->mutable_data(ctx.GetPlace()); - indices->mutable_data(ctx.GetPlace()); + indices->mutable_data(ctx.GetPlace()); // prepare assit auto dim = input->dims().size(); @@ -62,15 +62,24 @@ class TopkNPUKernel : public framework::OpKernel { {"dim", -1}, {"largest", true}}; + Tensor tmp_indices(framework::proto::VarType::INT32); + tmp_indices.Resize(indices->dims()); + tmp_indices.mutable_data(ctx.GetPlace()); + // run ascend auto runner = NpuOpRunner("TopKD", {*input, assist_seq_tensor}, - {*output, *indices}, attr_input); - + {*output, tmp_indices}, attr_input); auto stream = ctx.template device_context() .stream(); - runner.Run(stream); + + // cast indices from INT32 to INT64 + auto dst_dtype = ConvertToNpuDtype(indices->type()); + auto runner_cast_indices = + NpuOpRunner("Cast", {tmp_indices}, {*indices}, + {{"dst_type", static_cast(dst_dtype)}}); + runner_cast_indices.Run(stream); } }; diff --git a/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py index b5175bdb19c7e5bc2e981b7f76fc2b7471d73d6f..aa22863983b87deaf007c42d221c339d9fab63e2 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_accuracy_op_npu.py @@ -35,21 +35,21 @@ class TestAccuracy(OpTest): self.set_npu() self.init_dtype() np.random.seed(SEED) - pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype) - label = pred.copy() - accuracy = np.array([1]).astype(self.dtype) - correct = np.array([11 * 1]).astype(self.dtype) - total = np.array([11 * 1]).astype(self.dtype) - - self.inputs = { - "Out": OpTest.np_dtype_to_fluid_dtype(pred), - "Label": OpTest.np_dtype_to_fluid_dtype(label), - "Indices": OpTest.np_dtype_to_fluid_dtype(pred) - } + n = 8192 + infer = np.random.random((n, 1)).astype(self.dtype) + indices = np.random.randint(0, 2, (n, 1)).astype('int64') + label = np.random.randint(0, 2, (n, 1)).astype('int64') + self.inputs = {'Out': infer, 'Indices': indices, "Label": label} + num_correct = 0 + for rowid in range(n): + for ele in indices[rowid]: + if ele == label[rowid]: + num_correct += 1 + break self.outputs = { - "Accuracy": accuracy, - "Correct": correct, - "Total": total + 'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype), + 'Correct': np.array([num_correct]).astype("int32"), + 'Total': np.array([n]).astype("int32") } def set_npu(self): @@ -69,54 +69,23 @@ class TestAccuracy2(TestAccuracy): self.set_npu() self.init_dtype() np.random.seed(SEED) - pred = np.random.uniform(1, 2, [11, 1]).astype(self.dtype) - label = np.random.uniform(4, 5, [11, 1]).astype(self.dtype) - accuracy = np.array([0]).astype(self.dtype) - correct = np.array([11 * 0]).astype(self.dtype) - total = np.array([11 * 1]).astype(self.dtype) - - self.inputs = { - "Out": OpTest.np_dtype_to_fluid_dtype(pred), - "Label": OpTest.np_dtype_to_fluid_dtype(label), - "Indices": OpTest.np_dtype_to_fluid_dtype(pred) - } - self.outputs = { - "Accuracy": accuracy, - "Correct": correct, - "Total": total - } - - -class TestAccuracy3(TestAccuracy): - def setUp(self): - self.op_type = "accuracy" - self.set_npu() - self.init_dtype() - np.random.seed(SEED) - a = np.random.randint(1, 2, [5, 1]) - b = np.random.randint(0, 1, [5, 1]) - pred = np.row_stack((a, b)).astype(self.dtype) - label = np.random.randint(1, 2, [10, 1]).astype(self.dtype) - accuracy = np.array([0.5]).astype(self.dtype) - correct = np.array([5]).astype(self.dtype) - total = np.array([10 * 1]).astype(self.dtype) - - self.inputs = { - "Out": OpTest.np_dtype_to_fluid_dtype(pred), - "Label": OpTest.np_dtype_to_fluid_dtype(label), - "Indices": OpTest.np_dtype_to_fluid_dtype(pred) - } + n = 8192 + infer = np.random.random((n, 100)).astype(self.dtype) + indices = np.random.randint(0, 1000, (n, 100)).astype('int64') + label = np.random.randint(0, 1000, (n, 1)).astype('int64') + self.inputs = {'Out': infer, 'Indices': indices, "Label": label} + num_correct = 0 + for rowid in range(n): + for ele in indices[rowid]: + if ele == label[rowid]: + num_correct += 1 + break self.outputs = { - "Accuracy": accuracy, - "Correct": correct, - "Total": total + 'Accuracy': np.array([num_correct / float(n)]).astype(self.dtype), + 'Correct': np.array([num_correct]).astype("int32"), + 'Total': np.array([n]).astype("int32") } -class TestAccuracyInt(TestAccuracy): - def init_dtype(self): - self.dtype = np.int - - if __name__ == '__main__': unittest.main()