Unverified commit 8c61a95a, authored by Bo Zhang, committed by GitHub

【AMP OP&Test】unit test for accuracy_op (#51009)

* test_accuracy_op

* add create_test_fp/bf16_class

* cast after calculation

* change convert_uint16_to_float_ifneed

* delete TestAccuracyOpFp32 according to PR comment

* fix the rtol setting rules in bfloat16 forward
Parent: 320a5b23
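A note on the last bullet ("fix the rtol setting rules in bfloat16 forward"): bfloat16 carries only 7 explicit significand bits, so a check against a bf16 output needs a much looser tolerance than a float16 one. A back-of-the-envelope check in plain Python (the variable names are ours, not Paddle's):

```python
# bfloat16 layout: 1 sign bit, 8 exponent bits, 7 explicit significand bits.
# The gap between 1.0 and the next representable bf16 value (one ulp) is 2**-7.
bf16_ulp = 2.0 ** -7     # ~7.8e-3
fp16_ulp = 2.0 ** -10    # ~9.8e-4; float16 stores 10 significand bits

print(bf16_ulp, fp16_ulp)
# The fp16 test below checks with atol=1e-3 (about one fp16 ulp at 1.0);
# the matching bf16 tolerance is therefore on the order of 1e-2, which is
# what TestAccuracyOpBf16 uses.
```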
@@ -20,6 +20,8 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/common/bfloat16.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/kernel_registry.h"
@@ -34,6 +36,7 @@ __global__ void AccuracyCudaKernel(const int N,
                                    int* correct_data,
                                    T* accuracy,
                                    int* total_data) {
+  using MT = typename phi::dtype::MPTypeTrait<T>::Type;
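+  // MT is the higher-precision compute type: float when T is float16/bfloat16.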
   int count = 0;
   __shared__ int total[BlockSize];
@@ -64,7 +67,7 @@
 #endif
   if (threadIdx.x == 0) {
     *correct_data = result;
-    *accuracy = static_cast<T>(result) / static_cast<T>(N);
+    *accuracy = static_cast<T>(static_cast<MT>(result) / static_cast<MT>(N));
     *total_data = N;
   }
 }
@@ -136,6 +139,7 @@ PD_REGISTER_KERNEL(accuracy,
                    ALL_LAYOUT,
                    phi::AccuracyRawKernel,
                    phi::dtype::float16,
+                   phi::dtype::bfloat16,
                    float,
                    double) {
   kernel->InputAt(1).SetDataType(phi::DataType::INT64);
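The kernel hunk above is the "cast after calculation" bullet from the commit message: the correct/total counts are divided in the higher-precision MT (float when T is float16 or bfloat16, via MPTypeTrait) and only the final quotient is cast back to T, instead of casting both integers to T before dividing. A self-contained NumPy sketch of why that matters, emulating bfloat16 by zeroing the low 16 bits of the float32 pattern (hardware casts typically round to nearest even; the counts are hypothetical, picked to expose the gap):

```python
import numpy as np

def to_bf16(x):
    # Emulate a float32 -> bfloat16 -> float32 round trip: bfloat16 is the
    # upper 16 bits of the float32 pattern, so zeroing the low 16 bits
    # models the precision loss (truncation in place of round-to-nearest).
    x = np.asarray(x, dtype=np.float32)
    return (x.view(np.uint32) & np.uint32(0xFFFF0000)).view(np.float32)

correct, total = 257, 510  # hypothetical counts
exact = correct / total    # 0.503921...

# Old path: cast both integers to bfloat16 first, then divide.
naive = to_bf16(to_bf16(np.float32(correct)) / to_bf16(np.float32(total)))

# New path ("cast after calculation"): divide in float, round once at the end.
fixed = to_bf16(np.float32(correct) / np.float32(total))

print(exact, float(naive), float(fixed))  # 0.50392... 0.5 0.50390625
```

With the old path, the rounding error from casting each operand compounds with the rounding of the quotient; the new path pays for a single rounding step.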
@@ -19,7 +19,8 @@ from eager_op_test import OpTest
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid import Program, program_guard
+from paddle.fluid import Program, core, program_guard
+from paddle.fluid.tests.unittests.op_test import convert_float_to_uint16
 
 
 def accuracy_wrapper(infer, indices, label):
@@ -64,6 +65,48 @@ class TestAccuracyOpFp16(TestAccuracyOp):
         self.check_output(atol=1e-3)
 
 
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not compiled with CUDA or the place does not support bfloat16",
+)
+class TestAccuracyOpBf16(OpTest):
+    def setUp(self):
+        self.op_type = "accuracy"
+        self.python_api = accuracy_wrapper
+        self.init_dtype()
+        n = 8192
+        infer = np.random.random((n, 1)).astype(np.float32)
+        indices = np.random.randint(0, 2, (n, 1)).astype('int64')
+        label = np.random.randint(0, 2, (n, 1)).astype('int64')
+        self.inputs = {
+            'Out': convert_float_to_uint16(infer),
+            'Indices': indices,
+            'Label': label,
+        }
+        num_correct = 0
+        for rowid in range(n):
+            for ele in indices[rowid]:
+                if ele == label[rowid]:
+                    num_correct += 1
+                    break
+        self.outputs = {
+            'Accuracy': convert_float_to_uint16(
+                np.array([num_correct / float(n)]).astype(np.float32)
+            ),
+            'Correct': np.array([num_correct]).astype('int32'),
+            'Total': np.array([n]).astype('int32'),
+        }
+
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(place, atol=1e-2)
+
+
 class TestAccuracyOpError(unittest.TestCase):
     def test_type_errors(self):
         with program_guard(Program(), Program()):
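TestAccuracyOpBf16 hands the operator uint16 arrays because the test framework stores bfloat16 tensors as raw 16-bit patterns in NumPy, which has no native bfloat16 dtype. A minimal sketch of the idea behind convert_float_to_uint16 and the reverse conversion named in the commit message (convert_uint16_to_float_ifneed); the actual helpers in eager_op_test may differ in rounding details:

```python
import numpy as np

def float_to_uint16_sketch(x):
    # bfloat16 is the upper half of the float32 bit pattern, so a bf16
    # tensor can travel through NumPy as uint16 by dropping the low 16 bits.
    x = np.asarray(x, dtype=np.float32)
    return (x.view(np.uint32) >> np.uint32(16)).astype(np.uint16)

def uint16_to_float_sketch(u):
    # Reverse direction: pad 16 zero bits back on and reinterpret as float32.
    u = np.asarray(u, dtype=np.uint16)
    return (u.astype(np.uint32) << np.uint32(16)).view(np.float32)

x = np.array([0.503921568], dtype=np.float32)
bits = float_to_uint16_sketch(x)
print(bits, uint16_to_float_sketch(bits))  # round trip yields 0.50390625
```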