diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
index 1d3e5e5162ca9d3b23d4164b6d994a3ae141d5cb..8bf1398f607c80421a1e0e4fc70b1596d29f9d2e 100644
--- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
+++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc
@@ -15,9 +15,12 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/amp/check_finite_and_unscale_op.h"
 #include "paddle/fluid/operators/amp/fp16_type_traits.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/float16.h"
+
 namespace paddle {
 namespace operators {
+
 template <typename T>
 class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
   using MPDType = typename details::MPTypeTrait<T>::Type;
@@ -38,6 +41,8 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
 
     // cpy to cpu
     bool cpu_found_inf_data = false;
+    // number of inf and nans
+    int nums_inf_nans = 0;
     MPDType cpu_scale_data;
     if (platform::is_xpu_place(scale->place())) {
       memory::Copy(platform::CPUPlace(), static_cast<void*>(&cpu_scale_data),
@@ -52,48 +57,21 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
       const auto* x = xs[i];
       auto* out = outs[i];
       out->mutable_data<T>(dev_ctx.GetPlace());
 
-      framework::Tensor is_finite =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      framework::Tensor is_finite_and_nan =
-          ctx.AllocateTmpTensor<bool, platform::XPUDeviceContext>(x->dims(),
-                                                                  dev_ctx);
-      if (cpu_found_inf_data == false) {
-        int r = xpu::isfinite(dev_ctx.x_context(),
-                              reinterpret_cast<const XPUTyp*>(x->data<T>()),
-                              is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(isfinite) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::logical_not(
-            dev_ctx.x_context(),
-            reinterpret_cast<const bool*>(is_finite.data<bool>()),
-            is_finite.data<bool>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(logical_not) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        r = xpu::any(dev_ctx.x_context(), is_finite.data<bool>(),
-                     found_inf_data, x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(any) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
-        if (dev_ctx.x_context()->xpu_stream) {
-          dev_ctx.Wait();
-        }
-        memory::Copy(platform::CPUPlace(), &cpu_found_inf_data,
-                     dev_ctx.GetPlace(), found_inf_data, sizeof(bool));
+      framework::Tensor inf_nan_count =
+          ctx.AllocateTmpTensor<int, platform::XPUDeviceContext>(
+              found_inf->dims(), dev_ctx);
+
+      if (nums_inf_nans == 0) {
+        int r = xpu::count_nan_or_inf(
+            dev_ctx.x_context(), reinterpret_cast<const XPUTyp*>(x->data<T>()),
+            inf_nan_count.data<int>(), x->numel());
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "count_nan_or_inf");
+        memory::Copy(platform::CPUPlace(), &nums_inf_nans, dev_ctx.GetPlace(),
+                     inf_nan_count.data<int>(), sizeof(int));
       }
-      if (cpu_found_inf_data) {
+      if (nums_inf_nans > 0) {
+        cpu_found_inf_data = true;
         inverse_scale = 0.0;
       }
 
@@ -109,45 +87,25 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel<T> {
         int r = xpu::cast_v2(dev_ctx.x_context(),
                              reinterpret_cast<const float16*>(x->data<T>()),
                              float_x.data<MPDType>(), x->numel());
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
 
         r = xpu::scale(dev_ctx.x_context(), float_x.data<MPDType>(),
                        float_out.data<MPDType>(), x->numel(), false,
                        inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
 
         r = xpu::cast_v2(dev_ctx.x_context(), float_out.data<MPDType>(),
                          reinterpret_cast<float16*>(out->data<T>()),
                          out->numel());
-
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(cast_v2) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "cast_v2");
       } else {
         int r = xpu::scale(dev_ctx.x_context(),
                            reinterpret_cast<const XPUTyp*>(x->data<T>()),
                            reinterpret_cast<XPUTyp*>(out->data<T>()),
                            x->numel(), false, inverse_scale, 0.0);
-        PADDLE_ENFORCE_EQ(
-            r, XPU_SUCCESS,
-            platform::errors::External("XPU API(scale) return wrong "
-                                       "value[%d %s]",
-                                       r, XPUAPIErrorMsg[r]));
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "scale");
       }
     }
-    if (dev_ctx.x_context()->xpu_stream) {
-      dev_ctx.Wait();
-    }
     memory::Copy(dev_ctx.GetPlace(), found_inf_data, platform::CPUPlace(),
                  &cpu_found_inf_data, sizeof(bool));
   }
diff --git a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
index 3ef4701cdf3d081986aaa7648e8b0c10aafca7e9..e6bc61b895abbe8506352195979db42d9448c4fc 100644
--- a/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_amp_check_finite_and_scale_op_xpu.py
@@ -19,84 +19,126 @@ import paddle
 import unittest
 import numpy as np
 from op_test_xpu import XPUOpTest
-from op_test import OpTest, skip_check_grad_ci
-import paddle.fluid as fluid
+from xpu.get_test_cover_info import create_test_class, get_xpu_op_support_types, XPUOpTestWrapper
 
 paddle.enable_static()
 
 
-class TestCheckFiniteAndUnscaleOp(XPUOpTest):
-
-    def setUp(self):
-        self.op_type = "check_finite_and_unscale"
-        self.init_dtype()
-        x = np.random.random((1024, 1024)).astype(self.dtype)
-        scale = np.random.random((1)).astype(self.dtype)
-        # self.attrs = {'stop_gradient': True}
-        self.inputs = {'X': [('x0', x)], 'Scale': scale}
-        self.outputs = {
-            'FoundInfinite': np.array([0]),
-            'Out': [('out0', x / scale)],
-        }
-
-    def init_dtype(self):
-        self.dtype = np.float32
-
-    def test_check_output(self):
-        if paddle.is_compiled_with_xpu():
-            place = paddle.XPUPlace(0)
-            self.check_output_with_place(place)
-
-
-# class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.nan
-#         print("x shape = ", x.shape)
-#         print(x)
-#         scale = np.random.random((1)).astype(self.dtype)
-
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-
-#     def init_dtype(self):
-#         self.dtype = np.float32
-
-#     def test_check_output(self):
-#         # When input contains nan, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
-# class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
-#     def setUp(self):
-#         self.op_type = "check_finite_and_unscale"
-#         self.init_dtype()
-#         x = np.random.random((1024, 1024)).astype(self.dtype)
-#         x[128][128] = np.inf
-#         scale = np.random.random((1)).astype(self.dtype)
-
-#         self.inputs = {'X': [('x0', x)], 'Scale': scale}
-#         self.outputs = {
-#             'FoundInfinite': np.array([1]),
-#             'Out': [('out0', x)],
-#         }
-
-#     def init_dtype(self):
-#         self.dtype = np.float32
-
-#     def test_check_output(self):
-#         # When input contains inf, do not check the output,
-#         # since the output may be nondeterministic and will be discarded.
-#         if paddle.is_compiled_with_xpu():
-#             place = paddle.XPUPlace(0)
-#             self.check_output_with_place(place, no_check_set=['Out'])
+class XPUTestCheckFiniteAndUnscaleOp(XPUOpTestWrapper):
+
+    def __init__(self):
+        self.op_name = 'check_finite_and_unscale'
+        self.use_dynamic_create_class = False
+
+    class TestCheckFiniteAndUnscaleOpNormal(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((8, 8)).astype(self.dtype)
+            scale = np.random.random((1)).astype(np.float32)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([0]),
+                'Out': [('out0', x / scale)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place)
+
+    class TestCheckFiniteAndUnscaleOpWithNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.nan
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains nan, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInf(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.inf
+            x[idx2][idx1] = np.inf
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+    class TestCheckFiniteAndUnscaleOpWithInfAndNan(XPUOpTest):
+
+        def setUp(self):
+            self.op_type = "check_finite_and_unscale"
+            self.init_dtype()
+            x = np.random.random((256, 256)).astype(self.dtype)
+            idx1 = np.random.randint(255)
+            idx2 = np.random.randint(255)
+            x[idx1][idx2] = np.inf
+            x[idx2][idx1] = np.nan
+            scale = np.random.random((1)).astype(np.float32)
+            myscale = np.array([0.05]).astype(self.dtype)
+            self.inputs = {'X': [('x0', x)], 'Scale': scale}
+            self.outputs = {
+                'FoundInfinite': np.array([1]),
+                'Out': [('out0', x)],
+            }
+
+        def init_dtype(self):
+            self.dtype = self.in_type
+
+        def test_check_output(self):
+            # When input contains inf, do not check the output,
+            # since the output may be nondeterministic and will be discarded.
+            if paddle.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
+                self.check_output_with_place(place, no_check_set=['Out'])
+
+
+support_types = get_xpu_op_support_types('check_finite_and_unscale')
+for stype in support_types:
+    create_test_class(globals(), XPUTestCheckFiniteAndUnscaleOp, stype)
 
 
 if __name__ == '__main__':
     unittest.main()
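
Note: the behavior exercised by the new tests can be summarized in plain NumPy. The sketch below is only an illustration of the contract the tests assert, not part of the patch; the helper name reference_check_finite_and_unscale is invented for this note. With finite inputs FoundInfinite is 0 and every output equals x / scale; once any input holds a NaN or Inf, FoundInfinite is 1 and the tests skip the outputs via no_check_set=['Out'].

    import numpy as np

    def reference_check_finite_and_unscale(xs, scale):
        # FoundInfinite flags any NaN/Inf across all input tensors.
        found_inf = any(not np.isfinite(x).all() for x in xs)
        if found_inf:
            # Outputs are nondeterministic in this case; the tests above skip
            # them with no_check_set=['Out'].
            return None, np.array([1])
        # Normal path: every output is the input divided by the scale.
        return [x / scale for x in xs], np.array([0])

    # Example mirroring TestCheckFiniteAndUnscaleOpNormal's inputs.
    outs, found = reference_check_finite_and_unscale(
        [np.random.random((8, 8)).astype(np.float32)],
        np.random.random((1)).astype(np.float32))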