diff --git a/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt b/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
index 2205502a094d228e973c877c8d4e91c0dfc289bc..9aab235837718921ca5c91e7c3e1e4992aae2277 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
@@ -13,7 +13,5 @@ set_tests_properties(test_bert_prim_cinn PROPERTIES TIMEOUT 500)
 
 if(WITH_CINN)
   set_tests_properties(test_resnet_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN")
-  set_tests_properties(
-    test_bert_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT
-    "FLAGS_deny_cinn_ops=dropout")
+  set_tests_properties(test_bert_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN")
 endif()
diff --git a/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py b/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
index 71d38e85fe362a337cf0ff00569db99b3255b24c..dc1be501ce30e713d9b66426bec162cb5732d2fa 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import os
-import platform
 import time
 import unittest
 
@@ -33,6 +32,44 @@ MODULE_NAME = 'test_bert_prim_cinn'
 MD5SUM = '71e730ee8d7aa77a215b7e898aa089af'
 SAVE_NAME = 'bert_training_data.npz'
 
+
+DY2ST_PRIM_GT = [
+    11.144556999206543,
+    10.343620300292969,
+    10.330279350280762,
+    10.276118278503418,
+    10.222086906433105,
+    10.194628715515137,
+    10.14902114868164,
+    10.096250534057617,
+    10.104615211486816,
+    9.985644340515137,
+]
+DY2ST_CINN_GT = [
+    10.649632453918457,
+    10.333406448364258,
+    10.33541202545166,
+    10.260543823242188,
+    10.219606399536133,
+    10.176884651184082,
+    10.124699592590332,
+    10.072620391845703,
+    10.112163543701172,
+    9.969393730163574,
+]
+DY2ST_PRIM_CINN_GT = [
+    11.144556999206543,
+    10.343620300292969,
+    10.330279350280762,
+    10.276118278503418,
+    10.222086906433105,
+    10.194628715515137,
+    10.149020195007324,
+    10.096250534057617,
+    10.104615211486816,
+    9.985644340515137,
+]
+
 if core.is_compiled_with_cuda():
     paddle.set_flags({'FLAGS_cudnn_deterministic': True})
 
@@ -42,9 +79,7 @@ def train(to_static, enable_prim, enable_cinn):
         paddle.set_device('gpu')
     else:
         paddle.set_device('cpu')
-    fluid.core._set_prim_all_enabled(
-        enable_prim and platform.system() == 'Linux'
-    )
+    fluid.core._set_prim_all_enabled(enable_prim)
 
     np.random.seed(SEED)
     paddle.seed(SEED)
@@ -95,7 +130,7 @@ def train(to_static, enable_prim, enable_cinn):
         loss.backward()
         optimizer.minimize(loss)
         bert.clear_gradients()
-        losses.append(loss)
+        losses.append(loss.numpy().item())
 
         print(
             "step: {}, loss: {}, batch_cost: {:.5}".format(
@@ -106,6 +141,7 @@ def train(to_static, enable_prim, enable_cinn):
         )
         if step >= 9:
             break
+    print(losses)
     return losses
 
 
@@ -113,28 +149,42 @@ class TestBert(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         download(URL, MODULE_NAME, MD5SUM, SAVE_NAME)
-        cls.dy2st = train(to_static=True, enable_prim=False, enable_cinn=False)
 
+    def tearDown(self):
+        paddle.set_flags({'FLAGS_deny_cinn_ops': ''})
+
+    @unittest.skipIf(
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
+    )
     def test_prim(self):
         dy2st_prim = train(to_static=True, enable_prim=True, enable_cinn=False)
-        np.testing.assert_allclose(self.dy2st, dy2st_prim, rtol=1e-1)
+        np.testing.assert_allclose(dy2st_prim, DY2ST_PRIM_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "paddle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_cinn(self):
+        paddle.set_flags({'FLAGS_deny_cinn_ops': "dropout"})
         dy2st_cinn = train(to_static=True, enable_prim=False, enable_cinn=True)
-        np.testing.assert_allclose(self.dy2st, dy2st_cinn, rtol=1e-6)
+        np.testing.assert_allclose(dy2st_cinn, DY2ST_CINN_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "paddle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_prim_cinn(self):
+        paddle.set_flags(
+            {'FLAGS_deny_cinn_ops': "gaussian_random;uniform_random"}
+        )
         core._add_skip_comp_ops("layer_norm")
         dy2st_prim_cinn = train(
             to_static=True, enable_prim=True, enable_cinn=True
         )
-        np.testing.assert_allclose(self.dy2st, dy2st_prim_cinn, rtol=1e-1)
+        np.testing.assert_allclose(
+            dy2st_prim_cinn, DY2ST_PRIM_CINN_GT, rtol=1e-5
+        )
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py b/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
index 07aef3c55ffd77391dfc5fa306307849b3ca035d..deda6671d52a86347febf60ab59cdfbdf8d6aebd 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
@@ -29,6 +29,82 @@ l2_decay = 1e-4
 batch_size = 2
 epoch_num = 1
 
+# On V100 (16G, CUDA 11.2), the results are as follows:
+# DY2ST_PRIM_GT = [
+#     5.8473358154296875,
+#     8.354944229125977,
+#     5.098367691040039,
+#     8.533346176147461,
+#     8.179085731506348,
+#     7.285282135009766,
+#     9.824585914611816,
+#     8.56928825378418,
+#     8.539499282836914,
+#     10.256929397583008,
+# ]
+# DY2ST_CINN_GT = [
+#     5.847336769104004,
+#     8.336246490478516,
+#     5.108744144439697,
+#     8.316713333129883,
+#     8.175262451171875,
+#     7.590441703796387,
+#     9.895681381225586,
+#     8.196207046508789,
+#     8.438933372497559,
+#     10.305074691772461,
+# ]
+# DY2ST_PRIM_CINN_GT = [
+#     5.8473358154296875,
+#     8.322463989257812,
+#     5.169863700866699,
+#     8.399882316589355,
+#     7.859550476074219,
+#     7.4672698974609375,
+#     9.828727722167969,
+#     8.270355224609375,
+#     8.456792831420898,
+#     9.919631958007812,
+# ]
+
+# The results in CI are as follows:
+DY2ST_PRIM_GT = [
+    5.82879114151001,
+    8.333706855773926,
+    5.07769250869751,
+    8.66937255859375,
+    8.411705017089844,
+    7.252340793609619,
+    9.683248519897461,
+    8.177335739135742,
+    8.195427894592285,
+    10.219732284545898,
+]
+DY2ST_CINN_GT = [
+    5.828789710998535,
+    8.340764999389648,
+    4.998944282531738,
+    8.474305152893066,
+    8.09157943725586,
+    7.440057754516602,
+    9.907357215881348,
+    8.304681777954102,
+    8.383116722106934,
+    10.120304107666016,
+]
+DY2ST_PRIM_CINN_GT = [
+    5.828784942626953,
+    8.341737747192383,
+    5.113619327545166,
+    8.625601768493652,
+    8.082450866699219,
+    7.4913249015808105,
+    9.858025550842285,
+    8.287693977355957,
+    8.435812950134277,
+    10.372406005859375,
+]
+
 if core.is_compiled_with_cuda():
     paddle.set_flags({'FLAGS_cudnn_deterministic': True})
 
@@ -109,7 +185,7 @@ def train(to_static, enable_prim, enable_cinn):
             total_acc1 += acc_top1
             total_acc5 += acc_top5
             total_sample += 1
-            losses.append(avg_loss.numpy())
+            losses.append(avg_loss.numpy().item())
 
             end_time = time.time()
             print(
@@ -123,49 +199,42 @@ def train(to_static, enable_prim, enable_cinn):
                     end_time - start_time,
                 )
             )
-            if batch_id == 10:
+            if batch_id >= 9:  # avoid dataloader throwing abort signal
                 data_loader._reset()
                 break
-
+    print(losses)
     return losses
 
 
 class TestResnet(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.dy2st = train(to_static=True, enable_prim=False, enable_cinn=False)
-
+    @unittest.skipIf(
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
+    )
     def test_prim(self):
-        # todo: to be removed after adjust of rtol
-        core._set_prim_forward_blacklist("batch_norm")
-        core._add_skip_comp_ops("batch_norm")
         dy2st_prim = train(to_static=True, enable_prim=True, enable_cinn=False)
-        # NOTE: Now dy2st is equal to dy2st_prim. With the splitting of kernels, the threshold here may need to be adjusted
-        np.testing.assert_allclose(self.dy2st, dy2st_prim, rtol=1e-6)
+        np.testing.assert_allclose(dy2st_prim, DY2ST_PRIM_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "padle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_cinn(self):
        dy2st_cinn = train(to_static=True, enable_prim=False, enable_cinn=True)
-        # TODO(0x45f): The following is only temporary thresholds, and the final thresholds needs to be discussed
-        np.testing.assert_allclose(self.dy2st[0:2], dy2st_cinn[0:2], rtol=1e-3)
-        np.testing.assert_allclose(self.dy2st, dy2st_cinn, rtol=1e-1)
+        np.testing.assert_allclose(dy2st_cinn, DY2ST_CINN_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "padle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_prim_cinn(self):
-        core._set_prim_forward_blacklist("flatten_contiguous_range")
         dy2st_prim_cinn = train(
             to_static=True, enable_prim=True, enable_cinn=True
         )
-        # TODO(0x45f): The following is only temporary thresholds, and the final thresholds need to be discussed
         np.testing.assert_allclose(
-            self.dy2st[0:2], dy2st_prim_cinn[0:2], rtol=1e-2
+            dy2st_prim_cinn, DY2ST_PRIM_CINN_GT, rtol=1e-5
         )
-        np.testing.assert_allclose(self.dy2st, dy2st_prim_cinn, rtol=1e-1)
 
 
 if __name__ == '__main__':
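Reviewer note: the hardcoded DY2ST_*_GT lists above are machine-dependent (the resnet file keeps the V100/CUDA 11.2 numbers commented out next to the CI numbers), so they will need to be re-dumped whenever kernel changes shift the numerics. The sketch below shows how the expected-loss lists could be regenerated with the same flag handling the tests use. It is a minimal sketch and not part of the patch: the helper name regenerate_gt is hypothetical, and the deny lists mirror the ones set in test_bert_prim_cinn.py.

# Hypothetical helper (not part of the patch) to re-dump the ground-truth
# losses on a new machine, mirroring the flag handling in TestBert above.
import paddle


def regenerate_gt(train_fn):
    # (constant name, train kwargs, FLAGS_deny_cinn_ops value) triples
    # copied from the bert test methods above.
    configs = [
        ('DY2ST_PRIM_GT', dict(enable_prim=True, enable_cinn=False), ''),
        ('DY2ST_CINN_GT', dict(enable_prim=False, enable_cinn=True), 'dropout'),
        (
            'DY2ST_PRIM_CINN_GT',
            dict(enable_prim=True, enable_cinn=True),
            'gaussian_random;uniform_random',
        ),
    ]
    for name, kwargs, deny_ops in configs:
        paddle.set_flags({'FLAGS_deny_cinn_ops': deny_ops})
        losses = train_fn(to_static=True, **kwargs)
        # Reset the flag between runs, as tearDown does.
        paddle.set_flags({'FLAGS_deny_cinn_ops': ''})
        print('%s = %r' % (name, losses))

Calling regenerate_gt(train) from either test module would print lists that can be pasted over the GT constants; note that FLAGS_deny_cinn_ops takes a semicolon-separated op list, as the test_prim_cinn case above shows.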