diff --git a/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt b/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
index 2205502a094d228e973c877c8d4e91c0dfc289bc..9aab235837718921ca5c91e7c3e1e4992aae2277 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/prim/model/CMakeLists.txt
@@ -13,7 +13,5 @@ set_tests_properties(test_bert_prim_cinn PROPERTIES TIMEOUT 500)
 
 if(WITH_CINN)
   set_tests_properties(test_resnet_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN")
-  set_tests_properties(
-    test_bert_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT
-    "FLAGS_deny_cinn_ops=dropout")
+  set_tests_properties(test_bert_prim_cinn PROPERTIES LABELS "RUN_TYPE=CINN")
 endif()
diff --git a/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py b/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
index 71d38e85fe362a337cf0ff00569db99b3255b24c..dc1be501ce30e713d9b66426bec162cb5732d2fa 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/test_bert_prim_cinn.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import os
-import platform
 import time
 import unittest
 
@@ -33,6 +32,44 @@ MODULE_NAME = 'test_bert_prim_cinn'
 MD5SUM = '71e730ee8d7aa77a215b7e898aa089af'
 SAVE_NAME = 'bert_training_data.npz'
 
+
+DY2ST_PRIM_GT = [
+    11.144556999206543,
+    10.343620300292969,
+    10.330279350280762,
+    10.276118278503418,
+    10.222086906433105,
+    10.194628715515137,
+    10.14902114868164,
+    10.096250534057617,
+    10.104615211486816,
+    9.985644340515137,
+]
+DY2ST_CINN_GT = [
+    10.649632453918457,
+    10.333406448364258,
+    10.33541202545166,
+    10.260543823242188,
+    10.219606399536133,
+    10.176884651184082,
+    10.124699592590332,
+    10.072620391845703,
+    10.112163543701172,
+    9.969393730163574,
+]
+DY2ST_PRIM_CINN_GT = [
+    11.144556999206543,
+    10.343620300292969,
+    10.330279350280762,
+    10.276118278503418,
+    10.222086906433105,
+    10.194628715515137,
+    10.149020195007324,
+    10.096250534057617,
+    10.104615211486816,
+    9.985644340515137,
+]
+
 if core.is_compiled_with_cuda():
     paddle.set_flags({'FLAGS_cudnn_deterministic': True})
 
@@ -42,9 +79,7 @@ def train(to_static, enable_prim, enable_cinn):
         paddle.set_device('gpu')
     else:
         paddle.set_device('cpu')
-    fluid.core._set_prim_all_enabled(
-        enable_prim and platform.system() == 'Linux'
-    )
+    fluid.core._set_prim_all_enabled(enable_prim)
 
     np.random.seed(SEED)
     paddle.seed(SEED)
@@ -95,7 +130,7 @@ def train(to_static, enable_prim, enable_cinn):
         loss.backward()
         optimizer.minimize(loss)
         bert.clear_gradients()
-        losses.append(loss)
+        losses.append(loss.numpy().item())
 
         print(
             "step: {}, loss: {}, batch_cost: {:.5}".format(
@@ -106,6 +141,7 @@ def train(to_static, enable_prim, enable_cinn):
         )
         if step >= 9:
             break
+    print(losses)
     return losses
 
 
@@ -113,28 +149,42 @@ class TestBert(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         download(URL, MODULE_NAME, MD5SUM, SAVE_NAME)
-        cls.dy2st = train(to_static=True, enable_prim=False, enable_cinn=False)
 
+    def tearDown(self):
+        paddle.set_flags({'FLAGS_deny_cinn_ops': ''})
+
+    @unittest.skipIf(
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
+    )
     def test_prim(self):
         dy2st_prim = train(to_static=True, enable_prim=True, enable_cinn=False)
-        np.testing.assert_allclose(self.dy2st, dy2st_prim, rtol=1e-1)
+        np.testing.assert_allclose(dy2st_prim, DY2ST_PRIM_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "paddle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_cinn(self):
+        paddle.set_flags({'FLAGS_deny_cinn_ops': "dropout"})
         dy2st_cinn = train(to_static=True, enable_prim=False, enable_cinn=True)
-        np.testing.assert_allclose(self.dy2st, dy2st_cinn, rtol=1e-6)
+        np.testing.assert_allclose(dy2st_cinn, DY2ST_CINN_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "paddle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_prim_cinn(self):
+        paddle.set_flags(
+            {'FLAGS_deny_cinn_ops': "gaussian_random;uniform_random"}
+        )
         core._add_skip_comp_ops("layer_norm")
         dy2st_prim_cinn = train(
             to_static=True, enable_prim=True, enable_cinn=True
         )
-        np.testing.assert_allclose(self.dy2st, dy2st_prim_cinn, rtol=1e-1)
+        np.testing.assert_allclose(
+            dy2st_prim_cinn, DY2ST_PRIM_CINN_GT, rtol=1e-5
+        )
 
 
 if __name__ == '__main__':
diff --git a/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py b/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
index 07aef3c55ffd77391dfc5fa306307849b3ca035d..deda6671d52a86347febf60ab59cdfbdf8d6aebd 100644
--- a/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
+++ b/python/paddle/fluid/tests/unittests/prim/model/test_resnet_prim_cinn.py
@@ -29,6 +29,82 @@ l2_decay = 1e-4
 batch_size = 2
 epoch_num = 1
 
+# On V100 (16G, CUDA 11.2), the results are as follows:
+# DY2ST_PRIM_GT = [
+#     5.8473358154296875,
+#     8.354944229125977,
+#     5.098367691040039,
+#     8.533346176147461,
+#     8.179085731506348,
+#     7.285282135009766,
+#     9.824585914611816,
+#     8.56928825378418,
+#     8.539499282836914,
+#     10.256929397583008,
+# ]
+# DY2ST_CINN_GT = [
+#     5.847336769104004,
+#     8.336246490478516,
+#     5.108744144439697,
+#     8.316713333129883,
+#     8.175262451171875,
+#     7.590441703796387,
+#     9.895681381225586,
+#     8.196207046508789,
+#     8.438933372497559,
+#     10.305074691772461,
+# ]
+# DY2ST_PRIM_CINN_GT = [
+#     5.8473358154296875,
+#     8.322463989257812,
+#     5.169863700866699,
+#     8.399882316589355,
+#     7.859550476074219,
+#     7.4672698974609375,
+#     9.828727722167969,
+#     8.270355224609375,
+#     8.456792831420898,
+#     9.919631958007812,
+# ]
+
+# The results in CI are as follows:
+DY2ST_PRIM_GT = [
+    5.82879114151001,
+    8.333706855773926,
+    5.07769250869751,
+    8.66937255859375,
+    8.411705017089844,
+    7.252340793609619,
+    9.683248519897461,
+    8.177335739135742,
+    8.195427894592285,
+    10.219732284545898,
+]
+DY2ST_CINN_GT = [
+    5.828789710998535,
+    8.340764999389648,
+    4.998944282531738,
+    8.474305152893066,
+    8.09157943725586,
+    7.440057754516602,
+    9.907357215881348,
+    8.304681777954102,
+    8.383116722106934,
+    10.120304107666016,
+]
+DY2ST_PRIM_CINN_GT = [
+    5.828784942626953,
+    8.341737747192383,
+    5.113619327545166,
+    8.625601768493652,
+    8.082450866699219,
+    7.4913249015808105,
+    9.858025550842285,
+    8.287693977355957,
+    8.435812950134277,
+    10.372406005859375,
+]
+
 if core.is_compiled_with_cuda():
     paddle.set_flags({'FLAGS_cudnn_deterministic': True})
 
@@ -109,7 +185,7 @@ def train(to_static, enable_prim, enable_cinn):
             total_acc1 += acc_top1
             total_acc5 += acc_top5
             total_sample += 1
-            losses.append(avg_loss.numpy())
+            losses.append(avg_loss.numpy().item())
 
             end_time = time.time()
             print(
@@ -123,49 +199,42 @@ def train(to_static, enable_prim, enable_cinn):
                     end_time - start_time,
                 )
             )
-            if batch_id == 10:
+            if batch_id >= 9:  # avoid dataloader throwing abort signal
                 data_loader._reset()
                 break
-
+    print(losses)
     return losses
 
 
 class TestResnet(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.dy2st = train(to_static=True, enable_prim=False, enable_cinn=False)
-
+    @unittest.skipIf(
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
+    )
     def test_prim(self):
-        # todo: to be removed after adjust of rtol
-        core._set_prim_forward_blacklist("batch_norm")
-        core._add_skip_comp_ops("batch_norm")
         dy2st_prim = train(to_static=True, enable_prim=True, enable_cinn=False)
-        # NOTE: Now dy2st is equal to dy2st_prim. With the splitting of kernels, the threshold here may need to be adjusted
-        np.testing.assert_allclose(self.dy2st, dy2st_prim, rtol=1e-6)
+        np.testing.assert_allclose(dy2st_prim, DY2ST_PRIM_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "padle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_cinn(self):
        dy2st_cinn = train(to_static=True, enable_prim=False, enable_cinn=True)
-        # TODO(0x45f): The following is only temporary thresholds, and the final thresholds needs to be discussed
-        np.testing.assert_allclose(self.dy2st[0:2], dy2st_cinn[0:2], rtol=1e-3)
-        np.testing.assert_allclose(self.dy2st, dy2st_cinn, rtol=1e-1)
+        np.testing.assert_allclose(dy2st_cinn, DY2ST_CINN_GT, rtol=1e-5)
 
     @unittest.skipIf(
-        not paddle.is_compiled_with_cinn(), "padle is not compiled with CINN"
+        not (paddle.is_compiled_with_cinn() and paddle.is_compiled_with_cuda()),
+        "paddle is not compiled with CINN and CUDA",
     )
     def test_prim_cinn(self):
-        core._set_prim_forward_blacklist("flatten_contiguous_range")
         dy2st_prim_cinn = train(
             to_static=True, enable_prim=True, enable_cinn=True
         )
-        # TODO(0x45f): The following is only temporary thresholds, and the final thresholds need to be discussed
         np.testing.assert_allclose(
-            self.dy2st[0:2], dy2st_prim_cinn[0:2], rtol=1e-2
+            dy2st_prim_cinn, DY2ST_PRIM_CINN_GT, rtol=1e-5
         )
-        np.testing.assert_allclose(self.dy2st, dy2st_prim_cinn, rtol=1e-1)
 
 
 if __name__ == '__main__':
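Reviewer note: the hardcoded DY2ST_*_GT lists above are machine-dependent (the resnet file keeps the V100/CUDA 11.2 numbers commented out next to the CI numbers), so they will need to be re-dumped whenever kernel changes shift the numerics. The sketch below shows how the expected-loss lists could be regenerated with the same flag handling the tests use. It is a minimal sketch and not part of the patch: the helper name regenerate_gt is hypothetical, and the deny lists mirror the ones set in test_bert_prim_cinn.py.

# Hypothetical helper (not part of the patch) to re-dump the ground-truth
# losses on a new machine, mirroring the flag handling in TestBert above.
import paddle


def regenerate_gt(train_fn):
    # (constant name, train kwargs, FLAGS_deny_cinn_ops value) triples
    # copied from the bert test methods above.
    configs = [
        ('DY2ST_PRIM_GT', dict(enable_prim=True, enable_cinn=False), ''),
        ('DY2ST_CINN_GT', dict(enable_prim=False, enable_cinn=True), 'dropout'),
        (
            'DY2ST_PRIM_CINN_GT',
            dict(enable_prim=True, enable_cinn=True),
            'gaussian_random;uniform_random',
        ),
    ]
    for name, kwargs, deny_ops in configs:
        paddle.set_flags({'FLAGS_deny_cinn_ops': deny_ops})
        losses = train_fn(to_static=True, **kwargs)
        # Reset the flag between runs, as tearDown does.
        paddle.set_flags({'FLAGS_deny_cinn_ops': ''})
        print('%s = %r' % (name, losses))

Calling regenerate_gt(train) from either test module would print lists that can be pasted over the GT constants; note that FLAGS_deny_cinn_ops takes a semicolon-separated op list, as the test_prim_cinn case above shows.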