diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
index 7b2546f70ad1b2f30a5a75a0b8aa734821383655..b2767b1dd1cbfa7ab4ea209bb8cee3b648e5cb7c 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@@ -107,6 +107,11 @@ black_list = {
     # fp16 is slower than fp32, though fp16 is supported.
     'lookup_table',
     'lookup_table_v2',
+    'linear_interp_v2',
+    'nearest_interp_v2',
+    'bilinear_interp_v2',
+    'bicubic_interp_v2',
+    'trilinear_interp_v2',
     # default fp32 can avoid return inf when the sum value large than 65504
     'reduce_sum',
 }
diff --git a/python/paddle/fluid/contrib/tests/test_amp_list.py b/python/paddle/fluid/contrib/tests/test_amp_list.py
index fb46df1377627c5ad59e4bbe85de854122f868df..93c99b5ea1c8ffd2aa394c6f2dd8977c5deee9f5 100644
--- a/python/paddle/fluid/contrib/tests/test_amp_list.py
+++ b/python/paddle/fluid/contrib/tests/test_amp_list.py
@@ -30,6 +30,13 @@ class TestAMPList(unittest.TestCase):
             self.assertTrue(op not in amp_list.black_list)
             self.assertTrue(op not in amp_list.unsupported_list)
 
+        default_black_list = [
+            'linear_interp_v2', 'nearest_interp_v2', 'bilinear_interp_v2',
+            'bicubic_interp_v2', 'trilinear_interp_v2'
+        ]
+        for op in default_black_list:
+            self.assertTrue(op in amp_list.black_list)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/dygraph/amp/auto_cast.py b/python/paddle/fluid/dygraph/amp/auto_cast.py
index 49c036843428a4368dc5cee6f97ffff947938563..87df808213656254067fa08117c92db5b1a947fd 100644
--- a/python/paddle/fluid/dygraph/amp/auto_cast.py
+++ b/python/paddle/fluid/dygraph/amp/auto_cast.py
@@ -56,6 +56,12 @@ BLACK_LIST = {
     'cross_entropy2',
     # default fp32 can avoid return inf when the sum value large than 65504
     'reduce_sum',
+    # FP16 performance of grad op is worse than that of FP32. Use FP32 by default.
+    'linear_interp_v2',
+    'nearest_interp_v2',
+    'bilinear_interp_v2',
+    'bicubic_interp_v2',
+    'trilinear_interp_v2',
 }
 
 AMP_RELATED_FLAGS = [
@@ -72,7 +78,16 @@ AMP_RELATED_FLAGS_SETTING = {
 
 PURE_FP16_WHITE_LIST = {' '}
 PURE_FP16_BLACK_LIST = {
-    'lookup_table', 'lookup_table_v2', 'scatter', 'scatter_grad'
+    'lookup_table',
+    'lookup_table_v2',
+    'scatter',
+    'scatter_grad',
+    # FP16 performance of grad op is worse than that of FP32. Use FP32 by default.
+    'linear_interp_v2',
+    'nearest_interp_v2',
+    'bilinear_interp_v2',
+    'bicubic_interp_v2',
+    'trilinear_interp_v2',
 }
 
 BF16_WHITE_LIST = {'conv2d', 'matmul_v2'}
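
For reference, a minimal dygraph sketch (not part of this patch) of what placing the *_interp_v2 ops on the AMP black lists implies. Assumptions: a CUDA device with AMP support and the public paddle.amp.auto_cast / paddle.nn.functional.interpolate APIs; exact dtypes depend on the installed Paddle build.

    import paddle
    import paddle.nn.functional as F

    # Sketch only: under auto_cast, a white-listed op such as conv2d is expected to
    # run in FP16, while a black-listed op such as bilinear_interp_v2 is kept in FP32.
    x = paddle.rand([1, 3, 16, 16], dtype='float32')
    conv = paddle.nn.Conv2D(3, 3, kernel_size=3, padding=1)

    with paddle.amp.auto_cast():
        y = conv(x)                                             # expected FP16 output
        z = F.interpolate(y, scale_factor=2, mode='bilinear')   # expected FP32 output

    print(y.dtype, z.dtype)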