From 355caee18b18e8517d069619694450d0206012b4 Mon Sep 17 00:00:00 2001
From: wangchaochaohu
Date: Wed, 27 May 2020 14:49:18 +0800
Subject: [PATCH] fix conv_transpose Op fp16 error test=develop (#24695)

---
 paddle/fluid/operators/conv_cudnn_helper.h  |   4 +-
 .../operators/conv_transpose_cudnn_op.cu    |   6 +-
 .../unittests/test_conv2d_transpose_op.py   | 174 +++++++++++++++---
 .../white_list/op_accuracy_white_list.py    |   3 +-
 4 files changed, 158 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h
index d20311d091c..fadffaee71d 100644
--- a/paddle/fluid/operators/conv_cudnn_helper.h
+++ b/paddle/fluid/operators/conv_cudnn_helper.h
@@ -148,7 +148,7 @@ struct SearchAlgorithm {
     }
 #endif
 
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
 #if CUDNN_VERSION >= 7001
       int perf_count;
       int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm {
                     workspace_size_limit, &algo));
 #endif
       VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
     } else {
       auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
index a515b153581..99ec1e04810 100644
--- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int output_offset = transformed_output.numel() /
                         transformed_output.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     for (int g = 0; g < groups; g++) {
       auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     int output_grad_offset = transformed_output_grad.numel() /
                              transformed_output_grad.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     if (input_grad) {
       // Because beta is zero, it is unnecessary to reset input_grad.
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
index 607e97ef430..f4418150e8a 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.dtype = np.float64
+        self.need_check_grad = True
         self.is_test = False
         self.use_cudnn = False
         self.use_mkldnn = False
@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
             self.check_output(check_dygraph=(self.use_mkldnn == False))
 
     def test_check_grad_no_input(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter'],
-                'Output',
-                max_relative_error=0.02,
-                no_grad_set=set(['Input']))
-        else:
-            self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+            else:
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input'], 'Output', no_grad_set=set(['Filter']))
-        else:
-            self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Input'], 'Output', no_grad_set=set(['Filter']))
+            else:
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place,
-                set(['Input', 'Filter']),
-                'Output',
-                max_relative_error=0.02)
-        else:
-            self.check_grad(
-                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place,
+                    set(['Input', 'Filter']),
+                    'Output',
+                    max_relative_error=0.02)
+            else:
+                self.check_grad(
+                    set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
         self.data_format = 'NHWC'
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NCHW
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'
+
+
 class TestConv2dTransposeAPI(unittest.TestCase):
     def test_case1(self):
         data1 = fluid.layers.data(
diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
index 0f5918544a3..ae99aeff557 100644
--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
    'fused_elemwise_activation', \
    'pool2d', \
    'pool3d', \
-    'softmax'
+    'softmax',\
+    'conv2d_transpose'
 ]
--
GitLab
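
Editor's note (not part of the patch): the patch only adds forward-output FP16 checks (need_check_grad = False, atol=0.02) and switches the cuDNN alpha/beta scalars to ScalingParamType<T>. Below is a minimal, hedged sketch of running just the new FP16 cases in isolation with the standard library unittest runner; it assumes Paddle is built with CUDA and that it is launched from python/paddle/fluid/tests/unittests so that test_conv2d_transpose_op is importable. The class names come from the diff above; everything else is plain stdlib unittest.

import unittest

# These test classes are the ones added by this patch.
from test_conv2d_transpose_op import TestCUDNN_FP16, TestCUDNN_NHWC_FP16

if __name__ == '__main__':
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    # Collect only the FP16 conv2d_transpose cases.
    suite.addTests(loader.loadTestsFromTestCase(TestCUDNN_FP16))
    suite.addTests(loader.loadTestsFromTestCase(TestCUDNN_NHWC_FP16))
    unittest.TextTestRunner(verbosity=2).run(suite)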