diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h
index d20311d091ce48311693656f7b741ea769ef91db..fadffaee71d21e736bcc0ab696e44277a85345eb 100644
--- a/paddle/fluid/operators/conv_cudnn_helper.h
+++ b/paddle/fluid/operators/conv_cudnn_helper.h
@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
     }
 #endif
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
 #if CUDNN_VERSION >= 7001
       int perf_count;
       int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
                                            workspace_size_limit, &algo));
 #endif
       VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
     } else {
       auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
index a515b153581f2cb9585ce39ceeda91d88b2ad334..99ec1e048101b281e71005f6fde328c664ba66be 100644
--- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int output_offset =
         transformed_output.numel() / transformed_output.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     for (int g = 0; g < groups; g++) {
       auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     int output_grad_offset = transformed_output_grad.numel() /
                              transformed_output_grad.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     if (input_grad) {
       // Because beta is zero, it is unnecessary to reset input_grad.
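Note on the alpha/beta change above: cuDNN expects the scaling factors to be single-precision floats whenever the convolution computes in float, which includes half-precision tensors (and double only for double tensors), so declaring them as `T` breaks the FP16 path that this patch enables. The sketch below only illustrates the idea behind a `ScalingParamType`-style trait; the names (`ExampleScalingParamType`, the `float16` stand-in) are hypothetical and not Paddle's actual definitions, which are layered on its cuDNN data-type traits.

```cpp
#include <iostream>
#include <type_traits>

// Stand-in for a 16-bit float type (e.g. platform::float16 / __half);
// defined here only so the sketch is self-contained.
struct float16 { unsigned short bits; };

// Illustrative trait (hypothetical name): map the tensor element type to the
// type cuDNN expects for the alpha/beta scaling factors.
template <typename T>
struct ExampleScalingParamType {
  using type = T;  // float -> float, double -> double
};
template <>
struct ExampleScalingParamType<float16> {
  using type = float;  // fp16 tensors still scale with float alpha/beta
};

template <typename T>
void prepare_scaling_factors() {
  // What the kernel change amounts to: instead of `T alpha = static_cast<T>(1.0)`,
  // declare the scalars with the scaling type so fp16 kernels pass float values.
  typename ExampleScalingParamType<T>::type alpha = 1.0f, beta = 0.0f;
  std::cout << "alpha is float: "
            << std::is_same<decltype(alpha), float>::value << "\n";
  (void)beta;  // would be passed to the cudnnConvolution* call alongside alpha
}

int main() {
  prepare_scaling_factors<float16>();  // prints 1: fp16 uses float scaling
  prepare_scaling_factors<double>();   // prints 0: double keeps double
}
```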
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
index 607e97ef430e15201197eefc711918a6afd20794..f4418150e8a69d795ff544073b6ba6dd7431e44b 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.dtype = np.float64
+        self.need_check_grad = True
         self.is_test = False
         self.use_cudnn = False
         self.use_mkldnn = False
@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
             self.check_output(check_dygraph=(self.use_mkldnn == False))
 
     def test_check_grad_no_input(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter'],
-                'Output',
-                max_relative_error=0.02,
-                no_grad_set=set(['Input']))
-        else:
-            self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+            else:
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input'], 'Output', no_grad_set=set(['Filter']))
-        else:
-            self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Input'], 'Output', no_grad_set=set(['Filter']))
+            else:
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place,
-                set(['Input', 'Filter']),
-                'Output',
-                max_relative_error=0.02)
-        else:
-            self.check_grad(
-                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place,
+                    set(['Input', 'Filter']),
+                    'Output',
+                    max_relative_error=0.02)
+            else:
+                self.check_grad(
+                    set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
         self.data_format = 'NHWC'
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'
+
+
 class TestConv2dTransposeAPI(unittest.TestCase):
     def test_case1(self):
         data1 = fluid.layers.data(
diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
index 0f5918544a3bf29757241efb74938e4aa5ff0a6f..ae99aeff557e4aa31f2868fbb8be9d038d5538ca 100644
--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
     'fused_elemwise_activation', \
     'pool2d', \
     'pool3d', \
-    'softmax'
+    'softmax', \
+    'conv2d_transpose'
 ]