Unverified commit 863f9e55 authored by wangchaochaohu, committed by GitHub

fix conv_transpose Op fp16 error test=develop (#24695) (#24784)

Parent 627d5567
......@@ -148,7 +148,7 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
}
#endif
if (!exhaustive) {
if (!exhaustive && !deterministic) {
#if CUDNN_VERSION >= 7001
int perf_count;
int best_algo_idx = 0;
......@@ -185,6 +185,8 @@ struct SearchAlgorithm<cudnnConvolutionFwdAlgoPerf_t> {
workspace_size_limit, &algo));
#endif
VLOG(3) << "choose algo " << algo;
} else if (deterministic) {
algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
} else {
auto& dev_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
......
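The hunk above changes the forward-algorithm search in the cuDNN conv helper: when deterministic mode is requested, the kernel now skips both the cuDNN heuristic query and the exhaustive benchmarking search and falls back to algorithm index 1, which corresponds to cuDNN's implicit precomputed-GEMM algorithm. Below is a minimal Python sketch of the resulting three-way selection; the constant and the choose_fwd_algo helper are illustrative stand-ins, not Paddle's actual API.

# Illustrative stand-in for the cuDNN enum value; only the number 1
# (implicit precomputed GEMM) matters for this fix.
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1

def choose_fwd_algo(exhaustive, deterministic, heuristic_search, exhaustive_search):
    """Sketch of the patched selection order in SearchAlgorithm."""
    if not exhaustive and not deterministic:
        # Default path: ask cuDNN's heuristic for the fastest fitting algorithm.
        return heuristic_search()
    elif deterministic:
        # New path: always return a fixed, reproducible algorithm.
        return CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
    else:
        # Exhaustive benchmarking path (cached per input shape in the real code).
        return exhaustive_search()

# Example: deterministic mode wins even if exhaustive search is also enabled.
print(choose_fwd_algo(exhaustive=True, deterministic=True,
                      heuristic_search=lambda: 0, exhaustive_search=lambda: 2))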
......@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
int output_offset =
transformed_output.numel() / transformed_output.dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
ScalingParamType<T> alpha = 1.0f;
ScalingParamType<T> beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
for (int g = 0; g < groups; g++) {
auto cudnn_func = [&](void* cudnn_workspace) {
......@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
int output_grad_offset = transformed_output_grad.numel() /
transformed_output_grad.dims()[0] / groups;
int filter_offset = filter->numel() / groups;
T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
ScalingParamType<T> alpha = 1.0f;
ScalingParamType<T> beta = 0.0f;
auto workspace_handle = dev_ctx.cudnn_workspace_handle();
if (input_grad) {
// Because beta is zero, it is unnecessary to reset input_grad.
......
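Both conv_transpose cuDNN kernels switch the scaling factors from T to ScalingParamType<T>. cuDNN expects alpha and beta to be single-precision floats even when the tensor data is half precision, so declaring them as T breaks the float16 path; the trait keeps the scaling factors as float for fp16 data, matching the 1.0f/0.0f initializers above. A minimal Python sketch of that mapping (not Paddle's actual ScalingParamType):

import numpy as np

# Hypothetical mapping that mirrors the idea behind ScalingParamType<T>:
# the alpha/beta scaling dtype follows cuDNN's requirements (float for half
# and float tensors, double for double tensors) instead of reusing the
# tensor dtype directly.
SCALING_PARAM_TYPE = {
    np.float16: np.float32,  # half data still scales with float32
    np.float32: np.float32,
    np.float64: np.float64,
}

def scaling_params(dtype):
    scale_dtype = SCALING_PARAM_TYPE[dtype]
    return scale_dtype(1.0), scale_dtype(0.0)  # alpha, beta

alpha, beta = scaling_params(np.float16)
print(type(alpha).__name__, type(beta).__name__)  # float32 float32, never float16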
......@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
def setUp(self):
# init as conv transpose
self.dtype = np.float64
self.need_check_grad = True
self.is_test = False
self.use_cudnn = False
self.use_mkldnn = False
......@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
self.check_output(check_dygraph=(self.use_mkldnn == False))
def test_check_grad_no_input(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Filter'],
'Output',
max_relative_error=0.02,
no_grad_set=set(['Input']))
else:
self.check_grad(
['Filter'], 'Output', no_grad_set=set(['Input']))
def test_check_grad_no_filter(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'], 'Output', no_grad_set=set(['Filter']))
else:
self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place, ['Input'], 'Output', no_grad_set=set(['Filter']))
else:
self.check_grad(
['Input'], 'Output', no_grad_set=set(['Filter']))
def test_check_grad(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
if self.need_check_grad:
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
place,
set(['Input', 'Filter']),
'Output',
max_relative_error=0.02)
else:
self.check_grad(
set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
def init_test_case(self):
self.pad = [0, 0]
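The three gradient tests are now wrapped in a need_check_grad guard so that subclasses can opt out of gradient checking; the FP16 cases added further down set need_check_grad = False because no float16 gradient check is performed for this op. A stripped-down sketch of the pattern outside the real OpTest machinery:

import unittest

class GradCheckBase(unittest.TestCase):
    # Toy stand-in for TestConv2dTransposeOp, not the real OpTest class.
    need_check_grad = True

    def test_check_grad(self):
        if self.need_check_grad:
            # Placeholder for check_grad / check_grad_with_place.
            self.assertTrue(True)
        # When need_check_grad is False the body is skipped and the test passes.

class FP16Case(GradCheckBase):
    # Mirrors TestCUDNN_FP16.init_op_type disabling the gradient check.
    need_check_grad = False

if __name__ == '__main__':
    unittest.main()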
......@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNN_FP16(TestConv2dTransposeOp):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 3, 5, 5] # NCHW
f_c = self.input_size[1]
self.filter_size = [f_c, 6, 3, 3]
def init_op_type(self):
self.need_check_grad = False
self.use_cudnn = True
self.op_type = "conv2d_transpose"
def test_check_output(self):
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_output_with_place(
place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
else:
self.check_output(check_dygraph=(self.use_mkldnn == False))
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [0, 0]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 1
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 0, 2, 3]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.input_size = [2, 5, 5, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [1, 1]
self.stride = [1, 1]
self.dilations = [1, 1]
self.groups = 2
self.input_size = [2, 5, 5, 4] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 3, 3, 3]
self.data_format = 'NHWC'
@unittest.skipIf(not core.is_compiled_with_cuda(),
"core is not compiled with CUDA")
class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
def init_test_case(self):
self.dtype = np.float16
self.pad = [2, 2]
self.stride = [2, 2]
self.groups = 1
self.dilations = [1, 1]
self.output_size = [14, 14]
self.input_size = [2, 7, 7, 3] # NHWC
f_c = self.input_size[-1]
self.filter_size = [f_c, 6, 5, 5]
self.data_format = 'NHWC'
class TestConv2dTransposeAPI(unittest.TestCase):
def test_case1(self):
data1 = fluid.layers.data(
......
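The FP16 test classes added above reuse the existing NCHW/NHWC cases with dtype float16, skip gradient checks, and loosen the output comparison to atol=0.02. The looser tolerance reflects half precision's short mantissa; a rough standalone numpy illustration (not the conv2d_transpose kernel itself):

import numpy as np

# Reduce a small 3x3x3 window entirely in float16 and compare against a
# float64 reference; the error typically lands in the 1e-4 to 1e-3 range,
# which is why the FP16 output checks use atol=0.02 instead of the default.
rng = np.random.RandomState(0)
x = rng.uniform(-1.0, 1.0, size=27)
w = rng.uniform(-1.0, 1.0, size=27)

ref = float(np.dot(x, w))  # float64 reference
half = float((x.astype(np.float16) * w.astype(np.float16)).sum(dtype=np.float16))

print("fp16 absolute error: %.2e" % abs(half - ref))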
......@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
'fused_elemwise_activation', \
'pool2d', \
'pool3d', \
'softmax'
'softmax',\
'conv2d_transpose'
]
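Finally, conv2d_transpose is appended to NO_FP16_CHECK_GRAD_OP_LIST so the framework's rule that float16 operators must also run a gradient check does not flag the new tests. A hypothetical sketch of how a harness might consult such a list (the helper below is illustrative, not Paddle's actual implementation):

NO_FP16_CHECK_GRAD_OP_LIST = [
    'fused_elemwise_activation',
    'pool2d',
    'pool3d',
    'softmax',
    'conv2d_transpose',
]

def fp16_grad_check_required(op_type, dtype):
    # Hypothetical helper: ops on the white list are exempt from requiring a
    # float16 gradient check.
    return dtype == 'float16' and op_type not in NO_FP16_CHECK_GRAD_OP_LIST

print(fp16_grad_check_required('conv2d_transpose', 'float16'))  # False: exempt
print(fp16_grad_check_required('conv2d', 'float16'))            # True under this sketch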