From 355caee18b18e8517d069619694450d0206012b4 Mon Sep 17 00:00:00 2001
From: wangchaochaohu
Date: Wed, 27 May 2020 14:49:18 +0800
Subject: [PATCH] fix conv_transpose Op fp16 error test=develop (#24695)

---
 paddle/fluid/operators/conv_cudnn_helper.h  |   4 +-
 .../operators/conv_transpose_cudnn_op.cu    |   6 +-
 .../unittests/test_conv2d_transpose_op.py   | 174 +++++++++++++++---
 .../white_list/op_accuracy_white_list.py    |   3 +-
 4 files changed, 158 insertions(+), 29 deletions(-)

diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h
index d20311d091c..fadffaee71d 100644
--- a/paddle/fluid/operators/conv_cudnn_helper.h
+++ b/paddle/fluid/operators/conv_cudnn_helper.h
@@ -148,7 +148,7 @@ struct SearchAlgorithm {
     }
 #endif
 
-    if (!exhaustive) {
+    if (!exhaustive && !deterministic) {
 #if CUDNN_VERSION >= 7001
       int perf_count;
       int best_algo_idx = 0;
@@ -185,6 +185,8 @@ struct SearchAlgorithm {
                     workspace_size_limit, &algo));
 #endif
       VLOG(3) << "choose algo " << algo;
+    } else if (deterministic) {
+      algo = static_cast<cudnnConvolutionFwdAlgo_t>(1);
     } else {
       auto& dev_ctx =
           ctx.template device_context<platform::CUDADeviceContext>();
diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
index a515b153581..99ec1e04810 100644
--- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu
+++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu
@@ -245,7 +245,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel<T> {
     int output_offset = transformed_output.numel() /
                         transformed_output.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     for (int g = 0; g < groups; g++) {
       auto cudnn_func = [&](void* cudnn_workspace) {
@@ -493,7 +494,8 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel<T> {
     int output_grad_offset = transformed_output_grad.numel() /
                              transformed_output_grad.dims()[0] / groups;
     int filter_offset = filter->numel() / groups;
-    T alpha = static_cast<T>(1.0), beta = static_cast<T>(0.0);
+    ScalingParamType<T> alpha = 1.0f;
+    ScalingParamType<T> beta = 0.0f;
     auto workspace_handle = dev_ctx.cudnn_workspace_handle();
     if (input_grad) {
       // Because beta is zero, it is unnecessary to reset input_grad.
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
index 607e97ef430..f4418150e8a 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py
@@ -109,6 +109,7 @@ class TestConv2dTransposeOp(OpTest):
     def setUp(self):
         # init as conv transpose
         self.dtype = np.float64
+        self.need_check_grad = True
         self.is_test = False
         self.use_cudnn = False
         self.use_mkldnn = False
@@ -152,35 +153,40 @@ class TestConv2dTransposeOp(OpTest):
             self.check_output(check_dygraph=(self.use_mkldnn == False))
 
     def test_check_grad_no_input(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Filter'],
-                'Output',
-                max_relative_error=0.02,
-                no_grad_set=set(['Input']))
-        else:
-            self.check_grad(['Filter'], 'Output', no_grad_set=set(['Input']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Filter'],
+                    'Output',
+                    max_relative_error=0.02,
+                    no_grad_set=set(['Input']))
+            else:
+                self.check_grad(
+                    ['Filter'], 'Output', no_grad_set=set(['Input']))
 
     def test_check_grad_no_filter(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place, ['Input'], 'Output', no_grad_set=set(['Filter']))
-        else:
-            self.check_grad(['Input'], 'Output', no_grad_set=set(['Filter']))
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place, ['Input'], 'Output', no_grad_set=set(['Filter']))
+            else:
+                self.check_grad(
+                    ['Input'], 'Output', no_grad_set=set(['Filter']))
 
     def test_check_grad(self):
-        if self.use_cudnn:
-            place = core.CUDAPlace(0)
-            self.check_grad_with_place(
-                place,
-                set(['Input', 'Filter']),
-                'Output',
-                max_relative_error=0.02)
-        else:
-            self.check_grad(
-                set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
+        if self.need_check_grad:
+            if self.use_cudnn:
+                place = core.CUDAPlace(0)
+                self.check_grad_with_place(
+                    place,
+                    set(['Input', 'Filter']),
+                    'Output',
+                    max_relative_error=0.02)
+            else:
+                self.check_grad(
+                    set(['Input', 'Filter']), 'Output', max_relative_error=0.02)
 
     def init_test_case(self):
         self.pad = [0, 0]
@@ -708,6 +714,124 @@ class TestDepthwiseConvTransposeAsymmetricPad_NHWC(TestConv2dTransposeOp):
         self.data_format = 'NHWC'
 
 
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_FP16(TestConv2dTransposeOp):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 3, 5, 5]  # NCHW
+        f_c = self.input_size[1]
+        self.filter_size = [f_c, 6, 3, 3]
+
+    def init_op_type(self):
+        self.need_check_grad = False
+        self.use_cudnn = True
+        self.op_type = "conv2d_transpose"
+
+    def test_check_output(self):
+        if self.use_cudnn:
+            place = core.CUDAPlace(0)
+            self.check_output_with_place(
+                place, atol=0.02, check_dygraph=(self.use_mkldnn == False))
+        else:
+            self.check_output(check_dygraph=(self.use_mkldnn == False))
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNN_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [0, 0]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 1
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithSymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithAsymmetricPad_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 0, 2, 3]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithStride_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.input_size = [2, 5, 5, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithGroups_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [1, 1]
+        self.stride = [1, 1]
+        self.dilations = [1, 1]
+        self.groups = 2
+        self.input_size = [2, 5, 5, 4]  # NCHW
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 3, 3, 3]
+        self.data_format = 'NHWC'
+
+
+@unittest.skipIf(not core.is_compiled_with_cuda(),
+                 "core is not compiled with CUDA")
+class TestCUDNNWithEvenUpsample_NHWC_FP16(TestCUDNN_FP16):
+    def init_test_case(self):
+        self.dtype = np.float16
+        self.pad = [2, 2]
+        self.stride = [2, 2]
+        self.groups = 1
+        self.dilations = [1, 1]
+        self.output_size = [14, 14]
+        self.input_size = [2, 7, 7, 3]  # NHWC
+        f_c = self.input_size[-1]
+        self.filter_size = [f_c, 6, 5, 5]
+        self.data_format = 'NHWC'
+
+
 class TestConv2dTransposeAPI(unittest.TestCase):
     def test_case1(self):
         data1 = fluid.layers.data(
diff --git a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
index 0f5918544a3..ae99aeff557 100644
--- a/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/op_accuracy_white_list.py
@@ -80,5 +80,6 @@ NO_FP16_CHECK_GRAD_OP_LIST = [
    'fused_elemwise_activation', \
    'pool2d', \
    'pool3d', \
-    'softmax'
+    'softmax',\
+    'conv2d_transpose'
 ]
--
GitLab
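
Editor's note (not part of the patch): the patch only adds forward-output FP16 checks (need_check_grad = False, atol=0.02) and switches the cuDNN alpha/beta scalars to ScalingParamType<T>. Below is a minimal, hedged sketch of running just the new FP16 cases in isolation with the standard library unittest runner; it assumes Paddle is built with CUDA and that it is launched from python/paddle/fluid/tests/unittests so that test_conv2d_transpose_op is importable. The class names come from the diff above; everything else is plain stdlib unittest.

import unittest

# These test classes are the ones added by this patch.
from test_conv2d_transpose_op import TestCUDNN_FP16, TestCUDNN_NHWC_FP16

if __name__ == '__main__':
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    # Collect only the FP16 conv2d_transpose cases.
    suite.addTests(loader.loadTestsFromTestCase(TestCUDNN_FP16))
    suite.addTests(loader.loadTestsFromTestCase(TestCUDNN_NHWC_FP16))
    unittest.TextTestRunner(verbosity=2).run(suite)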