Add fp16 (float16) support to the conv3d cuDNN forward kernel

8b169272 · Kexin Zhao · Kexin Zhao · fd1971ca · 8b169272 · 8b169272
Showing 2 changed files with 89 additions and 20 deletions

paddle/fluid/operators/conv_cudnn_op.cu.cc +2 -1

python/paddle/fluid/tests/unittests/test_conv3d_op.py +87 -19

未找到文件。
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -366,7 +366,8 @@ REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,

 REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNConvOpKernel<float>,
-                   paddle::operators::CUDNNConvOpKernel<double>);
+                   paddle::operators::CUDNNConvOpKernel<double>,
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
 REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNConvGradOpKernel<float>,
                   paddle::operators::CUDNNConvGradOpKernel<double>);
--- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py
@@ -70,9 +70,11 @@ def conv3d_forward_naive(input, filter, group, conv_param):

 class TestConv3dOp(OpTest):
    def setUp(self):
+        self.op_type = "conv3d"
        self.use_cudnn = False
+        self.dtype = np.float32
+        self.init_kernel_type()
        self.init_group()
-        self.init_op_type()
        self.init_dilation()
        self.init_test_case()

@@ -80,20 +82,24 @@ class TestConv3dOp(OpTest):
            'stride': self.stride,
            'pad': self.pad,
            'dilations': self.dilations,
-            'use_cudnn': self.use_cudnn,
            'data_format': 'AnyLayout'  # TODO(dzhwinter) : should be fix latter
        }
-        input = np.random.random(self.input_size).astype("float32")
-        filter = np.random.random(self.filter_size).astype("float32")
+
+        input = np.random.random(self.input_size).astype(self.dtype)
+        filter = np.random.random(self.filter_size).astype(self.dtype)
        output = conv3d_forward_naive(input, filter, self.groups,
-                                      conv3d_param).astype("float32")
+                                      conv3d_param).astype(self.dtype)

-        self.inputs = {'Input': input, 'Filter': filter}
+        self.inputs = {
+            'Input': OpTest.np_dtype_to_fluid_dtype(input),
+            'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
+        }
        self.attrs = {
            'strides': self.stride,
            'paddings': self.pad,
            'groups': self.groups,
-            'dilations': self.dilations
+            'dilations': self.dilations,
+            'use_cudnn': self.use_cudnn
        }
        self.outputs = {'Output': output}

@@ -108,6 +114,8 @@ class TestConv3dOp(OpTest):
            self.check_output()

    def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
        if self.testcudnn():
            place = core.CUDAPlace(0)
            self.check_grad_with_place(
@@ -120,6 +128,8 @@ class TestConv3dOp(OpTest):
                set(['Input', 'Filter']), 'Output', max_relative_error=0.03)

    def test_check_grad_no_filter(self):
+        if self.dtype == np.float16:
+            return
        if self.testcudnn():
            place = core.CUDAPlace(0)
            self.check_grad_with_place(
@@ -135,6 +145,8 @@ class TestConv3dOp(OpTest):
                no_grad_set=set(['Filter']))

    def test_check_grad_no_input(self):
+        if self.dtype == np.float16:
+            return
        if self.testcudnn():
            place = core.CUDAPlace(0)
            self.check_grad_with_place(
@@ -163,8 +175,8 @@ class TestConv3dOp(OpTest):
    def init_group(self):
        self.groups = 1

-    def init_op_type(self):
-        self.op_type = "conv3d"
+    def init_kernel_type(self):
+        pass


 class TestCase1(TestConv3dOp):
@@ -235,34 +247,90 @@ class TestWithDilation(TestConv3dOp):
        self.groups = 3


+#----------------Conv3dCUDNN----------------
 class TestCUDNN(TestConv3dOp):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16CUDNN(TestConv3dOp):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithGroup1CUDNN(TestWithGroup1):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithGroup1CUDNN(TestWithGroup1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithGroup2CUDNN(TestWithGroup2):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithGroup2CUDNN(TestWithGroup2):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWith1x1CUDNN(TestWith1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16With1x1CUDNN(TestWith1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
-    def init_op_type(self):
+    def init_kernel_type(self):
        self.use_cudnn = True
-        self.op_type = "conv3d"
+
+
+class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
+    def init_kernel_type(self):
+        self.use_cudnn = True
+        self.dtype = np.float16
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            place = core.CUDAPlace(0)
+            if core.is_float16_supported(place):
+                self.check_output_with_place(place, atol=2e-2)


 # FIXME(typhoonzero): find a way to determine if