提交 8b169272 编写于 作者: K Kexin Zhao 提交者: Kexin Zhao

add fp16 support to conv3d

上级 fd1971ca
......@@ -366,7 +366,8 @@ REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvOpKernel<float>,
paddle::operators::CUDNNConvOpKernel<double>);
paddle::operators::CUDNNConvOpKernel<double>,
paddle::operators::CUDNNConvOpKernel<plat::float16>);
REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
paddle::operators::CUDNNConvGradOpKernel<float>,
paddle::operators::CUDNNConvGradOpKernel<double>);
......@@ -70,9 +70,11 @@ def conv3d_forward_naive(input, filter, group, conv_param):
class TestConv3dOp(OpTest):
def setUp(self):
self.op_type = "conv3d"
self.use_cudnn = False
self.dtype = np.float32
self.init_kernel_type()
self.init_group()
self.init_op_type()
self.init_dilation()
self.init_test_case()
......@@ -80,20 +82,24 @@ class TestConv3dOp(OpTest):
'stride': self.stride,
'pad': self.pad,
'dilations': self.dilations,
'use_cudnn': self.use_cudnn,
'data_format': 'AnyLayout' # TODO(dzhwinter) : should be fix latter
}
input = np.random.random(self.input_size).astype("float32")
filter = np.random.random(self.filter_size).astype("float32")
input = np.random.random(self.input_size).astype(self.dtype)
filter = np.random.random(self.filter_size).astype(self.dtype)
output = conv3d_forward_naive(input, filter, self.groups,
conv3d_param).astype("float32")
conv3d_param).astype(self.dtype)
self.inputs = {'Input': input, 'Filter': filter}
self.inputs = {
'Input': OpTest.np_dtype_to_fluid_dtype(input),
'Filter': OpTest.np_dtype_to_fluid_dtype(filter)
}
self.attrs = {
'strides': self.stride,
'paddings': self.pad,
'groups': self.groups,
'dilations': self.dilations
'dilations': self.dilations,
'use_cudnn': self.use_cudnn
}
self.outputs = {'Output': output}
......@@ -108,6 +114,8 @@ class TestConv3dOp(OpTest):
self.check_output()
def test_check_grad(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
......@@ -120,6 +128,8 @@ class TestConv3dOp(OpTest):
set(['Input', 'Filter']), 'Output', max_relative_error=0.03)
def test_check_grad_no_filter(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
......@@ -135,6 +145,8 @@ class TestConv3dOp(OpTest):
no_grad_set=set(['Filter']))
def test_check_grad_no_input(self):
if self.dtype == np.float16:
return
if self.testcudnn():
place = core.CUDAPlace(0)
self.check_grad_with_place(
......@@ -163,8 +175,8 @@ class TestConv3dOp(OpTest):
def init_group(self):
self.groups = 1
def init_op_type(self):
self.op_type = "conv3d"
def init_kernel_type(self):
pass
class TestCase1(TestConv3dOp):
......@@ -235,34 +247,90 @@ class TestWithDilation(TestConv3dOp):
self.groups = 3
#----------------Conv3dCUDNN----------------
class TestCUDNN(TestConv3dOp):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16CUDNN(TestConv3dOp):
    """Base conv3d test case run with the cuDNN kernel on float16 data."""

    def init_kernel_type(self):
        # Select half precision and route the op through cuDNN.
        self.dtype = np.float16
        self.use_cudnn = True

    def test_check_output(self):
        # Nothing to verify unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        # Only compare outputs when device 0 supports float16; the absolute
        # tolerance used for the comparison is 2e-2.
        if core.is_float16_supported(place):
            self.check_output_with_place(place, atol=2e-2)
class TestWithGroup1CUDNN(TestWithGroup1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithGroup1CUDNN(TestWithGroup1):
    """TestWithGroup1 configuration run with the cuDNN kernel on float16 data."""

    def init_kernel_type(self):
        # Select half precision and route the op through cuDNN.
        self.dtype = np.float16
        self.use_cudnn = True

    def test_check_output(self):
        # Nothing to verify unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        # Only compare outputs when device 0 supports float16; the absolute
        # tolerance used for the comparison is 2e-2.
        if core.is_float16_supported(place):
            self.check_output_with_place(place, atol=2e-2)
class TestWithGroup2CUDNN(TestWithGroup2):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithGroup2CUDNN(TestWithGroup2):
    """TestWithGroup2 configuration run with the cuDNN kernel on float16 data."""

    def init_kernel_type(self):
        # Select half precision and route the op through cuDNN.
        self.dtype = np.float16
        self.use_cudnn = True

    def test_check_output(self):
        # Nothing to verify unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        # Only compare outputs when device 0 supports float16; the absolute
        # tolerance used for the comparison is 2e-2.
        if core.is_float16_supported(place):
            self.check_output_with_place(place, atol=2e-2)
class TestWith1x1CUDNN(TestWith1x1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16With1x1CUDNN(TestWith1x1):
    """TestWith1x1 configuration run with the cuDNN kernel on float16 data."""

    def init_kernel_type(self):
        # Select half precision and route the op through cuDNN.
        self.dtype = np.float16
        self.use_cudnn = True

    def test_check_output(self):
        # Nothing to verify unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        # Only compare outputs when device 0 supports float16; the absolute
        # tolerance used for the comparison is 2e-2.
        if core.is_float16_supported(place):
            self.check_output_with_place(place, atol=2e-2)
class TestWithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True
self.op_type = "conv3d"
class TestFP16WithInput1x1Filter1x1CUDNN(TestWithInput1x1Filter1x1):
    """TestWithInput1x1Filter1x1 configuration run with cuDNN on float16 data."""

    def init_kernel_type(self):
        # Select half precision and route the op through cuDNN.
        self.dtype = np.float16
        self.use_cudnn = True

    def test_check_output(self):
        # Nothing to verify unless the build has CUDA support.
        if not core.is_compiled_with_cuda():
            return
        place = core.CUDAPlace(0)
        # Only compare outputs when device 0 supports float16; the absolute
        # tolerance used for the comparison is 2e-2.
        if core.is_float16_supported(place):
            self.check_output_with_place(place, atol=2e-2)
# FIXME(typhoonzero): find a way to determine if
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册