diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc
index aec0aba6f7b41f64cc0adfaf8070b8ac5671f0d4..bff62f050c7577d71b56473d82047632c824e11e 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -134,8 +134,8 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
     platform::CUDAPlace gpu = boost::get<platform::CUDAPlace>(ctx.GetPlace());
     cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
     // ------------------- cudnn conv forward ---------------------
-    T alpha = static_cast<T>(1.0f);
-    T beta = static_cast<T>(0.0f);
+    typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
+                                                          beta = 0.0f;
     for (int i = 0; i < groups; i++) {
       PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
           handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
@@ -321,7 +321,7 @@ namespace plat = paddle::platform;
 REGISTER_OP_KERNEL(conv2d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
                    paddle::operators::CUDNNConvOpKernel<double>,
-                   paddle::operators::CUDNNConvOpKernel < plat::float16);
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
 REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h
index 510a1707ba8806df0174463568c86a3f654cf411..7e001ecc56173db76e8c576e7efd66f41192f292 100644
--- a/paddle/fluid/platform/cudnn_helper.h
+++ b/paddle/fluid/platform/cudnn_helper.h
@@ -85,13 +85,14 @@ template <>
 class CudnnDataType<float16> {
  public:
   static const cudnnDataType_t type = CUDNN_DATA_HALF;
-  typedef const float16 ScalingParamType;
+  // The scaling param type is float for HALF and FLOAT tensors
+  typedef const float ScalingParamType;
   static ScalingParamType* kOne() {
-    static ScalingParamType v = static_cast<float16>(1.0);
+    static ScalingParamType v = 1.0;
     return &v;
   }
   static ScalingParamType* kZero() {
-    static ScalingParamType v = static_cast<float16>(0.0);
+    static ScalingParamType v = 0.0;
     return &v;
   }
 };
diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_op.py
index a16b6c8e927a361dc39490e349c20004fed9f59b..badf7a8cb4cd2a9aa90b4f570ae4416ef64b4e25 100644
--- a/python/paddle/fluid/tests/unittests/test_conv2d_op.py
+++ b/python/paddle/fluid/tests/unittests/test_conv2d_op.py
@@ -79,7 +79,7 @@ class TestConv2dOp(OpTest):
 
         input = np.random.random(self.input_size).astype(self.dtype)
         filter = np.random.random(self.filter_size).astype(self.dtype)
-        output = conv2d_forward_naive(self.input, self.filter, self.groups,
+        output = conv2d_forward_naive(input, filter, self.groups,
                                       conv2d_param).astype(self.dtype)
 
         # numpy float16 is binded to paddle::platform::float16
@@ -88,9 +88,12 @@ class TestConv2dOp(OpTest):
         # uint16_t in paddle or np.uint16 in numpy, which are
         # themselves binded together.
         self.inputs = {
-            'Input': input.view(np.uint16)
-            if self.dtype == np.float16 else input,
-            'Filter': create_view(filter)
+            #'Input': (input.view(np.uint16)
+            #          if self.dtype == np.float16 else input),
+            #'Filter': (filter.view(np.uint16)
+            #           if self.dtype == np.float16 else filter)
+            'Input': OpTest.create_view(input),
+            'Filter': OpTest.create_view(filter)
         }
         self.attrs = {
             'strides': self.stride,
@@ -254,7 +257,7 @@ class TestFP16CUDNN(TestCUDNN):
         if core.is_compiled_with_cuda():
             place = core.CUDAPlace(0)
             if core.is_float16_supported(place):
-                self.check_output_with_place(place, atol=1e-1)
+                self.check_output_with_place(place, atol=2e-2)
 
     def test_check_grad(self):
         pass
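For context on the test change above: the inputs are fed to the operator as uint16 views because, as the diff's comment indicates, half-precision data is moved between numpy and paddle as raw uint16 bits rather than through a direct numpy float16 binding. The sketch below illustrates that zero-copy reinterpretation using only numpy; the local create_view helper is a hypothetical stand-in for what OpTest.create_view is assumed to do, not the actual Paddle implementation.

    import numpy as np

    def create_view(arr):
        # Reinterpret float16 storage as uint16 without copying; any other
        # dtype passes through unchanged. (Illustrative stand-in only.)
        if arr.dtype == np.float16:
            return arr.view(np.uint16)
        return arr

    x = np.random.random((2, 3)).astype(np.float16)
    v = create_view(x)
    assert v.dtype == np.uint16                   # same buffer, new dtype label
    assert v.base is x                            # no copy was made
    assert np.array_equal(v.view(np.float16), x)  # bits round-trip exactly

Because the view shares memory and only relabels the dtype, the half-precision bit pattern is handed over unchanged instead of being converted or copied.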