From 8d8527fb616445f4a29317f25f9fcbd9eb369b37 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Tue, 18 Feb 2020 10:12:54 +0800 Subject: [PATCH] register fp16 kernel for some ops (#22650) test=release/1.7 --- paddle/fluid/operators/conv_transpose_cudnn_op.cu | 9 +++++++-- paddle/fluid/operators/expand_op.cu | 4 ++++ paddle/fluid/operators/pad2d_op.cu | 8 ++++++-- paddle/fluid/operators/squeeze_op.cu.cc | 5 +++++ paddle/fluid/operators/unsqueeze_op.cu.cc | 7 +++++++ 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu index b827f200c0..6955b934a8 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu @@ -261,7 +261,7 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel { int output_offset = transformed_output.numel() / transformed_output.dims()[0] / groups; int filter_offset = filter->numel() / groups; - T alpha = 1.0f, beta = 0.0f; + T alpha = static_cast(1.0), beta = static_cast(0.0); auto workspace_handle = dev_ctx.cudnn_workspace_handle(); for (int g = 0; g < groups; g++) { auto cudnn_func = [&](void* cudnn_workspace) { @@ -507,7 +507,7 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { int output_grad_offset = transformed_output_grad.numel() / transformed_output_grad.dims()[0] / groups; int filter_offset = filter->numel() / groups; - T alpha = 1.0f, beta = 0.0f; + T alpha = static_cast(1.0), beta = static_cast(0.0); auto workspace_handle = dev_ctx.cudnn_workspace_handle(); if (input_grad) { T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); @@ -569,17 +569,22 @@ class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OP_KERNEL(conv2d_transpose, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeOpKernel, ops::CUDNNConvTransposeOpKernel, ops::CUDNNConvTransposeOpKernel); REGISTER_OP_KERNEL(conv2d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeGradOpKernel, ops::CUDNNConvTransposeGradOpKernel, ops::CUDNNConvTransposeGradOpKernel); REGISTER_OP_KERNEL(conv3d_transpose, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeOpKernel, ops::CUDNNConvTransposeOpKernel, ops::CUDNNConvTransposeOpKernel); REGISTER_OP_KERNEL(conv3d_transpose_grad, CUDNN, ::paddle::platform::CUDAPlace, + ops::CUDNNConvTransposeGradOpKernel, ops::CUDNNConvTransposeGradOpKernel, ops::CUDNNConvTransposeGradOpKernel); diff --git a/paddle/fluid/operators/expand_op.cu b/paddle/fluid/operators/expand_op.cu index cf913f56dd..f2f8e2f741 100644 --- a/paddle/fluid/operators/expand_op.cu +++ b/paddle/fluid/operators/expand_op.cu @@ -14,9 +14,12 @@ limitations under the License. */ #include "paddle/fluid/operators/expand_op.h" namespace ops = paddle::operators; +namespace plat = paddle::platform; + REGISTER_OP_CUDA_KERNEL( expand, ops::ExpandKernel, ops::ExpandKernel, + ops::ExpandKernel, ops::ExpandKernel, ops::ExpandKernel, ops::ExpandKernel); @@ -24,5 +27,6 @@ REGISTER_OP_CUDA_KERNEL( expand_grad, ops::ExpandGradKernel, ops::ExpandGradKernel, + ops::ExpandGradKernel, ops::ExpandGradKernel, ops::ExpandGradKernel); diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index 05fad5b3bb..f88b195b64 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -461,8 +461,12 @@ class Pad2dGradCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(pad2d, ops::Pad2dCUDAKernel, +namespace plat = paddle::platform; + +REGISTER_OP_CUDA_KERNEL(pad2d, ops::Pad2dCUDAKernel, + ops::Pad2dCUDAKernel, ops::Pad2dCUDAKernel, ops::Pad2dCUDAKernel, ops::Pad2dCUDAKernel); -REGISTER_OP_CUDA_KERNEL(pad2d_grad, ops::Pad2dGradCUDAKernel, +REGISTER_OP_CUDA_KERNEL(pad2d_grad, ops::Pad2dGradCUDAKernel, + ops::Pad2dGradCUDAKernel, ops::Pad2dGradCUDAKernel); diff --git a/paddle/fluid/operators/squeeze_op.cu.cc b/paddle/fluid/operators/squeeze_op.cu.cc index 50fee1497e..61a3a39de4 100644 --- a/paddle/fluid/operators/squeeze_op.cu.cc +++ b/paddle/fluid/operators/squeeze_op.cu.cc @@ -15,10 +15,12 @@ limitations under the License. */ #include "paddle/fluid/operators/squeeze_op.h" namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( squeeze, ops::SqueezeKernel, ops::SqueezeKernel, + ops::SqueezeKernel, ops::SqueezeKernel, ops::SqueezeKernel, ops::SqueezeKernel); @@ -26,12 +28,14 @@ REGISTER_OP_CUDA_KERNEL( squeeze_grad, ops::SqueezeGradKernel, ops::SqueezeGradKernel, + ops::SqueezeGradKernel, ops::SqueezeGradKernel, ops::SqueezeGradKernel, ops::SqueezeGradKernel); REGISTER_OP_CUDA_KERNEL( squeeze2, ops::Squeeze2Kernel, ops::Squeeze2Kernel, + ops::Squeeze2Kernel, ops::Squeeze2Kernel, ops::Squeeze2Kernel, ops::Squeeze2Kernel); @@ -39,6 +43,7 @@ REGISTER_OP_CUDA_KERNEL( squeeze2_grad, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, + ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel); diff --git a/paddle/fluid/operators/unsqueeze_op.cu.cc b/paddle/fluid/operators/unsqueeze_op.cu.cc index ffdd61170e..3258de53b8 100644 --- a/paddle/fluid/operators/unsqueeze_op.cu.cc +++ b/paddle/fluid/operators/unsqueeze_op.cu.cc @@ -15,10 +15,12 @@ limitations under the License. */ #include "paddle/fluid/operators/unsqueeze_op.h" namespace ops = paddle::operators; +namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( unsqueeze, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -26,6 +28,8 @@ REGISTER_OP_CUDA_KERNEL( unsqueeze_grad, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, + ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel); @@ -33,6 +37,7 @@ REGISTER_OP_CUDA_KERNEL( unsqueeze2, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -40,6 +45,8 @@ REGISTER_OP_CUDA_KERNEL( unsqueeze2_grad, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, + ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel); -- GitLab