diff --git a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h index b929bbee5096bea3ef62c78c6d870c800da06591..3bf141fc0b36914d9d37c7aa0fe35e58bcf0bb09 100644 --- a/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/math/binary_op_gpu_kernel.h @@ -218,7 +218,7 @@ class BinaryOpGpuKernel : public GpuKernel { } } CHECK_CUDNN_RET_WITH_EXCEPT( - cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, cudnn_data_type_, CUDNN_NOT_PROPAGATE_NAN), + cudnnSetOpTensorDescriptor(opTensor_descriptor_, tensor_op_, CUDNN_DATA_FLOAT, CUDNN_NOT_PROPAGATE_NAN), "cudnnSetOpTensorDescriptor failed"); return; } diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h index 75b2a97cf85c7a12b55dbab90eb80f229af6407e..546e706d2bd9050d86def5c770797621126e2aad 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_gpu_kernel.h @@ -142,10 +142,14 @@ class Conv2dGpuFwdKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); input_descriptor_real = input_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(input_descriptor_real); InitSizeLists(); return true; @@ -240,7 +244,7 @@ class Conv2dGpuFwdKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } @@ -276,6 +280,9 @@ class Conv2dGpuFwdKernel : public GpuKernel { "cudnnGetConvolutionForwardAlgorithm_v7 failed"); conv_algorithm_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + conv_algorithm_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; + } } cudnnHandle_t cudnn_handle_; cudnnTensorDescriptor_t input_desc_; diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h index e481fd448e926e1eb71b5e052a37cb56988ad11e..6b7ee657c2dc0b4fa860185db8a6323ff70fb58e 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -141,10 +141,14 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "GetConvolution2dDescriptor failed"); x_desc_real = x_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(x_desc_real); InitSizeLists(); return true; @@ -239,7 +243,7 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } void SelectAlgorithm(cudnnTensorDescriptor_t x_desc_real) { @@ -258,6 +262,9 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { "GetConvolutionBackwardFilterAlgorithm failed"); algo_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + algo_ = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; + } } void GetFilterShape(const CNodePtr &kernel_node, std::vector *filter_shape) { auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("filter_sizes")->cast()->value(); diff --git a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h index 008abcc65859f6bf5e99c10df0e5446fd84f64df..af12083364d45717440ac07391cfab465c570ec2 100644 --- a/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/kernel/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -142,10 +142,14 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { } CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, pad_height_, pad_width_, stride_, stride_, dilation_, dilation_, - CUDNN_CROSS_CORRELATION, cudnn_data_type_), + CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); dx_desc_real = dx_desc_; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + CHECK_CUDNN_RET_WITH_EXCEPT(cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH), + "cudnnSetConvolutionMathType failed.") + } SelectAlgorithm(dx_desc_real); InitSizeLists(); return true; @@ -239,7 +243,7 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { "cudnnSetTensor4dDescriptor failed"); CHECK_CUDNN_RET_WITH_EXCEPT( cudnnSetConvolution2dDescriptor(conv_desc_, use_pad_ ? 0 : pad_top_, use_pad_ ? 0 : pad_left_, stride_, stride_, - dilation_, dilation_, CUDNN_CROSS_CORRELATION, cudnn_data_type_), + dilation_, dilation_, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT), "cudnnSetConvolution2dDescriptor failed"); } void SelectAlgorithm(cudnnTensorDescriptor_t dx_desc_real) { @@ -258,6 +262,9 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { "cudnnGetConvolutionBackwardDataAlgorithm_v7 failed"); algo_ = perf_results.algo; } + if (cudnn_data_type_ == CUDNN_DATA_HALF) { + algo_ = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; + } } void GetInputShape(const CNodePtr &kernel_node, std::vector *input_shape) { auto shp_tuple_x = AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("input_sizes")->cast()->value();