diff --git a/paddle/fluid/operators/math/softmax.cu b/paddle/fluid/operators/math/softmax.cu index 34ea6a91ce7743462d378cf471a5ec3a12ca51d1..5518ebed3f792a5acdfbb27976bc2c6dbd78069a 100644 --- a/paddle/fluid/operators/math/softmax.cu +++ b/paddle/fluid/operators/math/softmax.cu @@ -89,6 +89,7 @@ void SoftmaxGradCUDNNFunctor::operator()( XGrad->mutable_data(context.GetPlace()))); } +template class SoftmaxCUDNNFunctor; template class SoftmaxCUDNNFunctor; template class SoftmaxCUDNNFunctor; template class SoftmaxGradCUDNNFunctor; diff --git a/paddle/fluid/operators/softmax_cudnn_op.cu.cc b/paddle/fluid/operators/softmax_cudnn_op.cu.cc index 47cb336d87f8627d86ac33d6ac32c04d5d93f753..5596fa0648ccc151bc0d11de9c556599428a8d71 100644 --- a/paddle/fluid/operators/softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/softmax_cudnn_op.cu.cc @@ -56,7 +56,9 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_KERNEL(softmax, CUDNN, ::paddle::platform::CUDAPlace, - ops::SoftmaxCUDNNKernel); -REGISTER_OP_KERNEL(softmax_grad, CUDNN, ::paddle::platform::CUDAPlace, +namespace plat = paddle::platform; +REGISTER_OP_KERNEL(softmax, CUDNN, plat::CUDAPlace, + ops::SoftmaxCUDNNKernel, + ops::SoftmaxCUDNNKernel); +REGISTER_OP_KERNEL(softmax_grad, CUDNN, plat::CUDAPlace, ops::SoftmaxGradCUDNNKernel); diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index 4c8326eeabcc56b32755fc49a6df2f1cb192af78..e2c0f915d96b7746191572fa27b725d90cb6e2e5 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/softmax_op.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/cudnn_helper.h" +#endif #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -41,29 +44,30 @@ class SoftmaxOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { // choose cudnn kernel if the runtime supported. - bool use_cudnn = ctx.Attr("use_cudnn"); - bool runtime_cudnn_support = false; + framework::LibraryType library_{framework::LibraryType::kPlain}; #ifdef PADDLE_WITH_CUDA - if (platform::is_gpu_place(ctx.GetPlace())) { - auto& dev_ctx = - ctx.template device_context(); - runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false; - } -#endif - framework::LibraryType library_ = framework::LibraryType::kPlain; - if (use_cudnn && runtime_cudnn_support) { + if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; } +#endif #ifdef PADDLE_WITH_MKLDNN if (library_ == framework::LibraryType::kPlain && platform::CanMKLDNNBeUsed(ctx)) { library_ = framework::LibraryType::kMKLDNN; } #endif + + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + if (input_data_type == framework::proto::VarType::FP16) { + PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN, + "float16 can only be used when CUDNN is used"); + } + std::string data_format = ctx.Attr("data_format"); - return framework::OpKernelType( - framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), - framework::StringToDataLayout(data_format), library_); + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::StringToDataLayout(data_format), + library_); } }; class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker { @@ -130,19 +134,12 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { // choose cudnn kernel if the runtime supported. - bool use_cudnn = ctx.Attr("use_cudnn"); - bool runtime_cudnn_support = false; + framework::LibraryType library_{framework::LibraryType::kPlain}; #ifdef PADDLE_WITH_CUDA - if (platform::is_gpu_place(ctx.GetPlace())) { - auto& dev_ctx = - ctx.template device_context(); - runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false; - } -#endif - framework::LibraryType library_ = framework::LibraryType::kPlain; - if (use_cudnn && runtime_cudnn_support) { + if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; } +#endif std::string data_format = ctx.Attr("data_format"); return framework::OpKernelType( framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index d32c719a5f137a96a8af6c77f46ad14869679455..33d60c7e31ce0817ad26ea1c1c974339936052d3 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -27,22 +27,22 @@ def stable_softmax(x): class TestSoftmaxOp(OpTest): def setUp(self): - self.use_mkldnn = False self.op_type = "softmax" self.use_cudnn = False - self.init_op_type() - self.inputs = { - 'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32") - } - self.outputs = { - 'Out': np.apply_along_axis(stable_softmax, 1, self.inputs['X']) - } + self.use_mkldnn = False + self.dtype = np.float32 + self.init_kernel_type() + + x = np.random.uniform(0.1, 1, [10, 10]).astype(self.dtype) + out = np.apply_along_axis(stable_softmax, 1, x) + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} self.attrs = { 'use_cudnn': self.use_cudnn, 'use_mkldnn': self.use_mkldnn } - def init_op_type(self): + def init_kernel_type(self): pass def test_check_output(self): @@ -53,6 +53,8 @@ class TestSoftmaxOp(OpTest): self.check_output() def test_check_grad(self): + if self.dtype == np.float16: + return if self.use_cudnn: place = core.CUDAPlace(0) self.check_grad_with_place( @@ -62,12 +64,24 @@ class TestSoftmaxOp(OpTest): class TestSoftmaxCUDNNOp(TestSoftmaxOp): - def init_op_type(self): + def init_kernel_type(self): self.use_cudnn = True -class TestMKLDNN(TestSoftmaxOp): - def init_op_type(self): +class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp): + def init_kernel_type(self): + self.use_cudnn = True + self.dtype = np.float16 + + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + self.check_output_with_place(place, atol=1e-3) + + +class TestSoftmaxMKLDNNOp(TestSoftmaxOp): + def init_kernel_type(self): self.use_mkldnn = True