From 8e67629c81c54014594f591cf4a92b2d3f42508e Mon Sep 17 00:00:00 2001
From: jakpiase
Date: Wed, 23 Mar 2022 13:58:36 +0100
Subject: [PATCH] Added support for BF16 datatype for all oneDNN activation
 kernels (#40721)

* added missing BF16 activations

* added softplus bf16

* minor change

* disabled tests for GPU
---
 paddle/fluid/operators/abs_op.cc              |  28 ++++-
 .../operators/mkldnn/activation_mkldnn_op.cc  |  43 +++----
 .../mkldnn/test_activation_bf16_mkldnn_op.py  | 107 +++++++++++++++++-
 .../mkldnn/test_softplus_mkldnn_op.py         |  40 ++++++-
 4 files changed, 183 insertions(+), 35 deletions(-)

diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc
index e1460629fb..71bcb4e201 100644
--- a/paddle/fluid/operators/abs_op.cc
+++ b/paddle/fluid/operators/abs_op.cc
@@ -30,6 +30,21 @@ namespace operators {
 class AbsOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    auto input_data_type =
+        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
+  }
 };
 
 class AbsOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -72,8 +87,17 @@ class AbsGradOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
-    auto dtype = OperatorWithKernel::IndicateVarDataType(ctx, "X");
-    return framework::OpKernelType(dtype, ctx.GetPlace());
+    auto input_data_type =
+        framework::OperatorWithKernel::IndicateVarDataType(ctx, "X");
+
+#ifdef PADDLE_WITH_MKLDNN
+    if (this->CanMKLDNNBeUsed(ctx, input_data_type)) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
+    }
+#endif
+    return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
 };
 
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index e8c80096dd..bdd868c1e2 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -315,15 +315,7 @@ using ExpMKLDNNGradUseOutFunctor = MKLDNNActivationGradUseOutFunc<
 
 namespace ops = paddle::operators;
 
-#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
-  REGISTER_OP_KERNEL(act_type, MKLDNN, ::paddle::platform::CPUPlace, \
-                     ops::MKLDNNActivationKernel<ops::functor<float>>); \
-  REGISTER_OP_KERNEL( \
-      act_type##_grad, MKLDNN, ::paddle::platform::CPUPlace, \
-      ops::MKLDNNActivationGradKernel<ops::grad_functor<float>>);
-
-#define REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(act_type, functor, \
-                                               grad_functor) \
+#define REGISTER_ACTIVATION_MKLDNN_KERNEL(act_type, functor, grad_functor) \
   REGISTER_OP_KERNEL( \
       act_type, MKLDNN, ::paddle::platform::CPUPlace, \
       ops::MKLDNNActivationKernel<ops::functor<float>>, \
@@ -339,30 +331,27 @@ namespace ops = paddle::operators;
                      ops::MKLDNNActivationKernel<ops::functor<float>>);
 
 #define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \
-  __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor); \
-  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
-  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor); \
-  __macro(hard_swish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
-  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradUseOutFunctor); \
   __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor); \
   __macro(elu, EluMKLDNNFunctor, EluMKLDNNGradUseOutFunctor); \
-  __macro(exp, ExpMKLDNNFunctor, ExpMKLDNNGradUseOutFunctor);
+  __macro(exp, ExpMKLDNNFunctor, ExpMKLDNNGradUseOutFunctor); \
+  __macro(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor); \
+  __macro(hard_swish, HardSwishMKLDNNFunctor, HardSwishMKLDNNGradFunctor); \
+  __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(mish, MishMKLDNNFunctor, MishMKLDNNGradFunctor); \
+  __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(relu6, Relu6MKLDNNFunctor, Relu6MKLDNNGradFunctor); \
+  __macro(sigmoid, SigmoidMKLDNNFunctor, SigmoidMKLDNNGradUseOutFunctor); \
+  __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradUseOutFunctor); \
+  __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor); \
+  __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradUseOutFunctor);
 
 FOR_EACH_MKLDNN_KERNEL_FUNCTOR(REGISTER_ACTIVATION_MKLDNN_KERNEL);
-REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(relu, ReluMKLDNNFunctor,
-                                       ReluMKLDNNGradFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor,
-                                       GeluMKLDNNGradFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor,
-                                       SigmoidMKLDNNGradUseOutFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sqrt, SqrtMKLDNNFunctor,
-                                       SqrtMKLDNNGradUseOutFunctor);
-REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(mish, MishMKLDNNFunctor,
-                                       MishMKLDNNGradFunctor);
+REGISTER_ACTIVATION_MKLDNN_KERNEL_FWD_ONLY(round, RoundMKLDNNFunctor);
 
 namespace ops = paddle::operators;
 REGISTER_OP_KERNEL(
     softplus, MKLDNN, paddle::platform::CPUPlace,
-    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>);
+    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>,
+    ops::MKLDNNActivationKernel<
+        ops::SoftplusMKLDNNFunctor<paddle::platform::bfloat16>>);
 
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py
index 8e0fdf7645..ac851bf9fe 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_bf16_mkldnn_op.py
@@ -50,11 +50,11 @@ class MKLDNNBF16ActivationOp(object):
         self.dtype = np.uint16
         self.init_data()
         self.config()
+        self.set_attrs()
         self.out = self.op_forward(self.x)
 
         self.inputs = {'X': convert_float_to_uint16(self.x)}
         self.outputs = {'Out': self.out}
-        self.set_attrs()
 
     def calculate_grads(self):
         self.dx = self.op_grad(self.out, self.x)
@@ -162,5 +162,110 @@ class TestMKLDNNMishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
         return dout * ((np.exp(x) * omega) / delta**2)
 
 
+class TestMKLDNNRelu6BF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "relu6"
+
+    def op_forward(self, x):
+        return np.clip(x, 0, 6)
+
+    def op_grad(self, dout, x):
+        return np.where((x > 0) & (x <= 6), dout, 0)
+
+
+class TestMKLDNNLeakyReluBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "leaky_relu"
+
+    def op_forward(self, x):
+        return np.where(x > 0, x, self.alpha * x)
+
+    def op_grad(self, dout, x):
+        return np.where(x > 0, dout, self.alpha * dout)
+
+    def set_attrs(self):
+        self.alpha = 0.2
+        self.attrs = {"use_mkldnn": True, "alpha": self.alpha}
+
+
+class TestMKLDNNSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "swish"
+
+    def expit(self, val):
+        return 1 / (1 + np.exp(-self.beta * val))
+
+    def op_forward(self, x):
+        return x * self.expit(x)
+
+    def op_grad(self, dout, x):
+        return dout * self.expit(x) * (1 + self.beta * x * (1 - self.expit(x)))
+
+    def set_attrs(self):
+        self.beta = 0.2
+        self.attrs = {"use_mkldnn": True, "beta": self.beta}
+
+
+class TestMKLDNNHardSwishBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "hard_swish"
+
+    def op_forward(self, x):
+        result = np.where(x < -3, 0, x)
+        return np.where(result > 3, result, result * (result + 3) / 6)
+
+    def op_grad(self, dout, x):
+        result = np.where(x < -3, 0, x)
+        return np.where(result > 3, dout, dout * (2 * x + 3) / 6)
+
+
+class TestMKLDNNTanhBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "tanh"
+
+    def op_forward(self, x):
+        return np.tanh(x)
+
+    def op_grad(self, dout, x):
+        return dout * (1 - np.tanh(x)**2)
+
+
+class TestMKLDNNAbsBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "abs"
+
+    def op_forward(self, x):
+        return np.absolute(x)
+
+    def op_grad(self, dout, x):
+        return dout * np.sign(x)
+
+
+class TestMKLDNNEluBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "elu"
+
+    def op_forward(self, x):
+        return np.where(x > 0, x, self.alpha * (np.exp(x) - 1))
+
+    def op_grad(self, dout, x):
+        return np.where(x > 0, dout, dout * self.alpha * np.exp(x))
+
+    def set_attrs(self):
+        self.alpha = 0.2
+        self.attrs = {"use_mkldnn": True, "alpha": self.alpha}
+
+
+class TestMKLDNNExpBF16Op(MKLDNNBF16ActivationOp, TestActivation):
+    def config(self):
+        self.op_type = "exp"
+
+    def op_forward(self, x):
+        return np.exp(x)
+
+    def op_grad(self, dout, x):
+        return dout * np.exp(x)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
index 92699cdbd2..c2911114e4 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
-from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool
+from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -30,23 +30,32 @@ def ref_softplus(x, beta, threshold):
     return out
 
 
-@OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)),
-                    "GPU is not supported")
+@OpTestTool.skip_if_not_cpu_bf16()
 class TestSoftplusOneDNNOp(OpTest):
     def setUp(self):
         self.op_type = "softplus"
        self.beta = 1
         self.threshold = 20
         self.config()
+        self.set_dtype()
         self.attrs = {'use_mkldnn': True, 'beta': self.beta}
-        self.inputs = {'X': np.random.random(self.x_shape).astype(np.float32)}
+        self.x = np.random.random(self.x_shape).astype(np.float32)
+        self.out = ref_softplus(self.x, self.beta, self.threshold)
+
+        if self.dtype != np.float32:
+            self.x = convert_float_to_uint16(self.x)
+
+        self.inputs = {'X': self.x}
         self.outputs = {
-            'Out': ref_softplus(self.inputs['X'], self.beta, self.threshold)
+            'Out': self.out
         }
 
     def config(self):
         self.x_shape = (10, 10)
 
+    def set_dtype(self):
+        self.dtype = np.float32
+
     def test_check_output(self):
         self.check_output()
 
@@ -73,6 +82,27 @@ class 
TestSoftplus3DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp): self.beta = 0.4 +class TestSoftplusBF16OneDNNOp(TestSoftplusOneDNNOp): + def set_dtype(self): + self.dtype = np.uint16 + + +class TestSoftplus4DBF16OneDNNOp(TestSoftplus4DOneDNNOp): + def set_dtype(self): + self.dtype = np.uint16 + + +class TestSoftplus6DBF16OneDNNOp(TestSoftplus6DOneDNNOp): + def set_dtype(self): + self.dtype = np.uint16 + + +class TestSoftplus3DExtendedFunctorBF16OneDNNOp( + TestSoftplus3DExtendedFunctorOneDNNOp): + def set_dtype(self): + self.dtype = np.uint16 + + if __name__ == "__main__": paddle.enable_static() unittest.main() -- GitLab
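
A note on the uint16 arrays used throughout the BF16 tests above: convert_float_to_uint16 essentially stores each value as the upper 16 bits of its IEEE float32 representation, which is the bfloat16 bit layout the oneDNN kernels consume. The following standalone NumPy sketch is only an illustration of that bit manipulation (it is not Paddle's actual helper) and shows why BF16 results match float32 references only approximately:

import numpy as np


def float32_to_bf16_bits(x):
    # Keep the upper 16 bits of each float32 value (sign, exponent and the
    # top 7 mantissa bits) and store them as uint16 -- the bfloat16 layout.
    x = np.ascontiguousarray(np.asarray(x, dtype=np.float32))
    return (x.view(np.uint32) >> np.uint32(16)).astype(np.uint16)


def bf16_bits_to_float32(bits):
    # Re-expand the stored uint16 bit pattern into float32 for checking.
    bits = np.ascontiguousarray(np.asarray(bits, dtype=np.uint16))
    return (bits.astype(np.uint32) << np.uint32(16)).view(np.float32)


if __name__ == "__main__":
    x = np.random.random((10, 10)).astype(np.float32)
    roundtrip = bf16_bits_to_float32(float32_to_bf16_bits(x))
    # Truncating the mantissa to 7 bits keeps only ~2-3 significant decimal
    # digits, so the round trip is only approximately equal to the input.
    assert np.allclose(roundtrip, x, atol=4e-3)

Because only the mantissa width shrinks while the exponent range stays that of float32, the conversion is cheap and needs no rescaling of the data, which is what makes registering a bfloat16 instantiation next to each float kernel (as the unified registration macro above does) practical.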