From 62aff0a7ac39ad45adf7541982197e234019b626 Mon Sep 17 00:00:00 2001
From: Adam <38704900+grygielski@users.noreply.github.com>
Date: Tue, 7 Apr 2020 15:02:32 +0200
Subject: [PATCH] Add DNNL GELU kernels (#22426)

---
 paddle/fluid/operators/gelu_op.cc             | 26 ++++++++-
 .../operators/mkldnn/activation_mkldnn_op.cc  | 25 +++++++++
 .../mkldnn/test_activation_mkldnn_op.py       | 55 ++++++++++++++++++-
 3 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/paddle/fluid/operators/gelu_op.cc b/paddle/fluid/operators/gelu_op.cc
index 76af785f132..c72cabad891 100644
--- a/paddle/fluid/operators/gelu_op.cc
+++ b/paddle/fluid/operators/gelu_op.cc
@@ -44,8 +44,19 @@ class GeluOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
+    framework::LibraryType library{framework::LibraryType::kPlain};
+    framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+#ifdef PADDLE_WITH_MKLDNN
+    auto it = this->Attrs().find("use_mkldnn");
+    if (library == framework::LibraryType::kPlain &&
+        it != this->Attrs().end() && platform::CanMKLDNNBeUsed(ctx)) {
+      library = framework::LibraryType::kMKLDNN;
+      layout = framework::DataLayout::kMKLDNN;
+    }
+#endif
     return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(),
+        layout, library);
   }
 };

@@ -73,8 +84,19 @@ class GeluGradOp : public framework::OperatorWithKernel {
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext &ctx) const override {
+    framework::LibraryType library{framework::LibraryType::kPlain};
+    framework::DataLayout layout = framework::DataLayout::kAnyLayout;
+#ifdef PADDLE_WITH_MKLDNN
+    auto it = this->Attrs().find("use_mkldnn");
+    if (library == framework::LibraryType::kPlain &&
+        it != this->Attrs().end() && platform::CanMKLDNNBeUsed(ctx)) {
+      library = framework::LibraryType::kMKLDNN;
+      layout = framework::DataLayout::kMKLDNN;
+    }
+#endif
     return framework::OpKernelType(
-        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(),
+        layout, library);
   }
 };

diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index ec1fd4e5bcd..0aa04aef5ab 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -162,6 +162,30 @@ struct MKLDNNActivationGradFunc : public BaseActivationFunctor<T> {
   }
 };

+template <typename T>
+struct GeluMKLDNNFunctor : public BaseActivationFunctor<T> {
+  void operator()(const framework::ExecutionContext &ctx) const {
+    const bool approximate = ctx.Attr<bool>("approximate");
+    if (approximate) {
+      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
+    } else {
+      eltwise_forward<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
+    }
+  }
+};
+
+template <typename T>
+struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
+  void operator()(const framework::ExecutionContext &ctx) const {
+    const bool approximate = ctx.Attr<bool>("approximate");
+    if (approximate) {
+      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_tanh);
+    } else {
+      eltwise_grad<T>(ctx, mkldnn::algorithm::eltwise_gelu_erf);
+    }
+  }
+};
+
 template <typename T>
 using ReluMKLDNNFunctor =
     MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;
@@ -216,6 +240,7 @@ namespace ops = paddle::operators;
 #define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro)                  \
   __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor);       \
   __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \
+  __macro(gelu, GeluMKLDNNFunctor, GeluMKLDNNGradFunctor);       \
   __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor);    \
   __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor);       \
   __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor);       \
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
index da1a6ee9669..9f635c3f267 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py
@@ -19,7 +19,8 @@ import numpy as np
 from scipy.special import expit
 import paddle.fluid.core as core
 from paddle.fluid.tests.unittests.op_test import OpTest
-from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu, TestSwish
+from paddle.fluid.tests.unittests.test_activation_op import TestActivation, TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu, TestSwish
+from paddle.fluid.tests.unittests.test_gelu_op import gelu
 from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd


@@ -59,6 +60,32 @@ class TestMKLDNNLeakyReluDim2(TestLeakyRelu):
             ['X'], 'Out', max_relative_error=0.007, check_dygraph=False)


+class TestMKLDNNGeluDim2(TestActivation):
+    def setUp(self):
+        self.op_type = "gelu"
+        self.dtype = np.float32
+
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        out = gelu(x, False)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True}
+
+
+class TestMKLDNNGeluDim2Approx(TestActivation):
+    def setUp(self):
+        self.op_type = "gelu"
+        self.dtype = np.float32
+
+        x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype)
+        out = gelu(x, True)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True, "approximate": True}
+
+
 class TestMKLDNNTanhDim2(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim2, self).setUp()
@@ -185,6 +212,32 @@ class TestMKLDNNLeakyReluDim4(TestLeakyRelu):
             ['X'], 'Out', max_relative_error=0.007, check_dygraph=False)


+class TestMKLDNNGeluDim4(TestActivation):
+    def setUp(self):
+        self.op_type = "gelu"
+        self.dtype = np.float32
+
+        x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype(self.dtype)
+        out = gelu(x, False)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True}
+
+
+class TestMKLDNNGeluDim4Approx(TestActivation):
+    def setUp(self):
+        self.op_type = "gelu"
+        self.dtype = np.float32
+
+        x = np.random.uniform(-1, 1, [2, 4, 3, 5]).astype(self.dtype)
+        out = gelu(x, True)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True, "approximate": True}
+
+
 class TestMKLDNNTanhDim4(TestTanh):
     def setUp(self):
         super(TestMKLDNNTanhDim4, self).setUp()
--
GitLab
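
Editor's note (not part of the patch): the tests above compare the MKLDNN kernels against the `gelu(x, approximate)` reference helper imported from `test_gelu_op`. The sketch below shows, in NumPy, the two formulas that the DNNL algorithms `eltwise_gelu_erf` and `eltwise_gelu_tanh` are understood to implement; the name `gelu_ref` is illustrative and does not exist in the Paddle codebase.

# Reference-only sketch of the exact (erf) and approximate (tanh) GELU.
import numpy as np
from scipy.special import erf


def gelu_ref(x, approximate=False):
    if approximate:
        # tanh approximation, corresponding to eltwise_gelu_tanh
        return 0.5 * x * (1.0 + np.tanh(
            np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3))))
    # exact form, corresponding to eltwise_gelu_erf
    return 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))


# Same shape and range as the Dim2 unit tests in the patch.
x = np.random.uniform(-1, 1, [11, 17]).astype(np.float32)
y_exact = gelu_ref(x, False)
y_approx = gelu_ref(x, True)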