Added softplus FP32 FWD OneDNN kernel (#36382)

* added softplus
* refactored softplus op
* deleted unnecessary file
* added missing file
* added formatting
* disabled tests if GPU is used
* added reviewer suggestion
* unified softplus kernel

Added softplus FP32 FWD OneDNN kernel (#36382)
* added softplus
* refactored softplus op
* deleted unnecessary file
* added missing file
* added formatting
* disabled tests if GPU is used
* added reviewer suggestion
* unified softplus kernel
bdac9ff6 · jakpiase · GitHub · 4c0ad772 · bdac9ff6 · bdac9ff6
3 changed files
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -13,6 +13,7 @@
   limitations under the License. */
 #include "paddle/fluid/operators/activation_op.h"
+#include "paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 namespace paddle {
@@ -169,6 +170,13 @@ struct GeluMKLDNNGradFunctor : public BaseActivationFunctor<T> {
  }
 };
+// Activation functor for softplus. Unlike the MKLDNNActivationFunc aliases
+// in this file it does not name a oneDNN eltwise algorithm; it simply
+// forwards the execution context to custom_softplus_eltwise_forward<T>.
+template <typename T>
+struct SoftplusMKLDNNFunctor : public BaseActivationFunctor<T> {
+  // Runs the softplus forward computation described by `ctx`.
+  void operator()(const framework::ExecutionContext &ctx) const {
+    custom_softplus_eltwise_forward<T>(ctx);
+  }
+};
 template <typename T>
 using ReluMKLDNNFunctor =
    MKLDNNActivationFunc<T, mkldnn::algorithm::eltwise_relu>;
@@ -272,3 +280,8 @@ REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(gelu, GeluMKLDNNFunctor,
                                       GeluMKLDNNGradFunctor);
 REGISTER_ACTIVATION_MKLDNN_BF16_KERNEL(sigmoid, SigmoidMKLDNNFunctor,
                                       SigmoidMKLDNNGradFunctor);
+// Short alias used by the kernel registration below.
+namespace ops = paddle::operators;
+// Registers the MKLDNN kernel for the "softplus" op on CPUPlace. Only the
+// float instantiation of SoftplusMKLDNNFunctor is registered here, and no
+// gradient kernel is registered by this statement.
+REGISTER_OP_KERNEL(
+    softplus, MKLDNN, paddle::platform::CPUPlace,
+    ops::MKLDNNActivationKernel<ops::SoftplusMKLDNNFunctor<float>>);
--- a/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
+++ b/paddle/fluid/operators/mkldnn/softplus_mkldnn_op.h
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+namespace paddle {
+namespace operators {
+using paddle::framework::Tensor;
+// Handler that builds a oneDNN binary primitive evaluating softplus as
+//   out = soft_relu(beta * x) / beta
+// binary_mul multiplies x (src0) by the broadcast scalar beta (src1), the
+// eltwise_soft_relu post-op is applied to the product, and an eltwise_linear
+// post-op scaling by 1 / beta is appended only when beta != 1.
+template <typename T>
+class SoftplusMKLDNNHandler
+    : public platform::MKLDNNHandlerNoCachingT<T, dnnl::binary> {
+ public:
+  // x         - input tensor; its dims and format describe src0 and dst.
+  // beta      - softplus scaling factor (selects the post-op chain).
+  // engine    - oneDNN engine forwarded to the base handler.
+  // cpu_place - place forwarded to the base handler.
+  SoftplusMKLDNNHandler(const Tensor* x, const float beta,
+                        const mkldnn::engine engine, platform::Place cpu_place)
+      : platform::MKLDNNHandlerNoCachingT<T, dnnl::binary>(engine, cpu_place) {
+    auto x_tz = framework::vectorize(x->dims());
+    auto x_md =
+        dnnl::memory::desc(x_tz, platform::MKLDNNGetDataType<T>(), x->format());
+
+    // beta is one scalar broadcast over x: same rank as x, every dim == 1.
+    // Its descriptor is built from explicit unit strides instead of x's
+    // format tag. x may carry a blocked layout, and applying a blocked tag
+    // to an all-ones shape pads it, so the descriptor would cover more
+    // memory than the single float bound in AcquireBetaMemory. For plain
+    // formats the resulting descriptor is unchanged.
+    const std::vector<int64_t> beta_tz(x_tz.size(), 1);
+    const std::vector<int64_t> beta_strides(x_tz.size(), 1);
+    auto beta_md = dnnl::memory::desc(
+        beta_tz, platform::MKLDNNGetDataType<T>(), beta_strides);
+
+    dnnl::post_ops post_ops;
+    post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_soft_relu, 0.0f,
+                            0.0f);
+    // Dividing by beta is a no-op for beta == 1, so skip the extra post-op.
+    if (beta != 1.0f) {
+      post_ops.append_eltwise(1.0f, dnnl::algorithm::eltwise_linear,
+                              1.0f / beta, 0.0f);
+    }
+
+    dnnl::primitive_attr attrs;
+    attrs.set_post_ops(post_ops);
+
+    // dst reuses x's descriptor: the output has the same shape and layout.
+    this->AcquireForwardPrimitiveDescriptor(attrs, dnnl::algorithm::binary_mul,
+                                            x_md, beta_md, x_md);
+  }
+
+  // Wraps the caller's beta value as the src1 memory of the primitive.
+  // NOTE(review): the pointer is passed through as float while the src1
+  // descriptor uses T's data type; this matches only for T == float, which
+  // is the sole registered instantiation. Presumably the pointed-to value
+  // must stay alive until the primitive has executed - confirm against
+  // AcquireMemoryFromPrimitive.
+  std::shared_ptr<mkldnn::memory> AcquireBetaMemory(const float* beta) {
+    return this->AcquireMemoryFromPrimitive(
+        this->fwd_pd_->src1_desc(), platform::to_void_cast<float>(beta));
+  }
+};
+// Softplus forward pass executed through a oneDNN binary primitive.
+// Reads input "X" and attribute "beta" from `ctx`, writes output "Out",
+// and marks "Out" with the MKLDNN layout and the format of the memory that
+// received the result. When "X" and "Out" share a buffer the source memory
+// is reused as the destination.
+template <typename T>
+void custom_softplus_eltwise_forward(const framework::ExecutionContext& ctx) {
+  const auto& onednn_dev_ctx =
+      ctx.template device_context<platform::MKLDNNDeviceContext>();
+  const auto& onednn_engine = onednn_dev_ctx.GetEngine();
+
+  const auto* input = ctx.Input<Tensor>("X");
+  auto* output = ctx.Output<Tensor>("Out");
+  const bool shares_buffer = input->IsSharedBufferWith(*output);
+
+  // `beta_value` lives on this stack frame; the primitive is executed and
+  // the stream drained before the function returns.
+  const float beta_value = ctx.Attr<float>("beta");
+
+  SoftplusMKLDNNHandler<T> softplus_handler(input, beta_value, onednn_engine,
+                                            ctx.GetPlace());
+
+  auto src_mem = softplus_handler.AcquireSrcMemory(input);
+  auto beta_mem = softplus_handler.AcquireBetaMemory(&beta_value);
+  // In-place execution writes the result back into the source memory.
+  auto dst_mem =
+      shares_buffer ? src_mem : softplus_handler.AcquireDstMemory(output);
+  auto softplus_prim = softplus_handler.AcquireForwardPrimitive();
+
+  auto& stream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
+  softplus_prim->execute(stream, {{DNNL_ARG_SRC_0, *src_mem},
+                                  {DNNL_ARG_SRC_1, *beta_mem},
+                                  {DNNL_ARG_DST, *dst_mem}});
+  stream.wait();
+
+  output->set_layout(framework::DataLayout::kMKLDNN);
+  output->set_format(platform::GetMKLDNNFormat(*dst_mem));
+}
+}  // namespace operators
+}  // namespace paddle
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_softplus_mkldnn_op.py
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import unittest
+import numpy as np
+from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.framework import _current_expected_place
+def ref_softplus(x, beta, threshold):
+    x_beta = beta * x
+    out = np.select([x_beta <= threshold, x_beta > threshold],
+                    [np.log(1 + np.exp(x_beta)) / beta, x])
+    return out
+# Skipped unless the current expected place is a CPUPlace.
+@OpTestTool.skip_if(not (isinstance(_current_expected_place(), core.CPUPlace)),
+                    "GPU is not supported")
+class TestSoftplusOneDNNOp(OpTest):
+    """Output check for the softplus op run with ``use_mkldnn=True``.
+
+    Subclasses override ``config`` to change the input shape and/or beta.
+    """
+    def setUp(self):
+        """Builds op attributes, a random float32 input and the expected output."""
+        self.op_type = "softplus"
+        # Defaults; config() below may override beta and must set x_shape.
+        self.beta = 1
+        self.threshold = 20
+        self.config()
+        # threshold is only used by the NumPy reference; it is not passed to
+        # the op as an attribute.
+        self.attrs = {'use_mkldnn': True, 'beta': self.beta}
+        self.inputs = {'X': np.random.random(self.x_shape).astype(np.float32)}
+        self.outputs = {
+            'Out': ref_softplus(self.inputs['X'], self.beta, self.threshold)
+        }
+    def config(self):
+        """Sets the 2-D input shape used by the base test case."""
+        self.x_shape = (10, 10)
+    def test_check_output(self):
+        """Compares the op output with the reference stored in ``self.outputs``."""
+        self.check_output()
+class TestSoftplus4DOneDNNOp(TestSoftplusOneDNNOp):
+    """Same check as the base case with a 4-D input and the default beta."""
+    def config(self):
+        self.x_shape = (10, 5, 4, 2)
+class TestSoftplus6DOneDNNOp(TestSoftplusOneDNNOp):
+    """Same check as the base case with a 6-D input and the default beta."""
+    def config(self):
+        self.x_shape = (3, 2, 2, 5, 4, 2)
+class TestSoftplus6DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp):
+    """6-D input with beta overridden to 2.5 (a beta != 1 case)."""
+    def config(self):
+        self.x_shape = (3, 5, 2, 5, 4, 2)
+        self.beta = 2.5
+class TestSoftplus3DExtendedFunctorOneDNNOp(TestSoftplusOneDNNOp):
+    """3-D input with beta overridden to 0.4 (a beta != 1 case)."""
+    def config(self):
+        self.x_shape = (20, 4, 2)
+        self.beta = 0.4
+# When run as a script: switch Paddle to static mode, then run the tests.
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()