From abee05a8c877bcb24bf15ebf582690bd85558d6b Mon Sep 17 00:00:00 2001
From: Sylwester Fraczek
Date: Thu, 19 Mar 2020 10:02:20 +0100
Subject: [PATCH] added mkldnn swish activation (#23041)

---
 .../conv_activation_mkldnn_fuse_pass.cc       |  7 +++
 .../mkldnn/conv_activation_mkldnn_fuse_pass.h |  7 +++
 ...conv_activation_mkldnn_fuse_pass_tester.cc |  3 ++
 .../inference/api/paddle_pass_builder.cc      |  1 +
 paddle/fluid/operators/activation_op.cc       |  7 +++
 .../operators/mkldnn/activation_mkldnn_op.cc  | 27 ++++++++--
 paddle/fluid/platform/mkldnn_reuse.h          |  8 +--
 .../mkldnn/test_activation_mkldnn_op.py       | 49 ++++++++++++++++++-
 8 files changed, 101 insertions(+), 8 deletions(-)

diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
index 2226169e65..8bc9072948 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc
@@ -60,6 +60,10 @@ void ConvActivationFusePass::ApplyImpl(ir::Graph* graph) const {
     if (activation_type() == "relu6") {
       desc->SetAttr("fuse_alpha",
                     boost::get<float>(activation->Op()->GetAttr("threshold")));
+    } else if (activation_type() == "swish") {
+      // paddle uses beta but mkldnn uses alpha for swish
+      desc->SetAttr("fuse_alpha",
+                    activation->Op()->GetAttrIfExists<float>("beta"));
     } else {
       desc->SetAttr("fuse_alpha",
                     activation->Op()->GetAttrIfExists<float>("alpha"));
@@ -95,3 +99,6 @@ REGISTER_PASS(conv_leaky_relu_mkldnn_fuse_pass,
 
 REGISTER_PASS(conv_relu6_mkldnn_fuse_pass,
               paddle::framework::ir::Conv2DReLU6FusePass);
+
+REGISTER_PASS(conv_swish_mkldnn_fuse_pass,
+              paddle::framework::ir::Conv2DSwishFusePass);
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h
index 7c6dc238a5..ac15fc0451 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h
+++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h
@@ -50,6 +50,13 @@ class Conv2DReLU6FusePass : public ConvActivationFusePass {
  public:
   std::string activation_type() const { return "relu6"; }
 };
+/*
+ * Fuse Conv and Swish class
+ */
+class Conv2DSwishFusePass : public ConvActivationFusePass {
+ public:
+  std::string activation_type() const { return "swish"; }
+};
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc
index ec38788bb4..f4155568cf 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc
@@ -40,6 +40,8 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
       op->SetAttr("alpha", 0.02f);
     } else if (type == "relu6") {
       op->SetAttr("threshold", 6.0f);
+    } else if (type == "swish") {
+      op->SetAttr("beta", 1.0f);
     }
   }
   op->SetOutput("Out", outputs);
@@ -133,6 +135,7 @@ TEST(ConvActivationFusePass, conv_leaky_relu_fuse_pass) {
   MainTest("leaky_relu");
 }
 TEST(ConvActivationFusePass, conv_relu6_fuse_pass) { MainTest("relu6"); }
+TEST(ConvActivationFusePass, conv_swish_fuse_pass) { MainTest("swish"); }
 
 }  // namespace ir
 }  // namespace framework
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index ccd5ded466..e29a3e3ca2 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -196,6 +196,7 @@ void CpuPassStrategy::EnableMKLDNN() {
             "conv_relu_mkldnn_fuse_pass",        //
             "conv_leaky_relu_mkldnn_fuse_pass",  //
             "conv_relu6_mkldnn_fuse_pass",       //
+            "conv_swish_mkldnn_fuse_pass",       //
             // Disabled due to topology-dependent speed-up
             // "fc_mkldnn_pass"
         })) {
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 71f67466cb..124470f0c3 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -589,6 +589,13 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker {
     AddInput("X", "Input of Swish operator");
     AddOutput("Out", "Output of Swish operator");
     AddAttr<float>("beta", "Constant beta of swish operator").SetDefault(1.0f);
+    AddAttr<bool>("use_mkldnn",
+                  "(bool, default false) Only used in mkldnn kernel")
+        .SetDefault(false);
+    AddAttr<bool>("is_test",
+                  "(bool, default false) Set to true for inference only, false "
+                  "for training. Some layers may run faster when this is true.")
+        .SetDefault(false);
     AddComment(R"DOC(
 Swish Activation Operator.
 
diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index 3b367c9a5b..b68cb325a7 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -73,8 +73,13 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
   const auto *x = ctx.Input<Tensor>("X");
   auto *y = ctx.Output<Tensor>("Out");
 
-  const T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
+  T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
+  T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
+
+  // paddle uses beta but mkldnn uses alpha for swish
+  if (algorithm == mkldnn::algorithm::eltwise_swish) {
+    std::swap(alpha, beta);
+  }
 
   PADDLE_ENFORCE(
       x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4,
@@ -112,8 +117,13 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
   const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
   auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));
 
-  const T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
-  const T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
+  T alpha = ctx.HasAttr("alpha") ? ctx.Attr<T>("alpha") : 0;
+  T beta = ctx.HasAttr("beta") ? ctx.Attr<T>("beta") : 0;
ctx.Attr("beta") : 0; + + // paddle uses beta but mkldnn uses alpha for swish + if (algorithm == mkldnn::algorithm::eltwise_swish) { + std::swap(alpha, beta); + } auto diff_dst_tz = framework::vectorize(diff_y->dims()); @@ -162,6 +172,10 @@ template using ReluMKLDNNFunctor = MKLDNNActivationFunc; +template +using SwishMKLDNNFunctor = + MKLDNNActivationFunc; + template using TanhMKLDNNFunctor = MKLDNNActivationFunc; @@ -178,6 +192,10 @@ template using ReluMKLDNNGradFunctor = MKLDNNActivationGradFunc; +template +using SwishMKLDNNGradFunctor = + MKLDNNActivationGradFunc; + template using TanhMKLDNNGradFunctor = MKLDNNActivationGradFunc; @@ -204,6 +222,7 @@ namespace ops = paddle::operators; #define FOR_EACH_MKLDNN_KERNEL_FUNCTOR(__macro) \ __macro(relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \ __macro(leaky_relu, ReluMKLDNNFunctor, ReluMKLDNNGradFunctor); \ + __macro(swish, SwishMKLDNNFunctor, SwishMKLDNNGradFunctor); \ __macro(tanh, TanhMKLDNNFunctor, TanhMKLDNNGradFunctor); \ __macro(sqrt, SqrtMKLDNNFunctor, SqrtMKLDNNGradFunctor); \ __macro(abs, AbsMKLDNNFunctor, AbsMKLDNNGradFunctor); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index f8ee9b9639..25b285ccc8 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -978,13 +978,15 @@ class ConvMKLDNNTemplateHandler : public MKLDNNHandler { constexpr float scale = 1.0f; post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu, fuse_alpha, fuse_beta); - } - - if (fuse_activation == "relu6") { + } else if (fuse_activation == "relu6") { constexpr float scale = 1.0f; post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_bounded_relu, fuse_alpha, fuse_beta); + } else if (fuse_activation == "swish") { + constexpr float scale = 1.0f; + post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_swish, + fuse_alpha, fuse_beta); } conv_attr.set_post_ops(post_operations); return conv_attr; diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py index c988e6275f..da1a6ee966 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_activation_mkldnn_op.py @@ -16,9 +16,10 @@ from __future__ import print_function import unittest import numpy as np +from scipy.special import expit import paddle.fluid.core as core from paddle.fluid.tests.unittests.op_test import OpTest -from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu +from paddle.fluid.tests.unittests.test_activation_op import TestRelu, TestTanh, TestSqrt, TestAbs, TestLeakyRelu, TestSwish from mkldnn_op_test import check_if_mkldnn_primitives_exist_in_bwd @@ -111,6 +112,29 @@ class TestMKLDNNAbsDim2(TestAbs): ['X'], 'Out', max_relative_error=0.007, check_dygraph=False) +class TestMKLDNNSwishDim2(TestSwish): + def setUp(self): + super(TestMKLDNNSwishDim2, self).setUp() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + beta = 2.3 + out = x * expit(beta * x) + + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + self.attrs = {"use_mkldnn": True, "beta": beta} + + def test_check_output(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_output() + + def test_check_grad(self): + if self.dtype == np.float16: + return + # TODO(wangzhongpu): support mkldnn op in dygraph mode + 
+        self.check_grad(['X'], 'Out')
+
+
 class TestMKLDNNReluDim4(TestRelu):
     def setUp(self):
         super(TestMKLDNNReluDim4, self).setUp()
@@ -228,6 +252,29 @@ class TestMKLDNNAbsDim4(TestAbs):
             ['X'], 'Out', max_relative_error=0.007, check_dygraph=False)
 
 
+class TestMKLDNNSwishDim4(TestSwish):
+    def setUp(self):
+        super(TestMKLDNNSwishDim4, self).setUp()
+
+        x = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32")
+        beta = 2.3
+        out = x * expit(beta * x)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+        self.attrs = {"use_mkldnn": True, "beta": beta}
+
+    def test_check_output(self):
+        # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.check_output()
+
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
+        # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.check_grad(['X'], 'Out')
+
+
 # Check if primitives already exist in backward
 class TestMKLDNNAbsPrimitivesAlreadyExist(unittest.TestCase):
     def setUp(self):
--
GitLab
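
Note (not part of the patch): a minimal Python sketch, assuming only NumPy and SciPy, of the swish reference that the new tests compute (out = x * sigmoid(beta * x)) and of the naming difference the patch handles, where Paddle's "beta" attribute is forwarded to MKL-DNN's alpha slot (fuse_alpha in the fuse pass, std::swap(alpha, beta) in the eltwise kernel). The helper name swish_reference is illustrative only.

import numpy as np
from scipy.special import expit  # expit(z) = 1 / (1 + exp(-z)), i.e. sigmoid


def swish_reference(x, beta=1.0):
    # Matches the expected output built in TestMKLDNNSwishDim2/Dim4:
    # swish(x) = x * sigmoid(beta * x)
    return x * expit(beta * x)


x = np.random.uniform(0.1, 1, [2, 4, 3, 5]).astype("float32")
out = swish_reference(x, beta=2.3)
# When swish is fused into conv, the same beta value is passed as fuse_alpha,
# because MKL-DNN's eltwise_swish takes the scaling factor as its alpha argument.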