From 5d34ef61cbeacb7089f6e28de685c79db324f207 Mon Sep 17 00:00:00 2001
From: Michal Gallus
Date: Tue, 4 Sep 2018 12:02:57 +0200
Subject: [PATCH] Fuse MKLDNN's Conv + ReLU

---
 paddle/fluid/operators/conv_mkldnn_op.cc      | 46 +++++++++++++++----
 paddle/fluid/operators/conv_op.cc             |  2 +
 .../fluid/transpiler/inference_transpiler.py  | 39 ++++++++++++++--
 3 files changed, 75 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/operators/conv_mkldnn_op.cc b/paddle/fluid/operators/conv_mkldnn_op.cc
index c5cbadc892..53e705c8ca 100644
--- a/paddle/fluid/operators/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/conv_mkldnn_op.cc
@@ -296,6 +296,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     std::vector<int> strides = ctx.Attr<std::vector<int>>("strides");
     std::vector<int> paddings = ctx.Attr<std::vector<int>>("paddings");
     std::vector<int> dilations = ctx.Attr<std::vector<int>>("dilations");
+    bool fuse_relu = ctx.Attr<bool>("fuse_relu");
     int groups = ctx.Attr<int>("groups");
 
     // TODO(pzelazko-intel) add support for group convolution and dilation
@@ -348,11 +349,12 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       bias_tz = paddle::framework::vectorize2int(bias->dims());
       auto bias_md = platform::MKLDNNMemDesc(
           bias_tz, platform::MKLDNNGetDataType<T>(), memory::format::x);
-      conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md,
-                                     strides, paddings, mkldnn_engine);
+      conv_pd =
+          ConvFwdPrimitiveDesc(src_md, weights_md, bias_md, dst_md, strides,
+                               paddings, mkldnn_engine, fuse_relu);
     } else {
       conv_pd = ConvFwdPrimitiveDesc(src_md, weights_md, dst_md, strides,
-                                     paddings, mkldnn_engine);
+                                     paddings, mkldnn_engine, fuse_relu);
     }
     // Save conv_pd/src_memory/weights_memory for backward pass
     dev_ctx.SetBlob(key_conv_pd, conv_pd);
@@ -402,11 +404,26 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
   }
 
  private:
+  mkldnn::primitive_attr AddRelu() const {
+    // Fusion with a ReLU layer is executed through the PostOps feature of
+    // MKLDNN. Create a PostOps object and configure it to execute an eltwise
+    // relu operation.
+    mkldnn::primitive_attr conv_attr;
+    constexpr float scale = 1.0f;
+    constexpr float negative_slope = 0.0f;
+    constexpr float placeholder = 0.0f;
+    mkldnn::post_ops post_operations;
+    post_operations.append_eltwise(scale, mkldnn::algorithm::eltwise_relu,
+                                   negative_slope, placeholder);
+    conv_attr.set_post_ops(post_operations);
+    return conv_attr;
+  }
+
   std::unique_ptr<mkldnn::convolution_forward::primitive_desc>
   ConvFwdPrimitiveDesc(const memory::desc& src, const memory::desc& weights,
                        const memory::desc& dst, const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine) const {
+                       const mkldnn::engine& engine,
+                       const bool fuse_relu) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
 
@@ -415,8 +432,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    auto p_conv_pd =
-        new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);
+    mkldnn::primitive_attr conv_attr;
+    if (fuse_relu) {
+      conv_attr = AddRelu();
+    }
+
+    auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
+        conv_desc, conv_attr, engine);
 
     return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
         p_conv_pd);
@@ -427,7 +449,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
                        const memory::desc& bias, const memory::desc& dst,
                        const std::vector<int>& strides,
                        const std::vector<int>& paddings,
-                       const mkldnn::engine& engine) const {
+                       const mkldnn::engine& engine,
+                       const bool fuse_relu) const {
     memory::dims stride_dims = {strides[0], strides[1]};
     memory::dims padding_dims = {paddings[0], paddings[1]};
 
@@ -436,8 +459,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         bias, dst, stride_dims, padding_dims, padding_dims,
         mkldnn::padding_kind::zero);
 
-    auto p_conv_pd =
-        new mkldnn::convolution_forward::primitive_desc(conv_desc, engine);
+    mkldnn::primitive_attr conv_attr;
+    if (fuse_relu) {
+      conv_attr = AddRelu();
+    }
+
+    auto p_conv_pd = new mkldnn::convolution_forward::primitive_desc(
+        conv_desc, conv_attr, engine);
 
     return std::unique_ptr<mkldnn::convolution_forward::primitive_desc>(
         p_conv_pd);
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 61ca80877a..3332e64301 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -161,6 +161,8 @@ void Conv2DOpMaker::Make() {
   AddAttr<bool>("use_mkldnn",
                 "(bool, default false) Only used in mkldnn kernel")
       .SetDefault(false);
+  AddAttr<bool>("fuse_relu", "(bool, default false) Only used in mkldnn kernel")
+      .SetDefault(false);
   AddAttr<std::string>(
       "data_format",
       "(string, default NCHW) Only used in "
diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py
index 02fefe32df..adad2428f7 100644
--- a/python/paddle/fluid/transpiler/inference_transpiler.py
+++ b/python/paddle/fluid/transpiler/inference_transpiler.py
@@ -60,12 +60,46 @@ class InferenceTranspiler(object):
         if not isinstance(scope, core.Scope):
             raise TypeError("scope should be as Scope type or None")
         use_mkldnn = bool(os.getenv("FLAGS_use_mkldnn", False))
+        self._fuse_batch_norm(program, place, scope)
         if use_mkldnn:
-            self._fuse_relu_mkldnn(program)
             self._fuse_conv_bias_mkldnn(program)
+            self._fuse_conv_relu_mkldnn(program)
+            self._fuse_bn_relu_mkldnn(program)
+
+    def _fuse_conv_relu_mkldnn(self, program):
+        '''
+        Transpile the program by fusing the relu activation into the
+        preceding convolution for MKLDNN. A relu OP that immediately
+        follows a convolution OP is fused by setting the 'fuse_relu'
+        attribute on the convolution OP.
+
+        The result of the fusion is:
+        - before:
+          - conv->relu->any_other_op
+        - after:
+          - conv->any_other_op
+
+        :param program: program to transpile
+        :type program: Program
+        '''
+        self.block = program.block(0)
+
+        i = 0
+        while i < len(self.block.ops):
+            current_op = self.block.ops[i]
+            if current_op.type in ['conv2d'] and i + 1 < len(self.block.ops):
+                next_op = self.block.ops[i + 1]
+                if next_op.type == 'relu':
+                    # modify the conv OP to include relu
+                    current_op.set_attr("fuse_relu", True)
+                    # remove the relu OP
+                    self.block._remove_op(i + 1)
+            i = i + 1
+
+        # TODO(luotao): use clone() method to flush the program.desc in force,
+        # since some large program.desc will not be flushed immediately.
+        # And a better solution will be considered later.
+        program = program.clone()
 
-    def _fuse_relu_mkldnn(self, program):
+    def _fuse_bn_relu_mkldnn(self, program):
         '''
         Transpile the program by fused relu activation for MKLDNN program.
@@ -159,7 +193,6 @@ class InferenceTranspiler(object):
                 self._fuse_conv_bias(i, current_op, next_op)
                 self.block._remove_op(i + 1)  # Remove old conv
                 self.block._remove_op(i + 1)  # Remove elementwise_add
-            i = i + 1
             i = i + 1
 
         self._remove_unused_var()
-- 
GitLab
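
Note: below is a minimal usage sketch of the pass added by this patch, not part
of the patch itself. The model directory and variable names are illustrative
assumptions; only InferenceTranspiler.transpile() and the FLAGS_use_mkldnn
environment variable come from the code above.

    import os
    import paddle.fluid as fluid

    # The MKLDNN fusion passes only run when FLAGS_use_mkldnn is set in the
    # environment (transpile() reads it via os.getenv at call time).
    os.environ["FLAGS_use_mkldnn"] = "1"

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    # "model_dir" is a hypothetical path to a model saved earlier with
    # fluid.io.save_inference_model.
    [program, feed_names, fetch_targets] = fluid.io.load_inference_model(
        "model_dir", exe)

    # Applies the batch-norm fusion and, with MKLDNN enabled, the conv+bias,
    # conv+relu and bn+relu fusions defined in this patch. The conv+relu pass
    # marks each conv2d followed by a relu with fuse_relu=True and removes the
    # relu OP, so the MKLDNN kernel executes ReLU as a post-op.
    t = fluid.InferenceTranspiler()
    t.transpile(program, place)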