From d2b938ef5ad0dd0be1a834e78b444f18ccc68806 Mon Sep 17 00:00:00 2001 From: qingqing01 Date: Thu, 21 Mar 2019 18:52:38 +0800 Subject: [PATCH] Refine gradient proto maker and python API for affine_channel_op (#16340) * Rewrite gradient ProtoMaker for affine_channel_op to remove the Output as the input. * Add act in Python API to make the act can be in-place by layer_help.py --- paddle/fluid/API.spec | 2 +- paddle/fluid/operators/affine_channel_op.cc | 24 +++++++++++++++++++-- python/paddle/fluid/layers/nn.py | 10 +++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 3f9cc30a1..9b111e09e 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -205,7 +205,7 @@ paddle.fluid.layers.maxout (ArgSpec(args=['x', 'groups', 'name'], varargs=None, paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5f207ae10589ebe38a63575ef6ff8e1e')) paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '51def402b8910e163cbace9d0c0526ed')) paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '77a6d80aa5551ca70324fc975c44507f')) -paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None)), ('document', '2f46f1ff39a13ab00857e7b9f44b2fa7')) +paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', 'ab84fdc6dc60f3ad9aa397e6007e3bf9')) paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '70e3b5182a18b40b47ecabd7c8490a35')) paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', '9bb77f8dc002dd2ce75d4769eaaf5007')) paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd256cba1c41a5ed92ce3f31e24a2ca6d')) diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index 8f1adab89..268a5b894 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -113,6 +113,27 @@ class AffineChannelOpGrad : public framework::OperatorWithKernel { } }; +class AffineChannelGradMaker : public framework::SingleGradOpDescMaker { + public: + using framework::SingleGradOpDescMaker::SingleGradOpDescMaker; + + std::unique_ptr Apply() const override { + auto* op = new framework::OpDesc(); + op->SetType("affine_channel_grad"); + op->SetInput("X", Input("X")); + op->SetInput(framework::GradVarName("Out"), OutputGrad("Out")); + op->SetInput("Scale", Input("Scale")); + + op->SetAttrMap(Attrs()); + + op->SetOutput(framework::GradVarName("X"), InputGrad("X")); + op->SetOutput(framework::GradVarName("Scale"), InputGrad("Scale")); + op->SetOutput(framework::GradVarName("Bias"), InputGrad("Bias")); + + return std::unique_ptr(op); + } +}; + template using EigenArrayMap = Eigen::Map>; @@ -260,8 +281,7 @@ namespace ops = paddle::operators; using CPU = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(affine_channel, ops::AffineChannelOp, - ops::AffineChannelOpMaker, - paddle::framework::DefaultGradOpDescMaker); + ops::AffineChannelOpMaker, ops::AffineChannelGradMaker); REGISTER_OPERATOR(affine_channel_grad, ops::AffineChannelOpGrad); REGISTER_OP_CPU_KERNEL(affine_channel, ops::AffineChannelKernel, diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 28655314d..7b72b3826 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -9706,7 +9706,12 @@ def sequence_reverse(x, name=None): return out -def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): +def affine_channel(x, + scale=None, + bias=None, + data_layout='NCHW', + name=None, + act=None): """ Applies a separate affine transformation to each channel of the input. Useful for replacing spatial batch norm with its equivalent fixed @@ -9725,6 +9730,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): data_layout (string, default NCHW): NCHW or NHWC. If input is 2D tensor, you can ignore data_layout. name (str, default None): The name of this layer. + act (str, default None): Activation to be applied to the output of this layer. Returns: out (Variable): A tensor of the same shape and data layout with x. @@ -9744,7 +9750,7 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None): 'Bias': bias}, attrs={"data_layout": data_layout}, outputs={"Out": out}) - return out + return helper.append_activation(pre_activation) def similarity_focus(input, axis, indexes, name=None): -- GitLab