/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

15
#include <memory>
16

F
From00 已提交
17 18 19 20 21
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"
C
chengduoZH 已提交
22 23 24 25

namespace paddle {
namespace operators {

Y
Yang Yang 已提交
26
// Returns the pooled extent along a single dimension:
//   (input_size - filter_size + 2 * padding) / stride + 1
// The division is plain integer division. No validation is performed here, so
// the caller is responsible for passing a non-zero stride.
inline int MaxPoolOutputSize(int input_size, int filter_size, int padding,
                             int stride) {
  const int padded_span = input_size - filter_size + 2 * padding;
  return padded_span / stride + 1;
}

class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
36
 protected:
37
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
38
      const framework::ExecutionContext &ctx) const override {
39 40 41
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
        ctx.device_context());
C
chengduoZH 已提交
42
  }
C
chengduoZH 已提交
43 44 45 46 47 48
};

class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
49
 protected:
50
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
51
      const framework::ExecutionContext &ctx) const override {
52 53 54
    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
                                       ctx, framework::GradVarName("Out")),
                                   ctx.device_context());
C
chengduoZH 已提交
55
  }
C
chengduoZH 已提交
56 57 58 59
};

// Declares the inputs, outputs, attributes and documentation of the
// max_pool2d_with_index operator (NCHW layout).
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "X",
        "(Tensor) The input tensor of pooling operator. "
        "The format of input tensor is NCHW, where N is batch size, C is the "
        "number of channels, H is the height of the image, "
        "and W is the width of the image.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is "
              "the number of channels, H is the height of the image "
              "and W is the width of the image.");
    // Each fragment ends with a space so the concatenated sentences do not
    // run together in the generated documentation.
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is the number of channels, "
              "H is the height of the image, "
              "and W is the width of the image. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(height, "
                              "width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default:false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in H and W dimensions will be same as ksize, input data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1, 1}), strides(height, "
                              "width) of pooling operator.")
        .SetDefault({1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default:{0, 0}), paddings(height, width) of pooling "
        "operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0});

    AddComment(R"DOC(
MaxPool2d Operator.

The maxPooling2d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature, 
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
  Where
       $$
       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
  
  For adaptive = true:
       $$
       H_{out} = ksize[0]   W_{out} = ksize[1]
       $$
      

)DOC");
  }
};

// Declares the inputs, outputs, attributes and documentation of the
// max_pool3d_with_index operator (NCDHW layout).
class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor) The input tensor of pooling operator. "
             "The format of input tensor is NCDHW, where N is batch size, C is "
             "the number of channels, and D, H and W are the depth, height and "
             "width of "
             "the image, respectively.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, "
              "and D, H and W are the depth, height and "
              "width of the image, respectively.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, and "
              "D, H and W are the depth, height and width "
              "of the image, respectively. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(depth, "
                              "height, width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    // The 3-D operator sizes all three spatial dimensions from ksize when
    // adaptive is set (see the DOC formula below), so D is listed as well.
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in D, H and W dimensions will be same as ksize, input "
        "data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1,1,1}), strides(depth, "
                              "height, width) of pooling operator.")
        .SetDefault({1, 1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default {0,0,0}), paddings(depth, "
        "height, width) of pooling operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0, 0});

    AddComment(R"DOC(
MaxPool3d Operator.

The maxpooling3d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively. 
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
  Where
       $$
       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
       $$
  
  For adaptive = true:
       $$
       D_{out} = ksize[0]   H_{out} = ksize[1]   W_{out} = ksize[2]
       $$

)DOC");
  }
};
C
chengduoZH 已提交
237

238 239 240 241 242 243
// Describes how the backward op is built from a max_pool*_with_index forward
// op. T is the op representation being produced; the registrations in this
// file instantiate it with framework::OpDesc and imperative::OpBase.
template <typename T>
class MaxPoolWithIndexGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  void Apply(GradOpPtr<T> grad_op) const override {
    // The backward op type is the forward op type with a "_grad" suffix, and
    // it carries the same attribute map as the forward op.
    const auto grad_type = this->ForwardOpType() + "_grad";
    grad_op->SetType(grad_type);
    grad_op->SetAttrMap(this->Attrs());

    // Inputs: forward input X, forward output Mask, and the gradient of Out.
    grad_op->SetInput("X", this->Input("X"));
    grad_op->SetInput("Mask", this->Output("Mask"));
    grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));

    // Output: the gradient with respect to X.
    grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

Z
Zeng Jinle 已提交
254
// Declares MaxPoolWithIndexOpGradNoNeedBufferVarsInferer for the variable
// named "X"; it is passed to both *_with_index_grad operator registrations.
// NOTE(review): presumably this tells the framework that the grad op does not
// read X's data buffer — confirm against the macro definition.
DECLARE_NO_NEED_BUFFER_VARS_INFERER(
    MaxPoolWithIndexOpGradNoNeedBufferVarsInferer, "X");
256

C
chengduoZH 已提交
257 258 259 260 261
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

F
From00 已提交
262 263 264 265 266 267 268
// Infer-shape functors for the 2-D forward and backward ops, built from
// phi::MaxPoolWithIndexInferMeta and phi::MaxPoolWithIndexGradInferMeta.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index,
                            MaxPool2dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index_grad,
                            MaxPool2dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// Registers the 2-D forward op with its proto maker, the grad-op makers for
// the OpDesc and imperative OpBase representations, and its infer-shape
// functor.
REGISTER_OPERATOR(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
                  ops::MaxPool2dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool2dWithIndexInferShapeFunctor);
// Registers the 2-D backward op with the no-need-buffer inferer for "X" and
// its infer-shape functor.
REGISTER_OPERATOR(max_pool2d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool2dWithIndexGradInferShapeFunctor);
C
chengduoZH 已提交
277

F
From00 已提交
278 279 280 281 282 283
// Infer-shape functors for the 3-D forward and backward ops. They wrap the
// same phi infer-meta functions as the 2-D variants.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index,
                            MaxPool3dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index_grad,
                            MaxPool3dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// Registers the 3-D forward op with its proto maker, the grad-op makers for
// the OpDesc and imperative OpBase representations, and its infer-shape
// functor.
REGISTER_OPERATOR(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
                  ops::MaxPool3dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool3dWithIndexInferShapeFunctor);
// Registers the 3-D backward op with the no-need-buffer inferer for "X" and
// its infer-shape functor.
REGISTER_OPERATOR(max_pool3d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool3dWithIndexGradInferShapeFunctor);