/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <memory>

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"

namespace paddle {
namespace operators {

// Returns the pooled extent along a single spatial dimension:
//
//   (input_size - filter_size + 2 * padding) / stride + 1
//
// The division is integer division (the quotient is truncated). `stride` is
// not validated here and must be non-zero.
inline int MaxPoolOutputSize(int input_size,
                             int filter_size,
                             int padding,
                             int stride) {
  return (input_size - filter_size + 2 * padding) / stride + 1;
}

class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
38
 protected:
39
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
40
      const framework::ExecutionContext &ctx) const override {
41 42 43
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
        ctx.device_context());
C
chengduoZH 已提交
44
  }
C
chengduoZH 已提交
45 46 47 48 49 50
};

class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
51
 protected:
52
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
53
      const framework::ExecutionContext &ctx) const override {
54 55 56
    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
                                       ctx, framework::GradVarName("Out")),
                                   ctx.device_context());
C
chengduoZH 已提交
57
  }
C
chengduoZH 已提交
58 59 60 61
};

// Declares the proto of the 2-D max-pooling-with-index operator: one input
// ("X"), two outputs ("Out", "Mask"), the attributes ksize, global_pooling,
// adaptive, strides and paddings, and the operator-level documentation.
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "X",
        "(Tensor) The input tensor of pooling operator. "
        "The format of input tensor is NCHW, where N is batch size, C is the "
        "number of channels, H is the height of the image, "
        "and W is the width of the image.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is "
              "the number of channels, H is the height of the image "
              "and W is the width of the image.");
    // NOTE(review): the first literal below ends with "operator." and has no
    // trailing space, so the concatenated text reads "operator.The format".
    // Left unchanged because it is a runtime string.
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator."
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is the number of channels, "
              "H is the height of the image, "
              "and W is the width of the image. "
              "It represents the index in the current feature map.");

    // No default is set for ksize.
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(height, "
                              "width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default:false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in H and W dimensions will be same as ksize, input data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1, 1}), strides(height, "
                              "width) of pooling operator.")
        .SetDefault({1, 1});
    // NOTE(review): the description below reads "paddings and will be
    // ignored"; a word appears to be missing (the 3-D maker in this file says
    // "paddings and ksize"). Left unchanged because it is a runtime string.
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default:{0, 0}), paddings(height, width) of pooling "
        "operator. "
        "If global_pooling = true, paddings and will be ignored.")
        .SetDefault({0, 0});

    AddComment(R"DOC(
MaxPool2d Operator.

The maxPooling2d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature, 
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
  Where
       $$
       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
  
  For adaptive = true:
       $$
       H_{out} = ksize[0]   W_{out} = ksize[1]
       $$
      

)DOC");
  }
};

// Declares the proto of the 3-D max-pooling-with-index operator: one input
// ("X"), two outputs ("Out", "Mask"), the attributes ksize, global_pooling,
// adaptive, strides and paddings, and the operator-level documentation.
class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor) The input tensor of pooling operator. "
             "The format of input tensor is NCDHW, where N is batch size, C is "
             "the number of channels, and D, H and W are the depth, height and "
             "width of "
             "the image, respectively");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, "
              "and D, H and W are the depth, height and "
              "width of the image, respectively.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, and "
              "D, H and W are the depth, height and width "
              "of the image, respectively. "
              "It represents the index in the current feature map.");

    // No default is set for ksize.
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(depth, "
                              "height, width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in H and W dimensions will be same as ksize, input data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1,1,1}), strides(depth, "
                              "height, width) of pooling operator.")
        .SetDefault({1, 1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector, default {0,0,0}), paddings(depth, "
        "height, width) of pooling operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0, 0});

    AddComment(R"DOC(
MaxPool3d Operator.

The maxpooling3d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively. 
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
  Where
       $$
       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
       $$
  
  For adaptive = true:
       $$
       D_{out} = ksize[0]   H_{out} = ksize[1]   W_{out} = ksize[2]
       $$

)DOC");
  }
};
// Describes the gradient op for a pooling-with-index forward op. T is the op
// description type; this file instantiates it with paddle::framework::OpDesc
// and paddle::imperative::OpBase.
template <typename T>
class MaxPoolWithIndexGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Fills in `op`:
  //   type    : the forward op type with "_grad" appended
  //   attrs   : copied unchanged from the forward op
  //   inputs  : forward input "X", forward output "Mask", gradient of "Out"
  //   outputs : gradient of "X"
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetAttrMap(this->Attrs());
    op->SetInput("X", this->Input("X"));
    op->SetInput("Mask", this->Output("Mask"));
    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

// Declares the inferer type that both *_grad registrations in this file pass
// to REGISTER_OPERATOR; it names input "X".
// NOTE(review): presumably this tells the framework that the grad op does not
// need the buffer contents of "X" -- confirm against the macro definition.
DECLARE_NO_NEED_BUFFER_VARS_INFERER(
    MaxPoolWithIndexOpGradNoNeedBufferVarsInferer, "X");
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// 2-D pooling with index: infer-shape functors built from the phi InferMeta
// functions, followed by registration of the forward and backward operators.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index,
                            MaxPool2dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index_grad,
                            MaxPool2dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// The grad-op maker is supplied for both op description types (OpDesc and
// imperative::OpBase).
REGISTER_OPERATOR(max_pool2d_with_index,
                  ops::MaxPoolWithIndexOp,
                  ops::MaxPool2dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool2dWithIndexInferShapeFunctor);
REGISTER_OPERATOR(max_pool2d_with_index_grad,
                  ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool2dWithIndexGradInferShapeFunctor);
// 3-D pooling with index: infer-shape functors built from the same phi
// InferMeta functions as the 2-D op, followed by registration of the forward
// and backward operators.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index,
                            MaxPool3dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index_grad,
                            MaxPool3dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// The grad-op maker is supplied for both op description types (OpDesc and
// imperative::OpBase).
REGISTER_OPERATOR(max_pool3d_with_index,
                  ops::MaxPoolWithIndexOp,
                  ops::MaxPool3dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool3dWithIndexInferShapeFunctor);
REGISTER_OPERATOR(max_pool3d_with_index_grad,
                  ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool3dWithIndexGradInferShapeFunctor);