/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <memory>

#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/backward.h"
#include "paddle/phi/infermeta/unary.h"

namespace paddle {
namespace operators {

// Returns the pooled extent along one spatial dimension:
//   (input_size - filter_size + 2 * padding) / stride + 1
// The division is integer division (truncating). `stride` is not validated
// here, so callers must pass a non-zero stride.
inline int MaxPoolOutputSize(int input_size,
                             int filter_size,
                             int padding,
                             int stride) {
  return (input_size - filter_size + 2 * padding) / stride + 1;
}

class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
38
 protected:
39
  phi::KernelKey GetExpectedKernelType(
C
chengduoZH 已提交
40
      const framework::ExecutionContext &ctx) const override {
41 42
    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"),
                          ctx.device_context().GetPlace());
C
chengduoZH 已提交
43
  }
C
chengduoZH 已提交
44 45 46 47 48 49
};

class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
chengduoZH 已提交
50
 protected:
51
  phi::KernelKey GetExpectedKernelType(
C
chengduoZH 已提交
52
      const framework::ExecutionContext &ctx) const override {
53 54 55
    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(
                              ctx, framework::GradVarName("Out")),
                          ctx.device_context().GetPlace());
C
chengduoZH 已提交
56
  }
C
chengduoZH 已提交
57 58 59 60
};

// Declares the inputs, outputs, attributes and documentation text of the
// 2-D max-pooling-with-index operator (registered below as
// max_pool2d_with_index).
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "X",
        "(Tensor) The input tensor of pooling operator. "
        "The format of input tensor is NCHW, where N is batch size, C is the "
        "number of channels, H is the height of the image, "
        "and W is the width of the image.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is "
              "the number of channels, H is the height of the image "
              "and W is the width of the image.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is the number of channels, "
              "H is the height of the image, "
              "and W is the width of the image. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(height, "
                              "width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default:false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in H and W dimensions will be same as ksize, input data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1, 1}), strides(height, "
                              "width) of pooling operator.")
        .SetDefault({1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default:{0, 0}), paddings(height, width) of pooling "
        "operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0});

    AddComment(R"DOC(
MaxPool2d Operator.

The maxPooling2d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature,
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
  Where
       $$
       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$

  For adaptive = true:
       $$
       H_{out} = ksize[0]   W_{out} = ksize[1]
       $$


)DOC");
  }
};

// Declares the inputs, outputs, attributes and documentation text of the
// 3-D max-pooling-with-index operator (registered below as
// max_pool3d_with_index).
class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor) The input tensor of pooling operator. "
             "The format of input tensor is NCDHW, where N is batch size, C is "
             "the number of channels, and D, H and W are the depth, height and "
             "width of "
             "the image, respectively.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, "
              "and D, H and W are the depth, height and "
              "width of the image, respectively.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, and "
              "D, H and W are the depth, height and width "
              "of the image, respectively. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(depth, "
                              "height, width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in D, H and W dimensions will be same as ksize, input "
        "data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1,1,1}), strides(depth, "
                              "height, width) of pooling operator.")
        .SetDefault({1, 1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default {0,0,0}), paddings(depth, "
        "height, width) of pooling operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0, 0});

    AddComment(R"DOC(
MaxPool3d Operator.

The maxpooling3d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively.
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
  Where
       $$
       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
       $$

  For adaptive = true:
       $$
       D_{out} = ksize[0]   H_{out} = ksize[1]   W_{out} = ksize[2]
       $$

)DOC");
  }
};

// Describes how to build the gradient op for a max-pool-with-index forward
// op. It is instantiated below for both framework::OpDesc and
// imperative::OpBase.
template <typename T>
class MaxPoolWithIndexGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Fills in `op`:
  //   - type:    the forward op type with "_grad" appended
  //   - attrs:   copied from the forward op
  //   - inputs:  forward input "X", forward output "Mask", and the gradient
  //              of forward output "Out"
  //   - outputs: the gradient of forward input "X"
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetAttrMap(this->Attrs());
    op->SetInput("X", this->Input("X"));
    op->SetInput("Mask", this->Output("Mask"));
    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

// Declares the inferer that both *_with_index_grad registrations below attach
// to the grad op, naming input "X".
// NOTE(review): presumably this tells the framework that the grad kernel does
// not read X's data buffer — confirm against the macro's definition.
DECLARE_NO_NEED_BUFFER_VARS_INFERER(
    MaxPoolWithIndexOpGradNoNeedBufferVarsInferer, "X");

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// 2-D variant: shape inference for the forward and backward ops is delegated
// to phi::MaxPoolWithIndexInferMeta / phi::MaxPoolWithIndexGradInferMeta.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index,
                            MaxPool2dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool2d_with_index_grad,
                            MaxPool2dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// Forward op: shared operator class, 2-D proto maker, grad-op makers for both
// OpDesc and OpBase, and the forward infer-shape functor.
REGISTER_OPERATOR(max_pool2d_with_index,
                  ops::MaxPoolWithIndexOp,
                  ops::MaxPool2dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool2dWithIndexInferShapeFunctor);
// Backward op: shared grad operator class, the no-need-buffer inferer for
// "X", and the backward infer-shape functor.
REGISTER_OPERATOR(max_pool2d_with_index_grad,
                  ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool2dWithIndexGradInferShapeFunctor);

// 3-D variant: shape inference for the forward and backward ops is delegated
// to the same phi::MaxPoolWithIndexInferMeta /
// phi::MaxPoolWithIndexGradInferMeta functions as the 2-D variant.
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index,
                            MaxPool3dWithIndexInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexInferMeta));
DECLARE_INFER_SHAPE_FUNCTOR(max_pool3d_with_index_grad,
                            MaxPool3dWithIndexGradInferShapeFunctor,
                            PD_INFER_META(phi::MaxPoolWithIndexGradInferMeta));

// Forward op: shared operator class, 3-D proto maker, grad-op makers for both
// OpDesc and OpBase, and the forward infer-shape functor.
REGISTER_OPERATOR(max_pool3d_with_index,
                  ops::MaxPoolWithIndexOp,
                  ops::MaxPool3dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>,
                  MaxPool3dWithIndexInferShapeFunctor);
// Backward op: shared grad operator class, the no-need-buffer inferer for
// "X", and the backward infer-shape functor.
REGISTER_OPERATOR(max_pool3d_with_index_grad,
                  ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInferer,
                  MaxPool3dWithIndexGradInferShapeFunctor);