/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/pool_with_index_op.h"
#include <memory>

namespace paddle {
namespace operators {

// Returns the pooled extent along one spatial dimension:
//   (input_size - filter_size + 2 * padding) / stride + 1
// The division is plain C++ integer division, i.e. it truncates toward zero.
// `stride` must be non-zero; a zero stride is a division by zero.
inline int MaxPoolOutputSize(int input_size, int filter_size, int padding,
                             int stride) {
  const int padded_span = input_size - filter_size + 2 * padding;
  return padded_span / stride + 1;
}

class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
fix doc  
chengduoZH 已提交
31
  void InferShape(framework::InferShapeContext *ctx) const override {
C
chengduoZH 已提交
32
    PADDLE_ENFORCE(ctx->HasInput("X"),
C
chengduoZH 已提交
33
                   "Input(X) of Pooling should not be null.");
C
chengduoZH 已提交
34
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
C
chengduoZH 已提交
35
                   "Output(Out) of Pooling should not be null.");
C
chengduoZH 已提交
36
    PADDLE_ENFORCE(ctx->HasOutput("Mask"),
C
chengduoZH 已提交
37
                   "Output(Mask) of Pooling should not be null.");
C
chengduoZH 已提交
38 39 40 41 42 43

    auto in_x_dims = ctx->GetInputDim("X");

    std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
    std::vector<int> strides = ctx->Attrs().Get<std::vector<int>>("strides");
    std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
44
    bool adaptive = ctx->Attrs().Get<bool>("adaptive");
C
chengduoZH 已提交
45 46

    PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
C
chengduoZH 已提交
47
                   "Pooling intput should be 4-D or 5-D tensor.");
C
chengduoZH 已提交
48

C
chengduoZH 已提交
49
    if (ctx->Attrs().Get<bool>("global_pooling")) {
C
chengduoZH 已提交
50
      ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
C
fix bug  
chengduoZH 已提交
51 52
      for (size_t i = 0; i < ksize.size(); ++i) {
        paddings[i] = 0;
C
chengduoZH 已提交
53
        ksize[i] = static_cast<int>(in_x_dims[i + 2]);
C
fix bug  
chengduoZH 已提交
54
      }
C
chengduoZH 已提交
55 56 57
    }

    PADDLE_ENFORCE(in_x_dims.size() - ksize.size() == 2U,
C
fix doc  
chengduoZH 已提交
58
                   "Input size and pooling size should be consistent.");
C
chengduoZH 已提交
59
    PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
C
chengduoZH 已提交
60
                      "Strides size and pooling size should be the same.");
C
chengduoZH 已提交
61
    PADDLE_ENFORCE_EQ(ksize.size(), paddings.size(),
C
chengduoZH 已提交
62
                      "Paddings size and pooling size should be the same.");
C
chengduoZH 已提交
63 64

    std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
65 66 67 68 69 70 71
    if (adaptive) {
      output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
    } else {
      for (size_t i = 0; i < ksize.size(); ++i) {
        output_shape.push_back(MaxPoolOutputSize(in_x_dims[i + 2], ksize[i],
                                                 paddings[i], strides[i]));
      }
C
chengduoZH 已提交
72 73 74 75
    }
    ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
    ctx->SetOutputDim("Mask", framework::make_ddim(output_shape));
  }
C
chengduoZH 已提交
76 77

 protected:
78
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
79
      const framework::ExecutionContext &ctx) const override {
80 81 82
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"),
        ctx.device_context());
C
chengduoZH 已提交
83
  }
C
chengduoZH 已提交
84 85 86 87 88 89
};

class MaxPoolWithIndexOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

C
fix doc  
chengduoZH 已提交
90
  void InferShape(framework::InferShapeContext *ctx) const override {
91 92 93 94 95 96 97 98 99 100 101
    PADDLE_ENFORCE_EQ(
        ctx->HasInput("Mask"), true,
        platform::errors::NotFound("Input(Mask) must not be null."));
    PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
                      platform::errors::NotFound("Input(X) must not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasInput(framework::GradVarName("Out")), true,
        platform::errors::NotFound("Input(Out@GRAD) should not be null."));
    PADDLE_ENFORCE_EQ(
        ctx->HasOutput(framework::GradVarName("X")), true,
        platform::errors::NotFound("Output(X@GRAD) should not be null."));
C
chengduoZH 已提交
102 103
    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
  }
C
chengduoZH 已提交
104 105

 protected:
106
  framework::OpKernelType GetExpectedKernelType(
C
chengduoZH 已提交
107
      const framework::ExecutionContext &ctx) const override {
108 109 110
    return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
                                       ctx, framework::GradVarName("Out")),
                                   ctx.device_context());
C
chengduoZH 已提交
111
  }
C
chengduoZH 已提交
112 113 114 115
};

// Declares the inputs, outputs, attributes and documentation of the
// max_pool2d_with_index operator (NCHW tensors, two-element attributes).
class MaxPool2dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "X",
        "(Tensor) The input tensor of pooling operator. "
        "The format of input tensor is NCHW, where N is batch size, C is the "
        "number of channels, H is the height of the image, "
        "and W is the width of the image.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is "
              "the number of channels, H is the height of the image "
              "and W is the width of the image.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCHW, "
              "where N is batch size, C is the number of channels, "
              "H is the height of the image, "
              "and W is the width of the image. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(height, "
                              "width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default:false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in H and W dimensions will be same as ksize, input data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1, 1}), strides(height, "
                              "width) of pooling operator.")
        .SetDefault({1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default:{0, 0}), paddings(height, width) of pooling "
        "operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0});

    AddComment(R"DOC(
MaxPool2d Operator.

The maxPooling2d with index operation calculates the output and the mask
based on the input, ksize, strides, and paddings parameters. Input(X) and
output(Out, Mask) are in NCHW format, where N is batch size, C is the
number of channels, H is the height of the feature, 
and W is the width of the feature.
Parameters(ksize, strides, paddings) are two elements.
These two elements represent height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, H_{out}, W_{out})$
       Mask shape: $(N, C, H_{out}, W_{out})$
  Where
       $$
       H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1
       $$
  
  For adaptive = true:
       $$
       H_{out} = ksize[0]   W_{out} = ksize[1]
       $$
      

)DOC");
  }
};

// Declares the inputs, outputs, attributes and documentation of the
// max_pool3d_with_index operator (NCDHW tensors, three-element attributes).
class MaxPool3dWithIndexOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "(Tensor) The input tensor of pooling operator. "
             "The format of input tensor is NCDHW, where N is batch size, C is "
             "the number of channels, and D, H and W are the depth, height and "
             "width of "
             "the image, respectively.");
    AddOutput("Out",
              "(Tensor) The output tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, "
              "and D, H and W are the depth, height and "
              "width of the image, respectively.");
    AddOutput("Mask",
              "(Tensor) The Mask tensor of pooling operator. "
              "The format of output tensor is also NCDHW, "
              "where N is the batch size, C is the number of channels, and "
              "D, H and W are the depth, height and width "
              "of the image, respectively. "
              "It represents the index in the current feature map.");

    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("ksize",
                              "(vector<int>) The pooling window size(depth, "
                              "height, width) of pooling operator. "
                              "If global_pooling = true, ksize and paddings "
                              "will be ignored.");
    AddAttr<bool>(
        "global_pooling",
        "(bool, default false) Whether to use the global pooling. "
        "If global_pooling = true, ksize and paddings will be ignored.")
        .SetDefault(false);
    AddAttr<bool>(
        "adaptive",
        "(bool, default False) When true, will perform adaptive pooling "
        "instead, "
        "output shape in D, H and W dimensions will be same as ksize, input "
        "data "
        "will be divided into grids specify by ksize averagely and perform "
        "pooling in each grid area to get output pooling value.")
        .SetDefault(false);
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>("strides",
                              "(vector<int>, default {1,1,1}), strides(depth, "
                              "height, width) of pooling operator.")
        .SetDefault({1, 1, 1});
    // TODO(Chengduo): Add checker. (Currently, TypedAttrChecker don't support
    // vector type.)
    AddAttr<std::vector<int>>(
        "paddings",
        "(vector<int>, default {0,0,0}), paddings(depth, "
        "height, width) of pooling operator. "
        "If global_pooling = true, paddings and ksize will be ignored.")
        .SetDefault({0, 0, 0});

    AddComment(R"DOC(
MaxPool3d Operator.

The maxpooling3d with index operation calculates the output and the mask
based on the input and ksize, strides, paddings parameters.
Input(X) and output(Out, Mask) are in NCDHW format, where N is batch
size, C is the number of channels, and D, H and W are the depth, height and
width of the feature, respectively. 
Parameters(ksize, strides, paddings) are three elements.
These three elements represent depth, height and width, respectively.
The input(X) size and output(Out, Mask) size may be different.

Example:
  Input:
       X shape: $(N, C, D_{in}, H_{in}, W_{in})$
  Output:
       Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
       Mask shape: $(N, C, D_{out}, H_{out}, W_{out})$
  Where
       $$
       D_{out} = \frac{(D_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\
       H_{out} = \frac{(H_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 \\
       W_{out} = \frac{(W_{in} - ksize[2] + 2 * paddings[2])}{strides[2]} + 1
       $$
  
  For adaptive = true:
       $$
       D_{out} = ksize[0]   H_{out} = ksize[1]   W_{out} = ksize[2]
       $$

)DOC");
  }
};
// Describes the gradient op of a pooling-with-index forward op. T is the op
// description type; the registrations in this file instantiate it with
// framework::OpDesc and imperative::OpBase.
template <typename T>
class MaxPoolWithIndexGradOpMaker : public framework::SingleGradOpMaker<T> {
 public:
  using framework::SingleGradOpMaker<T>::SingleGradOpMaker;

 protected:
  // Fills `op` as "<forward op type>_grad": it copies the forward attributes,
  // takes X, the forward output Mask and Out@GRAD as inputs, and produces
  // X@GRAD.
  void Apply(GradOpPtr<T> op) const override {
    op->SetType(this->ForwardOpType() + "_grad");
    op->SetAttrMap(this->Attrs());
    op->SetInput("X", this->Input("X"));
    op->SetInput("Mask", this->Output("Mask"));
    op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
    op->SetOutput(framework::GradVarName("X"), this->InputGrad("X"));
  }
};

// Declares MaxPoolWithIndexOpGradNoNeedBufferVarsInference for the variable
// "X"; it is passed to both *_with_index_grad operator registrations.
// NOTE(review): the macro name suggests the grad op does not need the data
// buffer of X (InferShape in this file only reads its dims) -- confirm
// against the macro definition and the grad kernel.
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(
    MaxPoolWithIndexOpGradNoNeedBufferVarsInference, "X");

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

318 319 320 321
REGISTER_OPERATOR(max_pool2d_with_index, ops::MaxPoolWithIndexOp,
                  ops::MaxPool2dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>);
322 323
REGISTER_OPERATOR(max_pool2d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInference);
C
chengduoZH 已提交
324 325

REGISTER_OP_CPU_KERNEL(
C
chengduoZH 已提交
326
    max_pool2d_with_index,
Q
QI JUN 已提交
327 328 329
    ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, float, int>,
    ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, double,
                                int>);
C
chengduoZH 已提交
330
REGISTER_OP_CPU_KERNEL(
C
chengduoZH 已提交
331
    max_pool2d_with_index_grad,
Q
QI JUN 已提交
332 333 334
    ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, float,
                                    int>,
    ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, double,
335
                                    int>);
C
chengduoZH 已提交
336

337 338 339 340
REGISTER_OPERATOR(max_pool3d_with_index, ops::MaxPoolWithIndexOp,
                  ops::MaxPool3dWithIndexOpMaker,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::framework::OpDesc>,
                  ops::MaxPoolWithIndexGradOpMaker<paddle::imperative::OpBase>);
341 342
REGISTER_OPERATOR(max_pool3d_with_index_grad, ops::MaxPoolWithIndexOpGrad,
                  ops::MaxPoolWithIndexOpGradNoNeedBufferVarsInference);
C
chengduoZH 已提交
343 344

REGISTER_OP_CPU_KERNEL(
C
chengduoZH 已提交
345
    max_pool3d_with_index,
Q
QI JUN 已提交
346 347 348
    ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, float, int>,
    ops::MaxPoolWithIndexKernel<paddle::platform::CPUDeviceContext, double,
                                int>);
C
chengduoZH 已提交
349
REGISTER_OP_CPU_KERNEL(
C
chengduoZH 已提交
350
    max_pool3d_with_index_grad,
Q
QI JUN 已提交
351 352 353
    ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, float,
                                    int>,
    ops::MaxPoolWithIndexGradKernel<paddle::platform::CPUDeviceContext, double,
354
                                    int>);