/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/detection/prior_box_op.h"

#include <string>

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"

namespace paddle {
namespace operators {

class PriorBoxOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", "PriorBoxOp");
    OP_INOUT_CHECK(ctx->HasInput("Image"), "Input", "Image", "PriorBoxOp");

    auto image_dims = ctx->GetInputDim("Image");
    auto input_dims = ctx->GetInputDim("Input");

    PADDLE_ENFORCE_EQ(
        image_dims.size(),
        4,
        platform::errors::InvalidArgument(
            "The Input(Image) of Op(PriorBoxOp) should be a 4-D Tensor "
            "and data format is NCHW. But received Image's dimensions = %d, "
            "shape = [%s].",
            image_dims.size(),
            image_dims));
    PADDLE_ENFORCE_EQ(
        input_dims.size(),
        4,
        platform::errors::InvalidArgument(
            "The Input(Input) of Op(PriorBoxOp) should be a 4-D Tensor "
            "and data format is NCHW. But received Input's dimensions = %d, "
            "shape = [%s].",
            input_dims.size(),
            input_dims));

    auto min_sizes = ctx->Attrs().Get<std::vector<float>>("min_sizes");
    auto max_sizes = ctx->Attrs().Get<std::vector<float>>("max_sizes");
    auto variances = ctx->Attrs().Get<std::vector<float>>("variances");
    auto aspect_ratios = ctx->Attrs().Get<std::vector<float>>("aspect_ratios");
    bool flip = ctx->Attrs().Get<bool>("flip");

    std::vector<float> aspect_ratios_vec;
    ExpandAspectRatios(aspect_ratios, flip, &aspect_ratios_vec);
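    // ExpandAspectRatios prepends 1.0 and, when flip is true, also adds the
    // reciprocal of every aspect ratio; num_priors below is therefore
    // (expanded aspect ratios) * (min sizes), plus one box per max size.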

    size_t num_priors = aspect_ratios_vec.size() * min_sizes.size();
    if (max_sizes.size() > 0) {
      PADDLE_ENFORCE_EQ(
          max_sizes.size(),
          min_sizes.size(),
          platform::errors::InvalidArgument(
              "The length of min_size and "
              "max_size must be equal. But received: min_size's length is %d, "
              "max_size's length is %d.",
              min_sizes.size(),
              max_sizes.size()));
      num_priors += max_sizes.size();
      for (size_t i = 0; i < max_sizes.size(); ++i) {
        PADDLE_ENFORCE_GT(
            max_sizes[i],
            min_sizes[i],
            platform::errors::InvalidArgument(
                "max_size[%d] must be greater "
                "than min_size[%d]. But received: max_size[%d] is %f, "
                "min_size[%d] is %f.",
                i,
                i,
                i,
                max_sizes[i],
                i,
                min_sizes[i]));
      }
    }

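    // Boxes and Variances share the layout [H, W, num_priors, 4], where H and
    // W come from the feature map given in Input.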
    std::vector<int64_t> dim_vec(4);
    dim_vec[0] = input_dims[2];
    dim_vec[1] = input_dims[3];
    dim_vec[2] = num_priors;
    dim_vec[3] = 4;
    ctx->SetOutputDim("Boxes", phi::make_ddim(dim_vec));
    ctx->SetOutputDim("Variances", phi::make_ddim(dim_vec));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto input_input_type =
        OperatorWithKernel::IndicateVarDataType(ctx, "Input");

    framework::LibraryType library_{framework::LibraryType::kPlain};
    framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
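    // Fall back to the plain CPU kernel unless the build has oneDNN (MKLDNN)
    // support and CanMKLDNNBeUsed allows it for this op at runtime.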
#ifdef PADDLE_WITH_MKLDNN
    if (library_ == framework::LibraryType::kPlain &&
        this->CanMKLDNNBeUsed(ctx, input_input_type)) {
      library_ = framework::LibraryType::kMKLDNN;
      layout_ = framework::DataLayout::kMKLDNN;
      auto input_image_type = framework::TransToProtoVarType(
          ctx.Input<framework::Tensor>("Image")->dtype());
      int customized_type_value =
          framework::OpKernelType::kDefaultCustomizedTypeValue;
      if (input_image_type == framework::DataTypeTrait<float>::DataType()) {
        customized_type_value = kPriorBoxFLOAT;
      } else if (input_image_type ==
                 framework::DataTypeTrait<double>::DataType()) {
        customized_type_value = kPriorBoxDOUBLE;
      }
      return framework::OpKernelType(input_input_type,
                                     ctx.GetPlace(),
                                     layout_,
                                     library_,
                                     customized_type_value);
    }
#endif
    return framework::OpKernelType(
        input_input_type, ctx.GetPlace(), layout_, library_);
  }
};

class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("Input",
             "(Tensor, default Tensor<float>), "
             "the input feature data of PriorBoxOp, The layout is NCHW.");
    AddInput("Image",
             "(Tensor, default Tensor<float>), "
             "the input image data of PriorBoxOp, The layout is NCHW.");
    AddOutput("Boxes",
              "(Tensor, default Tensor<float>), the output prior boxes of "
              "PriorBoxOp. The layout is [H, W, num_priors, 4]. "
              "H is the height of input, W is the width of input, num_priors "
              "is the box count of each position.");
    AddOutput("Variances",
              "(Tensor, default Tensor<float>), the expanded variances of "
              "PriorBoxOp. The layout is [H, W, num_priors, 4]. "
              "H is the height of input, W is the width of input, num_priors "
              "is the box count of each position.");

    AddAttr<std::vector<float>>("min_sizes",
                                "(vector<float>) List of min sizes "
                                "of generated prior boxes.")
        .AddCustomChecker([](const std::vector<float>& min_sizes) {
          PADDLE_ENFORCE_GT(
              min_sizes.size(),
              0,
              platform::errors::InvalidArgument("Size of min_sizes must be "
                                                "at least 1."));
          for (size_t i = 0; i < min_sizes.size(); ++i) {
            PADDLE_ENFORCE_GT(min_sizes[i],
                              0.0,
                              platform::errors::OutOfRange(
                                  "min_sizes[%d] must be larger "
                                  "than 0. But received: min_sizes[%d] is %f.",
                                  i,
                                  i,
                                  min_sizes[i]));
          }
        });
    AddAttr<std::vector<float>>(
        "max_sizes",
        "(vector<float>) List of max sizes of generated prior boxes.")
        .SetDefault(std::vector<float>{});
    AddAttr<std::vector<float>>(
        "aspect_ratios",
        "(vector<float>) List of aspect ratios of generated prior boxes.");

    AddAttr<std::vector<float>>(
        "variances",
        "(vector<float>) List of variances to be encoded in prior boxes.")
        .AddCustomChecker([](const std::vector<float>& variances) {
          PADDLE_ENFORCE_EQ(variances.size(),
                            4,
                            platform::errors::InvalidArgument(
                                "The length of variance must "
                                "be 4. But received: variances' length is %d.",
                                variances.size()));
          for (size_t i = 0; i < variances.size(); ++i) {
            PADDLE_ENFORCE_GT(variances[i],
                              0.0,
                              platform::errors::OutOfRange(
                                  "variance[%d] must be greater "
                                  "than 0. But received: variance[%d] = %f",
                                  i,
                                  i,
                                  variances[i]));
          }
        });
    AddAttr<bool>("flip", "(bool) Whether to flip aspect ratios.")
        .SetDefault(true);
    AddAttr<bool>("clip", "(bool) Whether to clip out-of-boundary boxes.")
        .SetDefault(true);

    AddAttr<float>("step_w",
                   "Prior boxes step across width, 0.0 for auto calculation.")
        .SetDefault(0.0)
        .AddCustomChecker([](const float& step_w) {
          PADDLE_ENFORCE_GE(step_w,
                            0.0,
                            platform::errors::InvalidArgument(
                                "step_w should be larger "
                                "than 0. But received: step_w = %f.",
                                step_w));
        });
    AddAttr<float>("step_h",
                   "Prior boxes step across height, 0.0 for auto calculation.")
        .SetDefault(0.0)
        .AddCustomChecker([](const float& step_h) {
          PADDLE_ENFORCE_GE(step_h,
                            0.0,
                            platform::errors::InvalidArgument(
                                "step_h should be larger "
                                "than 0. But received: step_h = %f.",
                                step_h));
        });

    AddAttr<float>("offset",
                   "(float) "
                   "Prior boxes center offset.")
        .SetDefault(0.5);
    AddAttr<bool>(
        "min_max_aspect_ratios_order",
        "(bool) If set True, the output prior box is in order of"
        "[min, max, aspect_ratios], which is consistent with Caffe."
        "Please note, this order affects the weights order of convolution layer"
        "followed by and does not affect the final detection results.")
        .SetDefault(false);
    AddAttr<bool>("use_mkldnn",
                  "(bool, default false) Only used in mkldnn kernel")
        .SetDefault(false);
    AddAttr<bool>(
        "use_quantizer",
        "(bool, default false) "
        "This parameter is no longer used. Use 'mkldnn_data_type' instead.")
        .SetDefault(false);
    AddAttr<std::string>(
        "mkldnn_data_type",
        "(string, default \"float32\"). Data type of mkldnn kernel")
        .SetDefault("float32")
        .InEnum({"float32", "int8", "bfloat16"});
    AddComment(R"DOC(
Prior box operator
Generate prior boxes for the SSD (Single Shot MultiBox Detector) algorithm.
Each position of the input produces N prior boxes, where N is determined by
 the count of min_sizes, max_sizes and aspect_ratios. The size of each box
 lies in the (min_size, max_size) interval, and the boxes are generated in
 sequence according to the aspect_ratios.
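 For example, with 2 min_sizes, 2 max_sizes, one aspect_ratio of 2. and
 flip=true, the expanded aspect ratios are [1., 2., 0.5], so each position
 produces 3 * 2 + 2 = 8 prior boxes.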

Please get more information from the following paper:
https://arxiv.org/abs/1512.02325.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(
    prior_box,
    ops::PriorBoxOp,
    ops::PriorBoxOpMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OP_CPU_KERNEL(prior_box,
                       ops::PriorBoxOpKernel<float, float>,
                       ops::PriorBoxOpKernel<double, double>);

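// The MKLDNN kernels below are registered with custom type values
// (kPriorBoxFLOAT / kPriorBoxDOUBLE) so that quantized uint8/int8 inputs can
// still dispatch to a kernel producing float or double boxes.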
REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    FF,
                                    ops::kPriorBoxFLOAT,
                                    ops::PriorBoxOpKernel<float, float>);

REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    DD,
                                    ops::kPriorBoxDOUBLE,
                                    ops::PriorBoxOpKernel<double, double>);

REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    U8F,
                                    ops::kPriorBoxFLOAT,
                                    ops::PriorBoxOpKernel<uint8_t, float>);

REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    S8F,
                                    ops::kPriorBoxFLOAT,
                                    ops::PriorBoxOpKernel<int8_t, float>);

REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    U8D,
                                    ops::kPriorBoxDOUBLE,
                                    ops::PriorBoxOpKernel<uint8_t, double>);

REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(prior_box,
                                    MKLDNN,
                                    ::paddle::platform::CPUPlace,
                                    S8D,
                                    ops::kPriorBoxDOUBLE,
                                    ops::PriorBoxOpKernel<int8_t, double>);