mine_hard_examples_op.cc 13.3 KB
Newer Older
1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
W
wanghaox 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15 16
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
W
wanghaox 已提交
17 18 19 20

namespace paddle {
namespace operators {

W
wanghaox 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
enum MiningType { kNone = 0, kMaxNegative, kHardExample };

template <typename T>
bool SortScoreDescend(const std::pair<float, T>& pair1,
                      const std::pair<float, T>& pair2) {
  return pair1.first > pair2.first;
}

inline bool IsEligibleMining(const MiningType mining_type, const int match_idx,
                             const float match_dist,
                             const float neg_dist_threshold) {
  if (mining_type == MiningType::kMaxNegative) {
    return match_idx == -1 && match_dist < neg_dist_threshold;
  } else if (mining_type == MiningType::kHardExample) {
    return true;
  } else {
    return false;
  }
}

W
wanghaox 已提交
41
inline MiningType GetMiningType(std::string str) {
W
wanghaox 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
  if (str == "max_negative") {
    return MiningType::kMaxNegative;
  } else if (str == "hard_example") {
    return MiningType::kHardExample;
  } else {
    return MiningType::kNone;
  }
}

template <typename DeviceContext, typename T>
class MineHardExamplesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_cls_loss = ctx.Input<framework::Tensor>("ClsLoss");
    auto* in_loc_loss = ctx.Input<framework::Tensor>("LocLoss");
    auto* in_matched_indices = ctx.Input<framework::Tensor>("MatchIndices");
    auto* in_match_dist = ctx.Input<framework::Tensor>("MatchDist");
    float neg_pos_ratio = ctx.Attr<float>("neg_pos_ratio");
    T neg_dist_threshold =
        static_cast<T>(ctx.Attr<float>("neg_dist_threshold"));
    int sample_size = ctx.Attr<int>("sample_size");
    MiningType mining_type =
        GetMiningType(ctx.Attr<std::string>("mining_type"));

    auto out_neg_indices = ctx.Output<framework::LoDTensor>("NegIndices");
    auto out_match_indices =
        ctx.Output<framework::Tensor>("UpdatedMatchIndices");

Y
Yi Wang 已提交
70 71
    framework::TensorCopy(*in_matched_indices, ctx.GetPlace(),
                          out_match_indices);
W
wanghaox 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

    int batch_size = in_matched_indices->dims()[0];
    int prior_num = in_matched_indices->dims()[1];

    auto match_indices = framework::EigenMatrix<int>::From(*in_matched_indices);

    auto match_indices_et =
        framework::EigenMatrix<int>::From(*out_match_indices);

    auto match_dist = framework::EigenMatrix<T>::From(*in_match_dist);

    const T* cls_loss = in_cls_loss->data<T>();
    const T* loc_loss = nullptr;
    if (in_loc_loss) {
      loc_loss = in_loc_loss->data<T>();
    }

    std::vector<std::vector<int>> all_neg_indices;
    std::vector<size_t> batch_starts = {0};
    for (int n = 0; n < batch_size; ++n) {
      std::vector<std::pair<T, size_t>> loss_idx;
      int neg_sel = 0;
      for (int m = 0; m < prior_num; ++m) {
        if (IsEligibleMining(mining_type, match_indices(n, m), match_dist(n, m),
                             neg_dist_threshold)) {
          T loss = cls_loss[n * prior_num + m];
          if (mining_type == MiningType::kHardExample && loc_loss != nullptr) {
            loss = cls_loss[n * prior_num + m] + loc_loss[n * prior_num + m];
          }
          loss_idx.push_back(std::make_pair(loss, m));
          ++neg_sel;
        }
      }

      if (mining_type == MiningType::kMaxNegative) {
        int num_pos = 0;
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) != -1) ++num_pos;
        }
        neg_sel = std::min(static_cast<int>(num_pos * neg_pos_ratio), neg_sel);
      } else if (mining_type == MiningType::kHardExample) {
        neg_sel = std::min(sample_size, neg_sel);
      }

W
wanghaox 已提交
116
      std::sort(loss_idx.begin(), loss_idx.end(), SortScoreDescend<size_t>);
W
wanghaox 已提交
117 118 119 120
      std::set<int> sel_indices;
      std::vector<int> neg_indices;
      std::transform(loss_idx.begin(), loss_idx.begin() + neg_sel,
                     std::inserter(sel_indices, sel_indices.begin()),
W
wanghaox 已提交
121
                     [](std::pair<T, size_t>& l) -> int {
W
wanghaox 已提交
122 123 124
                       return static_cast<int>(l.second);
                     });

W
wanghaox 已提交
125 126 127 128 129 130 131 132 133 134
      if (mining_type == MiningType::kHardExample) {
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) > -1) {
            if (sel_indices.find(m) == sel_indices.end()) {
              match_indices_et(n, m) = -1;
            }
          } else {
            if (sel_indices.find(m) != sel_indices.end()) {
              neg_indices.push_back(m);
            }
W
wanghaox 已提交
135
          }
W
wanghaox 已提交
136 137
        }
      } else {
W
wanghaox 已提交
138 139
        neg_indices.resize(sel_indices.size());
        std::copy(sel_indices.begin(), sel_indices.end(), neg_indices.begin());
W
wanghaox 已提交
140
      }
W
wanghaox 已提交
141

W
wanghaox 已提交
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
      all_neg_indices.push_back(neg_indices);
      batch_starts.push_back(batch_starts.back() + neg_indices.size());
    }

    framework::LoD out_neg_indices_lod;
    out_neg_indices_lod.emplace_back(batch_starts);
    int neg_offset = 0;
    auto neg_data = out_neg_indices->mutable_data<int>(
        framework::make_ddim({static_cast<int>(batch_starts.back()), 1}),
        ctx.GetPlace());

    for (auto neg_indices : all_neg_indices) {
      std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset);
      neg_offset += neg_indices.size();
    }
    out_neg_indices->set_lod(out_neg_indices_lod);
    return;
  }
};

W
wanghaox 已提交
162 163 164 165 166
class MineHardExamplesOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
W
wanghaox 已提交
167
  void InferShape(framework::InferShapeContext* ctx) const override {
W
wanghaox 已提交
168 169 170
    PADDLE_ENFORCE(ctx->HasInput("ClsLoss"),
                   "Input(ClsLoss) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(
W
wanghaox 已提交
171 172
        ctx->HasInput("MatchIndices"),
        "Input(MatchIndices) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
173
    PADDLE_ENFORCE(
W
wanghaox 已提交
174 175
        ctx->HasInput("MatchDist"),
        "Input(MatchDist) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
176
    PADDLE_ENFORCE(
W
wanghaox 已提交
177 178 179 180 181
        ctx->HasOutput("NegIndices"),
        "Output(NegIndices) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("UpdatedMatchIndices"),
                   "Output(UpdatedMatchIndices) of MineHardExamplesOp should "
                   "not be null.");
W
wanghaox 已提交
182 183

    auto cls_loss_dims = ctx->GetInputDim("ClsLoss");
W
wanghaox 已提交
184 185
    auto idx_dims = ctx->GetInputDim("MatchIndices");
    auto dis_dims = ctx->GetInputDim("MatchDist");
W
wanghaox 已提交
186 187 188 189

    PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL,
                      "The shape of ClsLoss is [N, Np].");
    PADDLE_ENFORCE_EQ(idx_dims.size(), 2UL,
W
wanghaox 已提交
190
                      "The shape of MatchIndices is [N, Np].");
W
wanghaox 已提交
191
    PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL,
W
wanghaox 已提交
192
                      "The shape of MatchDist is [N, Np].");
W
wanghaox 已提交
193 194 195 196 197 198 199 200 201 202 203 204 205 206

    if (ctx->HasInput("LocLoss")) {
      auto loc_loss_dims = ctx->GetInputDim("LocLoss");
      PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL,
                        "The shape of LocLoss is [N, Np].");
      PADDLE_ENFORCE_EQ(cls_loss_dims[0], loc_loss_dims[0],
                        "Batch size of ClsLoss and LocLoss must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], loc_loss_dims[1],
          "Prior box number of ClsLoss and LocLoss must be the same.");
    }

    PADDLE_ENFORCE_EQ(
        cls_loss_dims[0], idx_dims[0],
W
wanghaox 已提交
207
        "Batch size of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
208 209
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
210
        "Prior box number of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
211 212

    PADDLE_ENFORCE_EQ(cls_loss_dims[0], dis_dims[0],
W
wanghaox 已提交
213
                      "Batch size of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
214 215
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
216
        "Prior box number of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
217 218 219 220 221 222 223 224 225

    auto mining_type =
        GetMiningType(ctx->Attrs().Get<std::string>("mining_type"));

    PADDLE_ENFORCE_NE(mining_type, MiningType::kNone,
                      "mining_type must be hard_example or max_negative");

    if (mining_type == MiningType::kMaxNegative) {
      auto neg_pos_ratio = ctx->Attrs().Get<float>("neg_pos_ratio");
W
wanghaox 已提交
226
      auto neg_dist_threshold = ctx->Attrs().Get<float>("neg_dist_threshold");
W
wanghaox 已提交
227 228 229 230
      PADDLE_ENFORCE_GT(
          neg_pos_ratio, 0.0f,
          "neg_pos_ratio must greater than zero in max_negative mode");
      PADDLE_ENFORCE_GT(
W
wanghaox 已提交
231 232
          neg_dist_threshold, 0.0f,
          "neg_dist_threshold must greater than zero in max_negative mode");
W
wanghaox 已提交
233 234 235 236 237 238 239
    } else if (mining_type == MiningType::kHardExample) {
      auto sample_size = ctx->Attrs().Get<int>("sample_size");
      PADDLE_ENFORCE_GT(
          sample_size, 0,
          "sample_size must greater than zero in hard_example mode");
    }

W
wanghaox 已提交
240
    ctx->SetOutputDim("UpdatedMatchIndices", idx_dims);
241 242
    // The first dimension of NegIndices will be set correcttly in Compute.
    ctx->SetOutputDim("NegIndices", {-1, 1});
W
wanghaox 已提交
243 244 245 246
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
W
wanghaox 已提交
247
      const framework::ExecutionContext& ctx) const override {
W
wanghaox 已提交
248 249
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("ClsLoss")->type()),
250
        platform::CPUPlace());
W
wanghaox 已提交
251 252 253 254 255
  }
};

class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
W
wanghaox 已提交
256
  MineHardExamplesOpMaker(OpProto* proto, OpAttrChecker* op_checker)
W
wanghaox 已提交
257 258 259
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "ClsLoss",
W
wanghaox 已提交
260
        "(Tensor, default Tensor<float>), The classification loss with shape "
W
wanghaox 已提交
261 262 263
        "[N, Np], N is the batch size and Np is the number of prior box.");
    AddInput("LocLoss",
             "(Tensor, optional, default Tensor<float>), The localization loss "
W
wanghaox 已提交
264
             "with shape [N, Np], N is the batch size and Np is the number of "
W
wanghaox 已提交
265 266
             "prior box.")
        .AsDispensable();
W
wanghaox 已提交
267
    AddInput("MatchIndices",
W
wanghaox 已提交
268 269
             "(Tensor, Tensor<int>), Matched indices with shape [N, Np], N is "
             "the batch size and Np is the number of prior box. "
W
wanghaox 已提交
270 271 272 273
             "MatchIndices[i][j] equal -1 means the j-th prior box in i-th "
             "instance does not match any entity, otherwise means it is "
             "matched to row.");
    AddInput("MatchDist",
W
wanghaox 已提交
274 275 276 277
             "(Tensor, default Tensor<float>) Matched indices with shape [N, "
             "Np], N is the batch size and Np is the number of prior box.");
    AddAttr<float>("neg_pos_ratio",
                   "(float) The ratio of the negative box to the positive "
W
wanghaox 已提交
278
                   "box. Use only when mining_type is max_negative.")
W
wanghaox 已提交
279
        .SetDefault(1.0);
W
wanghaox 已提交
280
    AddAttr<float>("neg_dist_threshold",
W
wanghaox 已提交
281 282
                   "(float) The negative overlap upper bound for the unmatched "
                   "predictions. Use only when mining_type is max_negative.")
W
wanghaox 已提交
283 284 285
        .SetDefault(0.5);
    AddAttr<int>("sample_size",
                 "(float) The max sample size of negative box. Use only when "
W
wanghaox 已提交
286
                 "mining_type is hard_example.")
W
wanghaox 已提交
287 288 289 290 291 292 293
        .SetDefault(0);
    AddAttr<std::string>("mining_type",
                         "(float) The mining algorithm name, the value is "
                         "hard_example or max_negative.")
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});

W
wanghaox 已提交
294 295 296 297 298 299 300 301 302 303 304 305
    AddOutput(
        "NegIndices",
        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
        "and each element is the prior box index. "
        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
        "and sample 1's box 0 is selected. The output NegIndices is "
        "[[1], [0]].");

    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "
W
wanghaox 已提交
306
              "shape [N, Np]. Only update when mining_type is "
W
wanghaox 已提交
307 308 309
              "hard_example. The input MatchIndices elements will be update to "
              "-1 when it is not in the candidate high loss list of negative "
              "examples.");
W
wanghaox 已提交
310 311 312

    AddComment(R"DOC(
Mine hard examples Operator.
W
wanghaox 已提交
313
This operator implements hard example mining to select a subset of negative box indices.
W
wanghaox 已提交
314 315 316 317 318 319
For each image, selects the box with highest losses. subject to the condition that the 
box cannot have an Matcht > neg_dist_threshold when mining_type is max_negative. 
The selected number is min(sample_size, max_negative_box_number) when mining_type is 
hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number) 
when mining_type is max_negative, where the max_negative_box_number is the count of 
MatchIndices elements with value -1.
W
wanghaox 已提交
320 321 322 323 324 325 326
)DOC");
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
327 328 329
REGISTER_OPERATOR(mine_hard_examples, ops::MineHardExamplesOp,
                  ops::MineHardExamplesOpMaker,
                  paddle::framework::EmptyGradOpMaker);
W
wanghaox 已提交
330 331 332 333 334

REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);