/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"

namespace paddle {
namespace operators {

// Negative-example mining strategies supported by this operator.
// kMaxNegative: pick unmatched boxes whose match distance is below a
//               threshold, capped at neg_pos_ratio * (#positives).
// kHardExample: pick the boxes with the highest loss, capped at sample_size.
enum MiningType { kNone = 0, kMaxNegative, kHardExample };

// Strict-weak-ordering predicate for std::sort: ranks (score, payload)
// pairs so that higher scores come first.
template <typename T>
bool SortScoreDescend(const std::pair<float, T>& lhs,
                      const std::pair<float, T>& rhs) {
  return rhs.first < lhs.first;
}

// Decides whether prior box with the given match state is a candidate for
// negative mining. In max_negative mode only unmatched boxes (match_idx ==
// -1) whose distance is under neg_dist_threshold qualify; in hard_example
// mode every box is a candidate; kNone never selects anything.
inline bool IsEligibleMining(const MiningType mining_type, const int match_idx,
                             const float match_dist,
                             const float neg_dist_threshold) {
  switch (mining_type) {
    case MiningType::kMaxNegative:
      return match_idx == -1 && match_dist < neg_dist_threshold;
    case MiningType::kHardExample:
      return true;
    default:
      return false;
  }
}

W
wanghaox 已提交
41
inline MiningType GetMiningType(std::string str) {
W
wanghaox 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114
  if (str == "max_negative") {
    return MiningType::kMaxNegative;
  } else if (str == "hard_example") {
    return MiningType::kHardExample;
  } else {
    return MiningType::kNone;
  }
}

// CPU kernel that mines hard negative prior-box examples for SSD-style
// detection training.
//
// Inputs (shapes documented in MineHardExamplesOpMaker):
//   ClsLoss      [N, Np] - per prior-box classification loss.
//   LocLoss      [N, Np] - optional per prior-box localization loss.
//   MatchIndices [N, Np] - -1 marks an unmatched (negative) prior box.
//   MatchDist    [N, Np] - matching distance/overlap per prior box.
// Outputs:
//   NegIndices          - LoDTensor [Neg, 1] of selected negative box
//                         indices, one LoD segment per batch sample.
//   UpdatedMatchIndices - copy of MatchIndices; in hard_example mode
//                         positives not selected are reset to -1.
template <typename DeviceContext, typename T>
class MineHardExamplesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_cls_loss = ctx.Input<framework::Tensor>("ClsLoss");
    auto* in_loc_loss = ctx.Input<framework::Tensor>("LocLoss");
    auto* in_matched_indices = ctx.Input<framework::Tensor>("MatchIndices");
    auto* in_match_dist = ctx.Input<framework::Tensor>("MatchDist");
    float neg_pos_ratio = ctx.Attr<float>("neg_pos_ratio");
    T neg_dist_threshold =
        static_cast<T>(ctx.Attr<float>("neg_dist_threshold"));
    int sample_size = ctx.Attr<int>("sample_size");
    MiningType mining_type =
        GetMiningType(ctx.Attr<std::string>("mining_type"));

    auto out_neg_indices = ctx.Output<framework::LoDTensor>("NegIndices");
    auto out_match_indices =
        ctx.Output<framework::Tensor>("UpdatedMatchIndices");

    // Start from a verbatim copy; hard_example mode selectively resets
    // entries to -1 below.
    framework::Copy(*in_matched_indices, ctx.GetPlace(), out_match_indices);

    int batch_size = in_matched_indices->dims()[0];
    int prior_num = in_matched_indices->dims()[1];

    auto match_indices = framework::EigenMatrix<int>::From(*in_matched_indices);
    auto match_indices_et =
        framework::EigenMatrix<int>::From(*out_match_indices);
    auto match_dist = framework::EigenMatrix<T>::From(*in_match_dist);

    const T* cls_loss = in_cls_loss->data<T>();
    const T* loc_loss = nullptr;  // LocLoss is dispensable.
    if (in_loc_loss) {
      loc_loss = in_loc_loss->data<T>();
    }

    std::vector<std::vector<int>> all_neg_indices;
    std::vector<size_t> batch_starts = {0};  // LoD offsets, one per sample.
    for (int n = 0; n < batch_size; ++n) {
      // Collect every eligible candidate with its mining loss. In
      // hard_example mode the loss is cls + loc when LocLoss is given.
      std::vector<std::pair<T, size_t>> loss_idx;
      int neg_sel = 0;
      for (int m = 0; m < prior_num; ++m) {
        if (IsEligibleMining(mining_type, match_indices(n, m), match_dist(n, m),
                             neg_dist_threshold)) {
          T loss = cls_loss[n * prior_num + m];
          if (mining_type == MiningType::kHardExample && loc_loss != nullptr) {
            loss = cls_loss[n * prior_num + m] + loc_loss[n * prior_num + m];
          }
          loss_idx.push_back(std::make_pair(loss, m));
          ++neg_sel;
        }
      }

      // Cap the number of selected negatives per mining strategy.
      if (mining_type == MiningType::kMaxNegative) {
        int num_pos = 0;
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) != -1) ++num_pos;
        }
        neg_sel = std::min(static_cast<int>(num_pos * neg_pos_ratio), neg_sel);
      } else if (mining_type == MiningType::kHardExample) {
        neg_sel = std::min(sample_size, neg_sel);
      }

      // Keep the neg_sel candidates with the largest loss.
      std::sort(loss_idx.begin(), loss_idx.end(), SortScoreDescend<size_t>);
      std::set<int> sel_indices;
      std::vector<int> neg_indices;
      std::transform(loss_idx.begin(), loss_idx.begin() + neg_sel,
                     std::inserter(sel_indices, sel_indices.begin()),
                     [](std::pair<T, size_t> l) -> int {
                       return static_cast<int>(l.second);
                     });

      if (mining_type == MiningType::kHardExample) {
        // Positives that did not make the selection are demoted to -1 in
        // UpdatedMatchIndices; selected negatives are recorded.
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) > -1) {
            if (sel_indices.find(m) == sel_indices.end()) {
              match_indices_et(n, m) = -1;
            }
          } else {
            if (sel_indices.find(m) != sel_indices.end()) {
              neg_indices.push_back(m);
            }
          }
        }
      } else {
        // max_negative mode: only record selected unmatched boxes.
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) == -1 &&
              sel_indices.find(m) != sel_indices.end()) {
            neg_indices.push_back(m);
          }
        }
      }

      all_neg_indices.push_back(neg_indices);
      batch_starts.push_back(batch_starts.back() + neg_indices.size());
    }

    // Flatten the per-sample index lists into one [Neg, 1] LoDTensor.
    framework::LoD out_neg_indices_lod;
    out_neg_indices_lod.emplace_back(batch_starts);
    int neg_offset = 0;
    auto neg_data = out_neg_indices->mutable_data<int>(
        framework::make_ddim({static_cast<int>(batch_starts.back()), 1}),
        ctx.GetPlace());

    for (const auto& neg_indices : all_neg_indices) {
      std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset);
      neg_offset += neg_indices.size();
    }
    out_neg_indices->set_lod(out_neg_indices_lod);
  }
};

W
wanghaox 已提交
165 166 167 168 169
class MineHardExamplesOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
W
wanghaox 已提交
170
  void InferShape(framework::InferShapeContext* ctx) const override {
W
wanghaox 已提交
171 172 173
    PADDLE_ENFORCE(ctx->HasInput("ClsLoss"),
                   "Input(ClsLoss) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(
W
wanghaox 已提交
174 175
        ctx->HasInput("MatchIndices"),
        "Input(MatchIndices) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
176
    PADDLE_ENFORCE(
W
wanghaox 已提交
177 178
        ctx->HasInput("MatchDist"),
        "Input(MatchDist) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
179
    PADDLE_ENFORCE(
W
wanghaox 已提交
180 181 182 183 184
        ctx->HasOutput("NegIndices"),
        "Output(NegIndices) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("UpdatedMatchIndices"),
                   "Output(UpdatedMatchIndices) of MineHardExamplesOp should "
                   "not be null.");
W
wanghaox 已提交
185 186

    auto cls_loss_dims = ctx->GetInputDim("ClsLoss");
W
wanghaox 已提交
187 188
    auto idx_dims = ctx->GetInputDim("MatchIndices");
    auto dis_dims = ctx->GetInputDim("MatchDist");
W
wanghaox 已提交
189 190 191 192

    PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL,
                      "The shape of ClsLoss is [N, Np].");
    PADDLE_ENFORCE_EQ(idx_dims.size(), 2UL,
W
wanghaox 已提交
193
                      "The shape of MatchIndices is [N, Np].");
W
wanghaox 已提交
194
    PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL,
W
wanghaox 已提交
195
                      "The shape of MatchDist is [N, Np].");
W
wanghaox 已提交
196 197 198 199 200 201 202 203 204 205 206 207 208 209

    if (ctx->HasInput("LocLoss")) {
      auto loc_loss_dims = ctx->GetInputDim("LocLoss");
      PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL,
                        "The shape of LocLoss is [N, Np].");
      PADDLE_ENFORCE_EQ(cls_loss_dims[0], loc_loss_dims[0],
                        "Batch size of ClsLoss and LocLoss must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], loc_loss_dims[1],
          "Prior box number of ClsLoss and LocLoss must be the same.");
    }

    PADDLE_ENFORCE_EQ(
        cls_loss_dims[0], idx_dims[0],
W
wanghaox 已提交
210
        "Batch size of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
211 212
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
213
        "Prior box number of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
214 215

    PADDLE_ENFORCE_EQ(cls_loss_dims[0], dis_dims[0],
W
wanghaox 已提交
216
                      "Batch size of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
217 218
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
219
        "Prior box number of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
220 221 222 223 224 225 226 227 228

    auto mining_type =
        GetMiningType(ctx->Attrs().Get<std::string>("mining_type"));

    PADDLE_ENFORCE_NE(mining_type, MiningType::kNone,
                      "mining_type must be hard_example or max_negative");

    if (mining_type == MiningType::kMaxNegative) {
      auto neg_pos_ratio = ctx->Attrs().Get<float>("neg_pos_ratio");
W
wanghaox 已提交
229
      auto neg_dist_threshold = ctx->Attrs().Get<float>("neg_dist_threshold");
W
wanghaox 已提交
230 231 232 233
      PADDLE_ENFORCE_GT(
          neg_pos_ratio, 0.0f,
          "neg_pos_ratio must greater than zero in max_negative mode");
      PADDLE_ENFORCE_GT(
W
wanghaox 已提交
234 235
          neg_dist_threshold, 0.0f,
          "neg_dist_threshold must greater than zero in max_negative mode");
W
wanghaox 已提交
236 237 238 239 240 241 242
    } else if (mining_type == MiningType::kHardExample) {
      auto sample_size = ctx->Attrs().Get<int>("sample_size");
      PADDLE_ENFORCE_GT(
          sample_size, 0,
          "sample_size must greater than zero in hard_example mode");
    }

W
wanghaox 已提交
243
    ctx->SetOutputDim("UpdatedMatchIndices", idx_dims);
W
wanghaox 已提交
244 245 246 247
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
W
wanghaox 已提交
248
      const framework::ExecutionContext& ctx) const override {
W
wanghaox 已提交
249 250 251 252 253 254 255 256
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("ClsLoss")->type()),
        ctx.device_context());
  }
};

// Declares inputs, outputs, attributes, and documentation for the
// mine_hard_examples operator.
class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MineHardExamplesOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "ClsLoss",
        "(Tensor, default Tensor<float>), The classification loss with shape "
        "[N, Np], N is the batch size and Np is the number of prior box.");
    AddInput("LocLoss",
             "(Tensor, optional, default Tensor<float>), The localization loss "
             "with shape [N, Np], N is the batch size and Np is the number of "
             "prior box.")
        .AsDispensable();
    AddInput("MatchIndices",
             "(Tensor, Tensor<int>), Matched indices with shape [N, Np], N is "
             "the batch size and Np is the number of prior box. "
             "MatchIndices[i][j] equal -1 means the j-th prior box in i-th "
             "instance does not match any entity, otherwise means it is "
             "matched to row.");
    // Fixed: was described as "Matched indices" (copy-paste from above).
    AddInput("MatchDist",
             "(Tensor, default Tensor<float>) Matched distances with shape [N, "
             "Np], N is the batch size and Np is the number of prior box.");
    AddAttr<float>("neg_pos_ratio",
                   "(float) The ratio of the negative box to the positive "
                   "box. Use only when mining_type is max_negative.")
        .SetDefault(1.0);
    AddAttr<float>("neg_dist_threshold",
                   "(float) The negative overlap upper bound for the unmatched "
                   "predictions. Use only when mining_type is max_negative.")
        .SetDefault(0.5);
    // Fixed: attribute type is int, not float, in the description below.
    AddAttr<int>("sample_size",
                 "(int) The max sample size of negative box. Use only when "
                 "mining_type is hard_example.")
        .SetDefault(0);
    // Fixed: attribute type is string, not float, in the description below.
    AddAttr<std::string>("mining_type",
                         "(string) The mining algorithm name, the value is "
                         "hard_example or max_negative.")
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});

    AddOutput(
        "NegIndices",
        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
        "and each element is the prior box index. "
        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
        "and sample 1's box 0 is selected. The output NegIndices is "
        "[[1], [0]].");

    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "
              "shape [N, Np]. Only update when mining_type is "
              "hard_example. The input MatchIndices elements will be update to "
              "-1 when it is not in the candidate high loss list of negative "
              "examples.");

    AddComment(R"DOC(
Mine hard examples Operator.
This operator implements hard example mining to select a subset of negative box indices.
For each image, selects the box with highest losses, subject to the condition that the 
box cannot have a match_dist > neg_dist_threshold when mining_type is max_negative. 
The selected number is min(sample_size, max_negative_box_number) when mining_type is 
hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number) 
when mining_type is max_negative, where the max_negative_box_number is the count of 
MatchIndices elements with value -1.
)DOC");
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
// Register the operator; it has no gradient since it only selects indices.
REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp,
                             ops::MineHardExamplesOpMaker);

// CPU kernels for float and double loss tensors (see GetExpectedKernelType:
// the kernel data type follows the ClsLoss input).
REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);