mine_hard_examples_op.cc 13.5 KB
Newer Older
1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
W
wanghaox 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15 16
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
W
wanghaox 已提交
17 18 19 20

namespace paddle {
namespace operators {

W
wanghaox 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
enum MiningType { kNone = 0, kMaxNegative, kHardExample };

template <typename T>
bool SortScoreDescend(const std::pair<float, T>& pair1,
                      const std::pair<float, T>& pair2) {
  return pair1.first > pair2.first;
}

inline bool IsEligibleMining(const MiningType mining_type, const int match_idx,
                             const float match_dist,
                             const float neg_dist_threshold) {
  if (mining_type == MiningType::kMaxNegative) {
    return match_idx == -1 && match_dist < neg_dist_threshold;
  } else if (mining_type == MiningType::kHardExample) {
    return true;
  } else {
    return false;
  }
}

W
wanghaox 已提交
41
inline MiningType GetMiningType(std::string str) {
W
wanghaox 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
  if (str == "max_negative") {
    return MiningType::kMaxNegative;
  } else if (str == "hard_example") {
    return MiningType::kHardExample;
  } else {
    return MiningType::kNone;
  }
}

template <typename DeviceContext, typename T>
class MineHardExamplesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_cls_loss = ctx.Input<framework::Tensor>("ClsLoss");
    auto* in_loc_loss = ctx.Input<framework::Tensor>("LocLoss");
    auto* in_matched_indices = ctx.Input<framework::Tensor>("MatchIndices");
    auto* in_match_dist = ctx.Input<framework::Tensor>("MatchDist");
    float neg_pos_ratio = ctx.Attr<float>("neg_pos_ratio");
    T neg_dist_threshold =
        static_cast<T>(ctx.Attr<float>("neg_dist_threshold"));
    int sample_size = ctx.Attr<int>("sample_size");
    MiningType mining_type =
        GetMiningType(ctx.Attr<std::string>("mining_type"));

    auto out_neg_indices = ctx.Output<framework::LoDTensor>("NegIndices");
    auto out_match_indices =
        ctx.Output<framework::Tensor>("UpdatedMatchIndices");

Y
Yi Wang 已提交
70 71
    framework::TensorCopy(*in_matched_indices, ctx.GetPlace(),
                          out_match_indices);
W
wanghaox 已提交
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

    int batch_size = in_matched_indices->dims()[0];
    int prior_num = in_matched_indices->dims()[1];

    auto match_indices = framework::EigenMatrix<int>::From(*in_matched_indices);

    auto match_indices_et =
        framework::EigenMatrix<int>::From(*out_match_indices);

    auto match_dist = framework::EigenMatrix<T>::From(*in_match_dist);

    const T* cls_loss = in_cls_loss->data<T>();
    const T* loc_loss = nullptr;
    if (in_loc_loss) {
      loc_loss = in_loc_loss->data<T>();
    }

    std::vector<std::vector<int>> all_neg_indices;
    std::vector<size_t> batch_starts = {0};
    for (int n = 0; n < batch_size; ++n) {
      std::vector<std::pair<T, size_t>> loss_idx;
      int neg_sel = 0;
      for (int m = 0; m < prior_num; ++m) {
        if (IsEligibleMining(mining_type, match_indices(n, m), match_dist(n, m),
                             neg_dist_threshold)) {
          T loss = cls_loss[n * prior_num + m];
          if (mining_type == MiningType::kHardExample && loc_loss != nullptr) {
            loss = cls_loss[n * prior_num + m] + loc_loss[n * prior_num + m];
          }
          loss_idx.push_back(std::make_pair(loss, m));
          ++neg_sel;
        }
      }

      if (mining_type == MiningType::kMaxNegative) {
        int num_pos = 0;
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) != -1) ++num_pos;
        }
        neg_sel = std::min(static_cast<int>(num_pos * neg_pos_ratio), neg_sel);
      } else if (mining_type == MiningType::kHardExample) {
        neg_sel = std::min(sample_size, neg_sel);
      }

W
wanghaox 已提交
116
      std::sort(loss_idx.begin(), loss_idx.end(), SortScoreDescend<size_t>);
W
wanghaox 已提交
117 118 119 120
      std::set<int> sel_indices;
      std::vector<int> neg_indices;
      std::transform(loss_idx.begin(), loss_idx.begin() + neg_sel,
                     std::inserter(sel_indices, sel_indices.begin()),
W
wanghaox 已提交
121
                     [](std::pair<T, size_t>& l) -> int {
W
wanghaox 已提交
122 123 124
                       return static_cast<int>(l.second);
                     });

W
wanghaox 已提交
125 126 127 128 129 130 131 132 133 134
      if (mining_type == MiningType::kHardExample) {
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) > -1) {
            if (sel_indices.find(m) == sel_indices.end()) {
              match_indices_et(n, m) = -1;
            }
          } else {
            if (sel_indices.find(m) != sel_indices.end()) {
              neg_indices.push_back(m);
            }
W
wanghaox 已提交
135
          }
W
wanghaox 已提交
136 137
        }
      } else {
W
wanghaox 已提交
138 139
        neg_indices.resize(sel_indices.size());
        std::copy(sel_indices.begin(), sel_indices.end(), neg_indices.begin());
W
wanghaox 已提交
140
      }
W
wanghaox 已提交
141

W
wanghaox 已提交
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
      all_neg_indices.push_back(neg_indices);
      batch_starts.push_back(batch_starts.back() + neg_indices.size());
    }

    framework::LoD out_neg_indices_lod;
    out_neg_indices_lod.emplace_back(batch_starts);
    int neg_offset = 0;
    auto neg_data = out_neg_indices->mutable_data<int>(
        framework::make_ddim({static_cast<int>(batch_starts.back()), 1}),
        ctx.GetPlace());

    for (auto neg_indices : all_neg_indices) {
      std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset);
      neg_offset += neg_indices.size();
    }
    out_neg_indices->set_lod(out_neg_indices_lod);
    return;
  }
};

W
wanghaox 已提交
162 163 164 165 166
class MineHardExamplesOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
W
wanghaox 已提交
167
  void InferShape(framework::InferShapeContext* ctx) const override {
W
wanghaox 已提交
168 169 170
    PADDLE_ENFORCE(ctx->HasInput("ClsLoss"),
                   "Input(ClsLoss) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(
W
wanghaox 已提交
171 172
        ctx->HasInput("MatchIndices"),
        "Input(MatchIndices) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
173
    PADDLE_ENFORCE(
W
wanghaox 已提交
174 175
        ctx->HasInput("MatchDist"),
        "Input(MatchDist) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
176
    PADDLE_ENFORCE(
W
wanghaox 已提交
177 178 179 180 181
        ctx->HasOutput("NegIndices"),
        "Output(NegIndices) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("UpdatedMatchIndices"),
                   "Output(UpdatedMatchIndices) of MineHardExamplesOp should "
                   "not be null.");
W
wanghaox 已提交
182 183

    auto cls_loss_dims = ctx->GetInputDim("ClsLoss");
W
wanghaox 已提交
184 185
    auto idx_dims = ctx->GetInputDim("MatchIndices");
    auto dis_dims = ctx->GetInputDim("MatchDist");
W
wanghaox 已提交
186 187 188 189

    PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL,
                      "The shape of ClsLoss is [N, Np].");
    PADDLE_ENFORCE_EQ(idx_dims.size(), 2UL,
W
wanghaox 已提交
190
                      "The shape of MatchIndices is [N, Np].");
W
wanghaox 已提交
191
    PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL,
W
wanghaox 已提交
192
                      "The shape of MatchDist is [N, Np].");
W
wanghaox 已提交
193 194 195 196 197

    if (ctx->HasInput("LocLoss")) {
      auto loc_loss_dims = ctx->GetInputDim("LocLoss");
      PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL,
                        "The shape of LocLoss is [N, Np].");
198 199 200 201 202 203 204 205
      if (ctx->IsRuntime()) {
        PADDLE_ENFORCE_EQ(
            cls_loss_dims[0], loc_loss_dims[0],
            "Batch size of ClsLoss and LocLoss must be the same.");
        PADDLE_ENFORCE_EQ(
            cls_loss_dims[1], loc_loss_dims[1],
            "Prior box number of ClsLoss and LocLoss must be the same.");
      }
W
wanghaox 已提交
206 207
    }

208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
    if (ctx->IsRuntime()) {
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[0], idx_dims[0],
          "Batch size of ClsLoss and MatchIndices must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], idx_dims[1],
          "Prior box number of ClsLoss and MatchIndices must be the same.");

      PADDLE_ENFORCE_EQ(
          cls_loss_dims[0], dis_dims[0],
          "Batch size of ClsLoss and MatchDist must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], idx_dims[1],
          "Prior box number of ClsLoss and MatchDist must be the same.");
    }
W
wanghaox 已提交
223 224 225 226 227 228 229 230 231

    auto mining_type =
        GetMiningType(ctx->Attrs().Get<std::string>("mining_type"));

    PADDLE_ENFORCE_NE(mining_type, MiningType::kNone,
                      "mining_type must be hard_example or max_negative");

    if (mining_type == MiningType::kMaxNegative) {
      auto neg_pos_ratio = ctx->Attrs().Get<float>("neg_pos_ratio");
W
wanghaox 已提交
232
      auto neg_dist_threshold = ctx->Attrs().Get<float>("neg_dist_threshold");
W
wanghaox 已提交
233 234 235
      PADDLE_ENFORCE_GT(
          neg_pos_ratio, 0.0f,
          "neg_pos_ratio must greater than zero in max_negative mode");
B
Bai Yifan 已提交
236 237 238
      PADDLE_ENFORCE_LT(
          neg_dist_threshold, 1.0f,
          "neg_dist_threshold must less than one in max_negative mode");
W
wanghaox 已提交
239
      PADDLE_ENFORCE_GT(
W
wanghaox 已提交
240 241
          neg_dist_threshold, 0.0f,
          "neg_dist_threshold must greater than zero in max_negative mode");
W
wanghaox 已提交
242 243 244 245 246 247 248
    } else if (mining_type == MiningType::kHardExample) {
      auto sample_size = ctx->Attrs().Get<int>("sample_size");
      PADDLE_ENFORCE_GT(
          sample_size, 0,
          "sample_size must greater than zero in hard_example mode");
    }

W
wanghaox 已提交
249
    ctx->SetOutputDim("UpdatedMatchIndices", idx_dims);
250 251
    // The first dimension of NegIndices will be set correcttly in Compute.
    ctx->SetOutputDim("NegIndices", {-1, 1});
W
wanghaox 已提交
252 253 254 255
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
W
wanghaox 已提交
256
      const framework::ExecutionContext& ctx) const override {
W
wanghaox 已提交
257
    return framework::OpKernelType(
258 259
        OperatorWithKernel::IndicateVarDataType(ctx, "ClsLoss"),
        platform::CPUPlace());
W
wanghaox 已提交
260 261 262 263 264
  }
};

class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
Y
Yu Yang 已提交
265
  void Make() override {
W
wanghaox 已提交
266 267
    AddInput(
        "ClsLoss",
W
wanghaox 已提交
268
        "(Tensor, default Tensor<float>), The classification loss with shape "
W
wanghaox 已提交
269 270 271
        "[N, Np], N is the batch size and Np is the number of prior box.");
    AddInput("LocLoss",
             "(Tensor, optional, default Tensor<float>), The localization loss "
W
wanghaox 已提交
272
             "with shape [N, Np], N is the batch size and Np is the number of "
W
wanghaox 已提交
273 274
             "prior box.")
        .AsDispensable();
W
wanghaox 已提交
275
    AddInput("MatchIndices",
W
wanghaox 已提交
276 277
             "(Tensor, Tensor<int>), Matched indices with shape [N, Np], N is "
             "the batch size and Np is the number of prior box. "
W
wanghaox 已提交
278 279 280 281
             "MatchIndices[i][j] equal -1 means the j-th prior box in i-th "
             "instance does not match any entity, otherwise means it is "
             "matched to row.");
    AddInput("MatchDist",
W
wanghaox 已提交
282 283 284 285
             "(Tensor, default Tensor<float>) Matched indices with shape [N, "
             "Np], N is the batch size and Np is the number of prior box.");
    AddAttr<float>("neg_pos_ratio",
                   "(float) The ratio of the negative box to the positive "
W
wanghaox 已提交
286
                   "box. Use only when mining_type is max_negative.")
W
wanghaox 已提交
287
        .SetDefault(1.0);
W
wanghaox 已提交
288
    AddAttr<float>("neg_dist_threshold",
W
wanghaox 已提交
289 290
                   "(float) The negative overlap upper bound for the unmatched "
                   "predictions. Use only when mining_type is max_negative.")
W
wanghaox 已提交
291 292 293
        .SetDefault(0.5);
    AddAttr<int>("sample_size",
                 "(float) The max sample size of negative box. Use only when "
W
wanghaox 已提交
294
                 "mining_type is hard_example.")
W
wanghaox 已提交
295 296 297 298 299 300 301
        .SetDefault(0);
    AddAttr<std::string>("mining_type",
                         "(float) The mining algorithm name, the value is "
                         "hard_example or max_negative.")
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});

W
wanghaox 已提交
302 303 304 305 306 307 308 309 310 311 312 313
    AddOutput(
        "NegIndices",
        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
        "and each element is the prior box index. "
        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
        "and sample 1's box 0 is selected. The output NegIndices is "
        "[[1], [0]].");

    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "
W
wanghaox 已提交
314
              "shape [N, Np]. Only update when mining_type is "
W
wanghaox 已提交
315 316 317
              "hard_example. The input MatchIndices elements will be update to "
              "-1 when it is not in the candidate high loss list of negative "
              "examples.");
W
wanghaox 已提交
318 319 320

    AddComment(R"DOC(
Mine hard examples Operator.
W
wanghaox 已提交
321
This operator implements hard example mining to select a subset of negative box indices.
W
wanghaox 已提交
322 323 324 325 326 327
For each image, selects the box with highest losses. subject to the condition that the 
box cannot have an Matcht > neg_dist_threshold when mining_type is max_negative. 
The selected number is min(sample_size, max_negative_box_number) when mining_type is 
hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number) 
when mining_type is max_negative, where the max_negative_box_number is the count of 
MatchIndices elements with value -1.
W
wanghaox 已提交
328 329 330 331 332 333 334
)DOC");
  }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
335 336 337
REGISTER_OPERATOR(mine_hard_examples, ops::MineHardExamplesOp,
                  ops::MineHardExamplesOpMaker,
                  paddle::framework::EmptyGradOpMaker);
W
wanghaox 已提交
338 339 340 341 342

REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);