/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

W
wanghaox 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
// Mining strategy selected through the operator's "mining_type" attribute.
enum MiningType {
  // No / unrecognised strategy.
  kNone = 0,
  // Strategy named "max_negative".
  kMaxNegative,
  // Strategy named "hard_example".
  kHardExample
};

// Comparator for ordering (score, payload) pairs from the highest score to
// the lowest. Only the score (`first`) takes part in the comparison; the
// payload (`second`) is ignored.
template <typename T>
bool SortScoreDescend(const std::pair<float, T>& lhs,
                      const std::pair<float, T>& rhs) {
  const float lhs_score = lhs.first;
  const float rhs_score = rhs.first;
  return rhs_score < lhs_score;
}

// Decides whether one prior box may enter the mining candidate list.
//
//  - kMaxNegative: only boxes that are unmatched (match_idx == -1) and whose
//    match distance is strictly below neg_dist_threshold are eligible.
//  - kHardExample: every box is eligible.
//  - any other value: nothing is eligible.
inline bool IsEligibleMining(const MiningType mining_type, const int match_idx,
                             const float match_dist,
                             const float neg_dist_threshold) {
  switch (mining_type) {
    case MiningType::kMaxNegative:
      return match_idx == -1 && match_dist < neg_dist_threshold;
    case MiningType::kHardExample:
      return true;
    default:
      return false;
  }
}

W
wanghaox 已提交
41
// Maps the textual "mining_type" attribute to its MiningType enumerator.
//
// Returns kMaxNegative for "max_negative", kHardExample for "hard_example"
// and kNone for any other string. The argument is taken by const reference
// so that looking up the type does not copy the attribute string.
inline MiningType GetMiningType(const std::string& str) {
  if (str == "max_negative") {
    return MiningType::kMaxNegative;
  }
  if (str == "hard_example") {
    return MiningType::kHardExample;
  }
  return MiningType::kNone;
}

// CPU kernel of the mine_hard_examples operator.
//
// For every row n of MatchIndices the kernel
//   1. collects the loss of each prior box accepted by IsEligibleMining
//      (ClsLoss, plus LocLoss in hard_example mode when LocLoss is given),
//   2. keeps the `neg_sel` boxes with the largest loss, where neg_sel is
//      capped by num_pos * neg_pos_ratio (max_negative) or by sample_size
//      (hard_example),
//   3. writes the selected unmatched box indices to NegIndices (a LoDTensor
//      of shape [total, 1] with one LoD segment per row) and, in
//      hard_example mode, resets matched boxes that were not selected to -1
//      in UpdatedMatchIndices.
template <typename DeviceContext, typename T>
class MineHardExamplesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_cls_loss = ctx.Input<framework::Tensor>("ClsLoss");
    auto* in_loc_loss = ctx.Input<framework::Tensor>("LocLoss");
    auto* in_matched_indices = ctx.Input<framework::Tensor>("MatchIndices");
    auto* in_match_dist = ctx.Input<framework::Tensor>("MatchDist");
    const float neg_pos_ratio = ctx.Attr<float>("neg_pos_ratio");
    const T neg_dist_threshold =
        static_cast<T>(ctx.Attr<float>("neg_dist_threshold"));
    const int sample_size = ctx.Attr<int>("sample_size");
    const MiningType mining_type =
        GetMiningType(ctx.Attr<std::string>("mining_type"));

    auto* out_neg_indices = ctx.Output<framework::LoDTensor>("NegIndices");
    auto* out_match_indices =
        ctx.Output<framework::Tensor>("UpdatedMatchIndices");

    // UpdatedMatchIndices starts as a copy of MatchIndices; entries are only
    // overwritten below in hard_example mode.
    framework::Copy(*in_matched_indices, ctx.GetPlace(), out_match_indices);

    const int batch_size = in_matched_indices->dims()[0];
    const int prior_num = in_matched_indices->dims()[1];

    auto match_indices = framework::EigenMatrix<int>::From(*in_matched_indices);

    auto match_indices_et =
        framework::EigenMatrix<int>::From(*out_match_indices);

    auto match_dist = framework::EigenMatrix<T>::From(*in_match_dist);

    const T* cls_loss = in_cls_loss->data<T>();
    // LocLoss is an optional input.
    const T* loc_loss = nullptr;
    if (in_loc_loss) {
      loc_loss = in_loc_loss->data<T>();
    }

    std::vector<std::vector<int>> all_neg_indices;
    all_neg_indices.reserve(batch_size);
    // LoD offsets of NegIndices: batch_starts[n + 1] - batch_starts[n] is the
    // number of indices emitted for row n.
    std::vector<size_t> batch_starts = {0};
    for (int n = 0; n < batch_size; ++n) {
      // (loss, prior box index) for every eligible box of this row.
      std::vector<std::pair<T, size_t>> loss_idx;
      loss_idx.reserve(prior_num);
      int neg_sel = 0;
      for (int m = 0; m < prior_num; ++m) {
        if (IsEligibleMining(mining_type, match_indices(n, m), match_dist(n, m),
                             neg_dist_threshold)) {
          T loss = cls_loss[n * prior_num + m];
          if (mining_type == MiningType::kHardExample && loc_loss != nullptr) {
            loss = cls_loss[n * prior_num + m] + loc_loss[n * prior_num + m];
          }
          loss_idx.push_back(std::make_pair(loss, m));
          ++neg_sel;
        }
      }

      // Cap the number of boxes to keep.
      if (mining_type == MiningType::kMaxNegative) {
        int num_pos = 0;
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) != -1) ++num_pos;
        }
        neg_sel = std::min(static_cast<int>(num_pos * neg_pos_ratio), neg_sel);
      } else if (mining_type == MiningType::kHardExample) {
        neg_sel = std::min(sample_size, neg_sel);
      }

      // Highest loss first. The comparator works on T directly so that the
      // double kernel is not ranked in float precision and no temporary
      // pairs are created for each comparison.
      std::sort(loss_idx.begin(), loss_idx.end(),
                [](const std::pair<T, size_t>& lhs,
                   const std::pair<T, size_t>& rhs) -> bool {
                  return lhs.first > rhs.first;
                });

      // Indices of the neg_sel highest-loss boxes, ordered by box index.
      std::set<int> sel_indices;
      std::vector<int> neg_indices;
      std::transform(loss_idx.begin(), loss_idx.begin() + neg_sel,
                     std::inserter(sel_indices, sel_indices.begin()),
                     [](const std::pair<T, size_t>& l) -> int {
                       return static_cast<int>(l.second);
                     });

      if (mining_type == MiningType::kHardExample) {
        for (int m = 0; m < prior_num; ++m) {
          const bool selected = sel_indices.find(m) != sel_indices.end();
          if (match_indices(n, m) > -1) {
            // A matched box that was not selected is reset to unmatched.
            if (!selected) {
              match_indices_et(n, m) = -1;
            }
          } else if (selected) {
            // A selected unmatched box becomes a negative example.
            neg_indices.push_back(m);
          }
        }
      } else {
        neg_indices.assign(sel_indices.begin(), sel_indices.end());
      }

      batch_starts.push_back(batch_starts.back() + neg_indices.size());
      all_neg_indices.push_back(std::move(neg_indices));
    }

    framework::LoD out_neg_indices_lod;
    out_neg_indices_lod.emplace_back(batch_starts);
    int neg_offset = 0;
    auto neg_data = out_neg_indices->mutable_data<int>(
        framework::make_ddim({static_cast<int>(batch_starts.back()), 1}),
        ctx.GetPlace());

    // Flatten the per-row index lists into the output buffer; iterate by
    // const reference to avoid copying every vector.
    for (const auto& neg_indices : all_neg_indices) {
      std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset);
      neg_offset += neg_indices.size();
    }
    out_neg_indices->set_lod(out_neg_indices_lod);
  }
};

W
wanghaox 已提交
161 162 163 164 165
class MineHardExamplesOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
W
wanghaox 已提交
166
  void InferShape(framework::InferShapeContext* ctx) const override {
W
wanghaox 已提交
167 168 169
    PADDLE_ENFORCE(ctx->HasInput("ClsLoss"),
                   "Input(ClsLoss) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(
W
wanghaox 已提交
170 171
        ctx->HasInput("MatchIndices"),
        "Input(MatchIndices) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
172
    PADDLE_ENFORCE(
W
wanghaox 已提交
173 174
        ctx->HasInput("MatchDist"),
        "Input(MatchDist) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
175
    PADDLE_ENFORCE(
W
wanghaox 已提交
176 177 178 179 180
        ctx->HasOutput("NegIndices"),
        "Output(NegIndices) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("UpdatedMatchIndices"),
                   "Output(UpdatedMatchIndices) of MineHardExamplesOp should "
                   "not be null.");
W
wanghaox 已提交
181 182

    auto cls_loss_dims = ctx->GetInputDim("ClsLoss");
W
wanghaox 已提交
183 184
    auto idx_dims = ctx->GetInputDim("MatchIndices");
    auto dis_dims = ctx->GetInputDim("MatchDist");
W
wanghaox 已提交
185 186 187 188

    PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL,
                      "The shape of ClsLoss is [N, Np].");
    PADDLE_ENFORCE_EQ(idx_dims.size(), 2UL,
W
wanghaox 已提交
189
                      "The shape of MatchIndices is [N, Np].");
W
wanghaox 已提交
190
    PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL,
W
wanghaox 已提交
191
                      "The shape of MatchDist is [N, Np].");
W
wanghaox 已提交
192 193 194 195 196 197 198 199 200 201 202 203 204 205

    if (ctx->HasInput("LocLoss")) {
      auto loc_loss_dims = ctx->GetInputDim("LocLoss");
      PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL,
                        "The shape of LocLoss is [N, Np].");
      PADDLE_ENFORCE_EQ(cls_loss_dims[0], loc_loss_dims[0],
                        "Batch size of ClsLoss and LocLoss must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], loc_loss_dims[1],
          "Prior box number of ClsLoss and LocLoss must be the same.");
    }

    PADDLE_ENFORCE_EQ(
        cls_loss_dims[0], idx_dims[0],
W
wanghaox 已提交
206
        "Batch size of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
207 208
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
209
        "Prior box number of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
210 211

    PADDLE_ENFORCE_EQ(cls_loss_dims[0], dis_dims[0],
W
wanghaox 已提交
212
                      "Batch size of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
213 214
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
215
        "Prior box number of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
216 217 218 219 220 221 222 223 224

    auto mining_type =
        GetMiningType(ctx->Attrs().Get<std::string>("mining_type"));

    PADDLE_ENFORCE_NE(mining_type, MiningType::kNone,
                      "mining_type must be hard_example or max_negative");

    if (mining_type == MiningType::kMaxNegative) {
      auto neg_pos_ratio = ctx->Attrs().Get<float>("neg_pos_ratio");
W
wanghaox 已提交
225
      auto neg_dist_threshold = ctx->Attrs().Get<float>("neg_dist_threshold");
W
wanghaox 已提交
226 227 228 229
      PADDLE_ENFORCE_GT(
          neg_pos_ratio, 0.0f,
          "neg_pos_ratio must greater than zero in max_negative mode");
      PADDLE_ENFORCE_GT(
W
wanghaox 已提交
230 231
          neg_dist_threshold, 0.0f,
          "neg_dist_threshold must greater than zero in max_negative mode");
W
wanghaox 已提交
232 233 234 235 236 237 238
    } else if (mining_type == MiningType::kHardExample) {
      auto sample_size = ctx->Attrs().Get<int>("sample_size");
      PADDLE_ENFORCE_GT(
          sample_size, 0,
          "sample_size must greater than zero in hard_example mode");
    }

W
wanghaox 已提交
239
    ctx->SetOutputDim("UpdatedMatchIndices", idx_dims);
240 241
    // The first dimension of NegIndices will be set correcttly in Compute.
    ctx->SetOutputDim("NegIndices", {-1, 1});
W
wanghaox 已提交
242 243 244 245
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
W
wanghaox 已提交
246
      const framework::ExecutionContext& ctx) const override {
W
wanghaox 已提交
247 248 249 250 251 252 253 254
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("ClsLoss")->type()),
        ctx.device_context());
  }
};

// Declares the inputs, outputs, attributes and user-facing documentation of
// the mine_hard_examples operator.
class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MineHardExamplesOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "ClsLoss",
        "(Tensor, default Tensor<float>), The classification loss with shape "
        "[N, Np], N is the batch size and Np is the number of prior box.");
    // Optional: only added to the loss in hard_example mode.
    AddInput("LocLoss",
             "(Tensor, optional, default Tensor<float>), The localization loss "
             "with shape [N, Np], N is the batch size and Np is the number of "
             "prior box.")
        .AsDispensable();
    AddInput("MatchIndices",
             "(Tensor, Tensor<int>), Matched indices with shape [N, Np], N is "
             "the batch size and Np is the number of prior box. "
             "MatchIndices[i][j] equal -1 means the j-th prior box in i-th "
             "instance does not match any entity, otherwise means it is "
             "matched to row.");
    AddInput("MatchDist",
             "(Tensor, default Tensor<float>) Matched distance with shape [N, "
             "Np], N is the batch size and Np is the number of prior box.");
    AddAttr<float>("neg_pos_ratio",
                   "(float) The ratio of the negative box to the positive "
                   "box. Use only when mining_type is max_negative.")
        .SetDefault(1.0);
    AddAttr<float>("neg_dist_threshold",
                   "(float) The negative overlap upper bound for the unmatched "
                   "predictions. Use only when mining_type is max_negative.")
        .SetDefault(0.5);
    // The default of 0 is only acceptable because the default mining_type is
    // max_negative; hard_example mode requires sample_size > 0.
    AddAttr<int>("sample_size",
                 "(int) The max sample size of negative box. Use only when "
                 "mining_type is hard_example.")
        .SetDefault(0);
    AddAttr<std::string>("mining_type",
                         "(string) The mining algorithm name, the value is "
                         "hard_example or max_negative.")
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});

    AddOutput(
        "NegIndices",
        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
        "and each element is the prior box index. "
        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
        "and sample 1's box 0 is selected. The output NegIndices is "
        "[[1], [0]].");

    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "
              "shape [N, Np]. Only update when mining_type is "
              "hard_example. The input MatchIndices elements will be update to "
              "-1 when it is not in the candidate high loss list of negative "
              "examples.");

    AddComment(R"DOC(
Mine hard examples Operator.
This operator implements hard example mining to select a subset of negative box indices.
For each image, selects the boxes with the highest losses, subject to the condition that
a box must be unmatched and have MatchDist < neg_dist_threshold when mining_type is
max_negative.
The selected number is min(sample_size, max_negative_box_number) when mining_type is
hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number)
when mining_type is max_negative, where the max_negative_box_number is the count of
MatchIndices elements with value -1.
)DOC");
  }
};
}  // namespace operators
}  // namespace paddle

// Short alias for the operator namespace used by the registration macros.
namespace ops = paddle::operators;
// Registers the operator under the name "mine_hard_examples" together with
// its proto maker, using the no-gradient registration macro.
REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp,
                             ops::MineHardExamplesOpMaker);

// Registers the CPU kernel instantiations for float and double.
REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);