mine_hard_examples_op.cc 13.2 KB
Newer Older
W
wanghaox 已提交
1
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

Y
Yi Wang 已提交
15 16
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
W
wanghaox 已提交
17 18 19 20

namespace paddle {
namespace operators {

W
wanghaox 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
// Negative-sample mining strategy selected by the "mining_type" attribute.
enum MiningType {
  kNone = 0,         // no (or unrecognized) mining strategy
  kMaxNegative = 1,  // "max_negative"
  kHardExample = 2   // "hard_example"
};

// Comparator for descending order on the score stored in `.first` of a
// (score, payload) pair. Returns true when pair1 must be placed before pair2,
// i.e. when pair1's score is strictly greater.
template <typename T>
bool SortScoreDescend(const std::pair<float, T>& pair1,
                      const std::pair<float, T>& pair2) {
  const float lhs_score = pair1.first;
  const float rhs_score = pair2.first;
  return rhs_score < lhs_score;
}

// Tells whether a prior box is a candidate for mining.
//   - kMaxNegative: only unmatched boxes (match_idx == -1) whose match_dist is
//     strictly below neg_dist_threshold are candidates.
//   - kHardExample: every box is a candidate.
//   - any other mining type: no box is a candidate.
inline bool IsEligibleMining(const MiningType mining_type, const int match_idx,
                             const float match_dist,
                             const float neg_dist_threshold) {
  switch (mining_type) {
    case MiningType::kMaxNegative:
      return match_idx == -1 && match_dist < neg_dist_threshold;
    case MiningType::kHardExample:
      return true;
    default:
      return false;
  }
}

W
wanghaox 已提交
41
// Maps the textual "mining_type" attribute to its MiningType value.
// "max_negative" -> kMaxNegative, "hard_example" -> kHardExample; any other
// string yields kNone. The argument is taken by const reference so that no
// string copy is made per call.
inline MiningType GetMiningType(const std::string& str) {
  if (str == "max_negative") {
    return MiningType::kMaxNegative;
  } else if (str == "hard_example") {
    return MiningType::kHardExample;
  } else {
    return MiningType::kNone;
  }
}

// Kernel of the mine_hard_examples operator.
//
// For every row n of MatchIndices (one row per batch item) it:
//   1. collects (loss, prior index) pairs for all priors accepted by
//      IsEligibleMining; in hard_example mode the loss is ClsLoss + LocLoss
//      when LocLoss is provided, otherwise ClsLoss alone;
//   2. limits the number of selected priors to
//      min(num_pos * neg_pos_ratio, candidates) in max_negative mode, or
//      min(sample_size, candidates) in hard_example mode;
//   3. keeps the highest-loss candidates;
//   4. in hard_example mode, resets to -1 every matched entry of
//      UpdatedMatchIndices that was not selected and reports the selected
//      unmatched priors as negatives; in the other mode all selected priors
//      are reported as negatives.
// The per-row negatives are concatenated into NegIndices (shape [Neg, 1]),
// whose level-0 LoD holds the row boundaries.
template <typename DeviceContext, typename T>
class MineHardExamplesKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* in_cls_loss = ctx.Input<framework::Tensor>("ClsLoss");
    auto* in_loc_loss = ctx.Input<framework::Tensor>("LocLoss");
    auto* in_matched_indices = ctx.Input<framework::Tensor>("MatchIndices");
    auto* in_match_dist = ctx.Input<framework::Tensor>("MatchDist");
    float neg_pos_ratio = ctx.Attr<float>("neg_pos_ratio");
    T neg_dist_threshold =
        static_cast<T>(ctx.Attr<float>("neg_dist_threshold"));
    int sample_size = ctx.Attr<int>("sample_size");
    MiningType mining_type =
        GetMiningType(ctx.Attr<std::string>("mining_type"));

    auto out_neg_indices = ctx.Output<framework::LoDTensor>("NegIndices");
    auto out_match_indices =
        ctx.Output<framework::Tensor>("UpdatedMatchIndices");

    // UpdatedMatchIndices starts as a copy of MatchIndices; entries are only
    // overwritten below in hard_example mode.
    framework::Copy(*in_matched_indices, ctx.GetPlace(), out_match_indices);

    int batch_size = in_matched_indices->dims()[0];
    int prior_num = in_matched_indices->dims()[1];

    auto match_indices = framework::EigenMatrix<int>::From(*in_matched_indices);

    auto match_indices_et =
        framework::EigenMatrix<int>::From(*out_match_indices);

    auto match_dist = framework::EigenMatrix<T>::From(*in_match_dist);

    const T* cls_loss = in_cls_loss->data<T>();
    // LocLoss is optional; a null pointer means "classification loss only".
    const T* loc_loss = nullptr;
    if (in_loc_loss) {
      loc_loss = in_loc_loss->data<T>();
    }

    std::vector<std::vector<int>> all_neg_indices;
    all_neg_indices.reserve(batch_size);
    std::vector<size_t> batch_starts = {0};
    for (int n = 0; n < batch_size; ++n) {
      // (loss, prior index) for every mining candidate of this batch item.
      std::vector<std::pair<T, size_t>> loss_idx;
      loss_idx.reserve(prior_num);
      int neg_sel = 0;
      for (int m = 0; m < prior_num; ++m) {
        if (IsEligibleMining(mining_type, match_indices(n, m), match_dist(n, m),
                             neg_dist_threshold)) {
          const int offset = n * prior_num + m;
          T loss = cls_loss[offset];
          if (mining_type == MiningType::kHardExample && loc_loss != nullptr) {
            loss = cls_loss[offset] + loc_loss[offset];
          }
          loss_idx.push_back(std::make_pair(loss, m));
          ++neg_sel;
        }
      }

      if (mining_type == MiningType::kMaxNegative) {
        int num_pos = 0;
        for (int m = 0; m < prior_num; ++m) {
          if (match_indices(n, m) != -1) ++num_pos;
        }
        neg_sel = std::min(static_cast<int>(num_pos * neg_pos_ratio), neg_sel);
      } else if (mining_type == MiningType::kHardExample) {
        neg_sel = std::min(sample_size, neg_sel);
      }
      // Defensive: a negative count would make `begin() + neg_sel` below
      // an invalid iterator.
      neg_sel = std::max(neg_sel, 0);

      // Sort by descending loss. The comparator works on the element type
      // itself, so losses of the double kernel are compared at full
      // precision and no converted temporary pairs are created.
      std::sort(loss_idx.begin(), loss_idx.end(),
                [](const std::pair<T, size_t>& lhs,
                   const std::pair<T, size_t>& rhs) {
                  return lhs.first > rhs.first;
                });

      // Prior indices of the neg_sel highest-loss candidates (ordered set, so
      // the emitted indices are in ascending order).
      std::set<int> sel_indices;
      std::vector<int> neg_indices;
      std::transform(loss_idx.begin(), loss_idx.begin() + neg_sel,
                     std::inserter(sel_indices, sel_indices.begin()),
                     [](const std::pair<T, size_t>& l) -> int {
                       return static_cast<int>(l.second);
                     });

      if (mining_type == MiningType::kHardExample) {
        for (int m = 0; m < prior_num; ++m) {
          const bool selected = sel_indices.find(m) != sel_indices.end();
          if (match_indices(n, m) > -1) {
            // A matched prior that was not selected is dropped from the
            // updated match indices.
            if (!selected) {
              match_indices_et(n, m) = -1;
            }
          } else {
            // A selected unmatched prior is reported as a negative.
            if (selected) {
              neg_indices.push_back(m);
            }
          }
        }
      } else {
        neg_indices.resize(sel_indices.size());
        std::copy(sel_indices.begin(), sel_indices.end(), neg_indices.begin());
      }

      all_neg_indices.push_back(neg_indices);
      batch_starts.push_back(batch_starts.back() + neg_indices.size());
    }

    // Flatten the per-item index lists into NegIndices and record the item
    // boundaries as its level-0 LoD.
    framework::LoD out_neg_indices_lod;
    out_neg_indices_lod.emplace_back(batch_starts);
    int neg_offset = 0;
    auto neg_data = out_neg_indices->mutable_data<int>(
        framework::make_ddim({static_cast<int>(batch_starts.back()), 1}),
        ctx.GetPlace());

    for (const auto& neg_indices : all_neg_indices) {
      std::copy(neg_indices.begin(), neg_indices.end(), neg_data + neg_offset);
      neg_offset += neg_indices.size();
    }
    out_neg_indices->set_lod(out_neg_indices_lod);
  }
};

W
wanghaox 已提交
161 162 163 164 165
class MineHardExamplesOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

 protected:
W
wanghaox 已提交
166
  void InferShape(framework::InferShapeContext* ctx) const override {
W
wanghaox 已提交
167 168 169
    PADDLE_ENFORCE(ctx->HasInput("ClsLoss"),
                   "Input(ClsLoss) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(
W
wanghaox 已提交
170 171
        ctx->HasInput("MatchIndices"),
        "Input(MatchIndices) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
172
    PADDLE_ENFORCE(
W
wanghaox 已提交
173 174
        ctx->HasInput("MatchDist"),
        "Input(MatchDist) of MineHardExamplesOp should not be null.");
W
wanghaox 已提交
175
    PADDLE_ENFORCE(
W
wanghaox 已提交
176 177 178 179 180
        ctx->HasOutput("NegIndices"),
        "Output(NegIndices) of MineHardExamplesOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("UpdatedMatchIndices"),
                   "Output(UpdatedMatchIndices) of MineHardExamplesOp should "
                   "not be null.");
W
wanghaox 已提交
181 182

    auto cls_loss_dims = ctx->GetInputDim("ClsLoss");
W
wanghaox 已提交
183 184
    auto idx_dims = ctx->GetInputDim("MatchIndices");
    auto dis_dims = ctx->GetInputDim("MatchDist");
W
wanghaox 已提交
185 186 187 188

    PADDLE_ENFORCE_EQ(cls_loss_dims.size(), 2UL,
                      "The shape of ClsLoss is [N, Np].");
    PADDLE_ENFORCE_EQ(idx_dims.size(), 2UL,
W
wanghaox 已提交
189
                      "The shape of MatchIndices is [N, Np].");
W
wanghaox 已提交
190
    PADDLE_ENFORCE_EQ(dis_dims.size(), 2UL,
W
wanghaox 已提交
191
                      "The shape of MatchDist is [N, Np].");
W
wanghaox 已提交
192 193 194 195 196 197 198 199 200 201 202 203 204 205

    if (ctx->HasInput("LocLoss")) {
      auto loc_loss_dims = ctx->GetInputDim("LocLoss");
      PADDLE_ENFORCE_EQ(loc_loss_dims.size(), 2UL,
                        "The shape of LocLoss is [N, Np].");
      PADDLE_ENFORCE_EQ(cls_loss_dims[0], loc_loss_dims[0],
                        "Batch size of ClsLoss and LocLoss must be the same.");
      PADDLE_ENFORCE_EQ(
          cls_loss_dims[1], loc_loss_dims[1],
          "Prior box number of ClsLoss and LocLoss must be the same.");
    }

    PADDLE_ENFORCE_EQ(
        cls_loss_dims[0], idx_dims[0],
W
wanghaox 已提交
206
        "Batch size of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
207 208
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
209
        "Prior box number of ClsLoss and MatchIndices must be the same.");
W
wanghaox 已提交
210 211

    PADDLE_ENFORCE_EQ(cls_loss_dims[0], dis_dims[0],
W
wanghaox 已提交
212
                      "Batch size of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
213 214
    PADDLE_ENFORCE_EQ(
        cls_loss_dims[1], idx_dims[1],
W
wanghaox 已提交
215
        "Prior box number of ClsLoss and MatchDist must be the same.");
W
wanghaox 已提交
216 217 218 219 220 221 222 223 224

    auto mining_type =
        GetMiningType(ctx->Attrs().Get<std::string>("mining_type"));

    PADDLE_ENFORCE_NE(mining_type, MiningType::kNone,
                      "mining_type must be hard_example or max_negative");

    if (mining_type == MiningType::kMaxNegative) {
      auto neg_pos_ratio = ctx->Attrs().Get<float>("neg_pos_ratio");
W
wanghaox 已提交
225
      auto neg_dist_threshold = ctx->Attrs().Get<float>("neg_dist_threshold");
W
wanghaox 已提交
226 227 228 229
      PADDLE_ENFORCE_GT(
          neg_pos_ratio, 0.0f,
          "neg_pos_ratio must greater than zero in max_negative mode");
      PADDLE_ENFORCE_GT(
W
wanghaox 已提交
230 231
          neg_dist_threshold, 0.0f,
          "neg_dist_threshold must greater than zero in max_negative mode");
W
wanghaox 已提交
232 233 234 235 236 237 238
    } else if (mining_type == MiningType::kHardExample) {
      auto sample_size = ctx->Attrs().Get<int>("sample_size");
      PADDLE_ENFORCE_GT(
          sample_size, 0,
          "sample_size must greater than zero in hard_example mode");
    }

W
wanghaox 已提交
239
    ctx->SetOutputDim("UpdatedMatchIndices", idx_dims);
W
wanghaox 已提交
240 241 242 243
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
W
wanghaox 已提交
244
      const framework::ExecutionContext& ctx) const override {
W
wanghaox 已提交
245 246 247 248 249 250 251 252
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<framework::Tensor>("ClsLoss")->type()),
        ctx.device_context());
  }
};

// Declares the inputs, outputs, attributes and user-facing documentation of
// the mine_hard_examples operator.
class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MineHardExamplesOpMaker(OpProto* proto, OpAttrChecker* op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "ClsLoss",
        "(Tensor, default Tensor<float>), The classification loss with shape "
        "[N, Np], N is the batch size and Np is the number of prior box.");
    AddInput("LocLoss",
             "(Tensor, optional, default Tensor<float>), The localization loss "
             "with shape [N, Np], N is the batch size and Np is the number of "
             "prior box.")
        .AsDispensable();
    AddInput("MatchIndices",
             "(Tensor, Tensor<int>), Matched indices with shape [N, Np], N is "
             "the batch size and Np is the number of prior box. "
             "MatchIndices[i][j] equal -1 means the j-th prior box in i-th "
             "instance does not match any entity, otherwise means it is "
             "matched to row.");
    AddInput("MatchDist",
             "(Tensor, default Tensor<float>) Matched distance with shape [N, "
             "Np], N is the batch size and Np is the number of prior box.");
    AddAttr<float>("neg_pos_ratio",
                   "(float) The ratio of the negative box to the positive "
                   "box. Use only when mining_type is max_negative.")
        .SetDefault(1.0);
    AddAttr<float>("neg_dist_threshold",
                   "(float) The negative overlap upper bound for the unmatched "
                   "predictions. Use only when mining_type is max_negative.")
        .SetDefault(0.5);
    // The default of 0 is rejected by InferShape in hard_example mode, so
    // callers using that mode have to set sample_size explicitly.
    AddAttr<int>("sample_size",
                 "(int) The max sample size of negative box. Use only when "
                 "mining_type is hard_example.")
        .SetDefault(0);
    AddAttr<std::string>("mining_type",
                         "(string) The mining algorithm name, the value is "
                         "hard_example or max_negative.")
        .SetDefault("max_negative")
        .InEnum({"hard_example", "max_negative"});

    AddOutput(
        "NegIndices",
        "(LoDTensor<int>) The output of negative example indices. a LoDTensor "
        "with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
        "and each element is the prior box index. "
        "For example, the batch size is 2, the lod is [[0, 1, 2]], "
        "the sample 0's box 1(MatchIndices[0][1]) is selected, "
        "and sample 1's box 0 is selected. The output NegIndices is "
        "[[1], [0]].");

    AddOutput("UpdatedMatchIndices",
              "(Tensor<int>) The output of updated MatchIndices, a tensor with "
              "shape [N, Np]. Only update when mining_type is "
              "hard_example. The input MatchIndices elements will be update to "
              "-1 when it is not in the candidate high loss list of negative "
              "examples.");

    AddComment(R"DOC(
Mine hard examples Operator.
This operator implements hard example mining to select a subset of negative box indices.
For each image, selects the boxes with highest losses, subject to the condition that the 
box cannot have a MatchDist >= neg_dist_threshold when mining_type is max_negative. 
The selected number is min(sample_size, max_negative_box_number) when mining_type is 
hard_example, or min(neg_pos_ratio * positive_box_number, max_negative_box_number) 
when mining_type is max_negative, where the max_negative_box_number is the count of 
MatchIndices elements with value -1.
)DOC");
  }
};
}  // namespace operators
}  // namespace paddle

// Short alias used by the registration macros below.
namespace ops = paddle::operators;
// Register the mine_hard_examples operator together with its proto maker; the
// macro name indicates that no gradient operator is registered for it.
REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp,
                             ops::MineHardExamplesOpMaker);

// Register the CPU kernel, instantiated for float and double element types.
REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, float>,
    ops::MineHardExamplesKernel<paddle::platform::CPUDeviceContext, double>);