add OutPosCount for detection_map op

5ca0b762 · wanghaox · a0b57ac7 · 5ca0b762 · 5ca0b762 · 5ca0b762
3 changed files
--- a/paddle/operators/detection_map_op.cc
+++ b/paddle/operators/detection_map_op.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -28,6 +28,12 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
                   "Input(Detection) of DetectionMAPOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Label"),
                   "Input(Label) of DetectionMAPOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutPosCount"),
+                   "Output(OutPosCount) of DetectionMAPOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutTruePos"),
+                   "Output(OutTruePos) of DetectionMAPOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("OutFalsePos"),
+                   "Output(OutFalsePos) of DetectionMAPOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("MAP"),
                   "Output(MAP) of DetectionMAPOp should not be null.");
@@ -44,9 +50,6 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE_EQ(label_dims[1], 6UL,
                      "The shape is of Input(Label) [N, 6].");
-    auto ap_type = GetAPType(ctx->Attrs().Get<std::string>("ap_type"));
-    PADDLE_ENFORCE_NE(ap_type, APType::kNone,
-                      "The ap_type should be 'integral' or '11point.");
    auto map_dim = framework::make_ddim({1});
    ctx->SetOutputDim("MAP", map_dim);
  }
@@ -55,7 +58,8 @@ class DetectionMAPOp : public framework::OperatorWithKernel {
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
-        framework::ToDataType(ctx.Input<framework::Tensor>("Label")->type()),
+        framework::ToDataType(
+            ctx.Input<framework::Tensor>("Detection")->type()),
        ctx.device_context());
  }
 };
@@ -80,6 +84,33 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
             "the offsets in first dimension are called LoD, the number of "
             "offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
             "no detected data.");
+    AddInput("PosCount",
+             "(Tensor) A tensor with shape [Ncls, 1], store the "
+             "input positive example count of each class.")
+        .AsDispensable();
+    AddInput("TruePos",
+             "(LodTensor) A 2-D LodTensor with shape [Ntp, 2], store the "
+             "input true positive example of each class.")
+        .AsDispensable();
+    AddInput("FalsePos",
+             "(LodTensor) A 2-D LodTensor with shape [Nfp, 2], store the "
+             "input false positive example of each class.")
+        .AsDispensable();
+    AddOutput("OutPosCount",
+              "(Tensor) A tensor with shape [Ncls, 1], store the "
+              "positive example count of each class. It combines the input "
+              "input(PosCount) and the positive example count computed from "
+              "input(Detection) and input(Label).");
+    AddOutput("OutTruePos",
+              "(LodTensor) A LodTensor with shape [Ntp', 2], store the "
+              "true positive example of each class. It combines the "
+              "input(TruePos) and the true positive examples computed from "
+              "input(Detection) and input(Label).");
+    AddOutput("OutFalsePos",
+              "(LodTensor) A LodTensor with shape [Nfp', 2], store the "
+              "false positive example of each class. It combines the "
+              "input(FalsePos) and the false positive examples computed from "
+              "input(Detection) and input(Label).");
    AddOutput("MAP",
              "(Tensor) A tensor with shape [1], store the mAP evaluate "
              "result of the detection.");
@@ -97,7 +128,11 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
                         "(string, default 'integral') "
                         "The AP algorithm type, 'integral' or '11point'.")
        .SetDefault("integral")
-        .InEnum({"integral", "11point"});
+        .InEnum({"integral", "11point"})
+        .AddCustomChecker([](const std::string& ap_type) {
+          PADDLE_ENFORCE_NE(GetAPType(ap_type), APType::kNone,
+                            "The ap_type should be 'integral' or '11point.");
+        });
    AddComment(R"DOC(
 Detection mAP evaluate operator.
 The general steps are as follows. First, calculate the true positive and

--- a/paddle/operators/detection_map_op.h
+++ b/paddle/operators/detection_map_op.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -58,6 +58,14 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
    auto* in_label = ctx.Input<framework::LoDTensor>("Label");
    auto* out_map = ctx.Output<framework::Tensor>("MAP");
+    auto* in_pos_count = ctx.Input<framework::Tensor>("PosCount");
+    auto* in_true_pos = ctx.Input<framework::LoDTensor>("TruePos");
+    auto* in_false_pos = ctx.Input<framework::LoDTensor>("FalsePos");
+    auto* out_pos_count = ctx.Output<framework::Tensor>("OutPosCount");
+    auto* out_true_pos = ctx.Output<framework::LoDTensor>("OutTruePos");
+    auto* out_false_pos = ctx.Output<framework::LoDTensor>("OutFalsePos");
    float overlap_threshold = ctx.Attr<float>("overlap_threshold");
    float evaluate_difficult = ctx.Attr<bool>("evaluate_difficult");
    auto ap_type = GetAPType(ctx.Attr<std::string>("ap_type"));
@@ -79,12 +87,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
    std::map<int, std::vector<std::pair<T, int>>> true_pos;
    std::map<int, std::vector<std::pair<T, int>>> false_pos;
+    if (in_pos_count != nullptr) {
+      GetInputPos(*in_pos_count, *in_true_pos, *in_false_pos, label_pos_count,
+                  true_pos, false_pos);
+    }
    CalcTrueAndFalsePositive(gt_boxes, detect_boxes, evaluate_difficult,
                             overlap_threshold, label_pos_count, true_pos,
                             false_pos);
    T map = CalcMAP(ap_type, label_pos_count, true_pos, false_pos);
+    GetOutputPos(ctx, label_pos_count, true_pos, false_pos, *out_pos_count,
+                 *out_true_pos, *out_false_pos);
    T* map_data = out_map->mutable_data<T>(ctx.GetPlace());
    map_data[0] = map;
  }
@@ -161,6 +177,119 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
    }
  }
+  // Packs the accumulated evaluation state into the three state outputs,
+  // using the same layout that GetInputPos reads back.
+  //
+  // ctx               supplies the place on which the outputs are allocated.
+  // label_pos_count   class id -> number of positive examples.
+  // true_pos          class id -> (score, flag) pairs.
+  // false_pos         class id -> (score, flag) pairs.
+  // output_pos_count  allocated as int [max_class_id + 1, 1]; entry i is
+  //                   label_pos_count[i], or 0 when class i is absent.
+  // output_true_pos   allocated as T [rows, 2]; each row is
+  //                   (score, flag cast to T), grouped by ascending class id.
+  //                   Its level-0 LoD holds max_class_id + 2 offsets, so
+  //                   segment i delimits the rows of class i.
+  // output_false_pos  same layout as output_true_pos, built from false_pos.
+  //
+  // max_class_id is the largest key of label_pos_count (at least 0). Pairs
+  // stored under a class id outside [0, max_class_id] are not emitted.
+  void GetOutputPos(
+      const framework::ExecutionContext& ctx,
+      const std::map<int, int>& label_pos_count,
+      const std::map<int, std::vector<std::pair<T, int>>>& true_pos,
+      const std::map<int, std::vector<std::pair<T, int>>>& false_pos,
+      framework::Tensor& output_pos_count,
+      framework::LoDTensor& output_true_pos,
+      framework::LoDTensor& output_false_pos) const {
+    using PosMap = std::map<int, std::vector<std::pair<T, int>>>;
+
+    int max_class_id = 0;
+    for (const auto& label_and_count : label_pos_count) {
+      if (label_and_count.first > max_class_id) {
+        max_class_id = label_and_count.first;
+      }
+    }
+
+    // Size the buffers with exactly the rows the fill loop below writes:
+    // every pair whose class id lies in [0, max_class_id], regardless of
+    // whether that class has any positive example. Counting a narrower set
+    // of classes than the fill loop visits would overrun the buffers.
+    auto count_rows = [max_class_id](const PosMap& pos) {
+      int rows = 0;
+      for (const auto& label_and_pairs : pos) {
+        const int label = label_and_pairs.first;
+        if (label < 0 || label > max_class_id) continue;
+        rows += static_cast<int>(label_and_pairs.second.size());
+      }
+      return rows;
+    };
+    const int true_pos_rows = count_rows(true_pos);
+    const int false_pos_rows = count_rows(false_pos);
+
+    int* pos_count_data = output_pos_count.mutable_data<int>(
+        framework::make_ddim({max_class_id + 1, 1}), ctx.GetPlace());
+    T* true_pos_data = output_true_pos.mutable_data<T>(
+        framework::make_ddim({true_pos_rows, 2}), ctx.GetPlace());
+    T* false_pos_data = output_false_pos.mutable_data<T>(
+        framework::make_ddim({false_pos_rows, 2}), ctx.GetPlace());
+
+    size_t true_pos_row = 0;
+    size_t false_pos_row = 0;
+    std::vector<size_t> true_pos_starts = {0};
+    std::vector<size_t> false_pos_starts = {0};
+    for (int i = 0; i <= max_class_id; ++i) {
+      auto it_count = label_pos_count.find(i);
+      pos_count_data[i] =
+          (it_count != label_pos_count.end()) ? it_count->second : 0;
+
+      auto it_true_pos = true_pos.find(i);
+      if (it_true_pos != true_pos.end()) {
+        for (const std::pair<T, int>& tp : it_true_pos->second) {
+          true_pos_data[true_pos_row * 2] = tp.first;
+          true_pos_data[true_pos_row * 2 + 1] = static_cast<T>(tp.second);
+          ++true_pos_row;
+        }
+      }
+      // One offset per class, even when the class contributed no rows.
+      true_pos_starts.push_back(true_pos_row);
+
+      auto it_false_pos = false_pos.find(i);
+      if (it_false_pos != false_pos.end()) {
+        for (const std::pair<T, int>& fp : it_false_pos->second) {
+          false_pos_data[false_pos_row * 2] = fp.first;
+          false_pos_data[false_pos_row * 2 + 1] = static_cast<T>(fp.second);
+          ++false_pos_row;
+        }
+      }
+      false_pos_starts.push_back(false_pos_row);
+    }
+
+    framework::LoD true_pos_lod;
+    true_pos_lod.emplace_back(true_pos_starts);
+    framework::LoD false_pos_lod;
+    false_pos_lod.emplace_back(false_pos_starts);
+    output_true_pos.set_lod(true_pos_lod);
+    output_false_pos.set_lod(false_pos_lod);
+  }
+  // Restores previously accumulated evaluation state from the optional state
+  // inputs into the in-memory maps.
+  //
+  // input_pos_count  read as int data; entry i becomes label_pos_count[i] for
+  //                  i in [0, dims()[0]).
+  // input_true_pos   rows of (score, flag) stored as T; segment i of the
+  //                  level-0 LoD holds the rows of class i. A stored flag
+  //                  below 1e-6 is restored as 0, anything else as 1.
+  // input_false_pos  same layout as input_true_pos.
+  // label_pos_count, true_pos, false_pos
+  //                  receive the restored state; restored pairs are appended
+  //                  to whatever the maps already hold.
+  void GetInputPos(
+      const framework::Tensor& input_pos_count,
+      const framework::LoDTensor& input_true_pos,
+      const framework::LoDTensor& input_false_pos,
+      std::map<int, int>& label_pos_count,
+      std::map<int, std::vector<std::pair<T, int>>>& true_pos,
+      std::map<int, std::vector<std::pair<T, int>>>& false_pos) const {
+    using PosMap = std::map<int, std::vector<std::pair<T, int>>>;
+
+    const int class_number = static_cast<int>(input_pos_count.dims()[0]);
+    const int* pos_count_data = input_pos_count.data<int>();
+    for (int i = 0; i < class_number; ++i) {
+      label_pos_count[i] = pos_count_data[i];
+    }
+
+    // Both state tensors share one layout, so a single unpacking routine
+    // serves them. The class index walks the segments of LoD level 0
+    // (offsets.size() - 1 of them), not the number of LoD levels: bounding
+    // the loop by the level count would restore class 0 only.
+    auto unpack = [](const framework::LoDTensor& packed, PosMap& pos) {
+      constexpr T kEPS = static_cast<T>(1e-6);
+      const T* data = packed.data<T>();
+      const auto& lod = packed.lod();
+      if (lod.empty()) return;
+      const auto& offsets = lod[0];
+      for (size_t cls = 0; cls + 1 < offsets.size(); ++cls) {
+        for (size_t row = offsets[cls]; row < offsets[cls + 1]; ++row) {
+          const T score = data[row * 2];
+          const int flag = (data[row * 2 + 1] < kEPS) ? 0 : 1;
+          pos[static_cast<int>(cls)].push_back(std::make_pair(score, flag));
+        }
+      }
+    };
+    unpack(input_true_pos, true_pos);
+    unpack(input_false_pos, false_pos);
+  }
  void CalcTrueAndFalsePositive(
      const std::vector<std::map<int, std::vector<Box>>>& gt_boxes,
      const std::vector<std::map<int, std::vector<std::pair<T, Box>>>>&
@@ -283,7 +412,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
      size_t num = tp_sum.size();
      // Compute Precision.
      for (size_t i = 0; i < num; ++i) {
-        // CHECK_LE(tpCumSum[i], labelNumPos);
        precision.push_back(static_cast<T>(tp_sum[i]) /
                            static_cast<T>(tp_sum[i] + fp_sum[i]));
        recall.push_back(static_cast<T>(tp_sum[i]) / label_num_pos);

--- a/python/paddle/v2/fluid/tests/test_detection_map_op.py
+++ b/python/paddle/v2/fluid/tests/test_detection_map_op.py
@@ -29,10 +29,24 @@ class TestDetectionMAPOp(OpTest):
        self.detect = np.array(self.detect).astype('float32')
        self.mAP = np.array(self.mAP).astype('float32')
-        self.inputs = {
+        if (len(self.class_pos_count) > 0):
-            'Label': (self.label, self.label_lod),
+            self.class_pos_count = np.array(self.class_pos_count).astype(
-            'Detection': (self.detect, self.detect_lod)
+                'int32')
-        }
+            self.true_pos = np.array(self.true_pos).astype('float32')
+            self.false_pos = np.array(self.false_pos).astype('float32')
+            self.inputs = {
+                'Label': (self.label, self.label_lod),
+                'Detection': (self.detect, self.detect_lod),
+                'PosCount': self.class_pos_count,
+                'TruePos': (self.true_pos, self.true_pos_lod),
+                'FalsePos': (self.false_pos, self.false_pos_lod)
+            }
+        else:
+            self.inputs = {
+                'Label': (self.label, self.label_lod),
+                'Detection': (self.detect, self.detect_lod),
+            }
        self.attrs = {
            'overlap_threshold': self.overlap_threshold,
@@ -40,7 +54,17 @@ class TestDetectionMAPOp(OpTest):
            'ap_type': self.ap_type
        }
-        self.outputs = {'MAP': self.mAP}
+        self.out_class_pos_count = np.array(self.out_class_pos_count).astype(
+            'int')
+        self.out_true_pos = np.array(self.out_true_pos).astype('float32')
+        self.out_false_pos = np.array(self.out_false_pos).astype('float32')
+        self.outputs = {
+            'MAP': self.mAP,
+            'OutPosCount': self.out_class_pos_count,
+            'OutTruePos': (self.out_true_pos, self.out_true_pos_lod),
+            'OutFalsePos': (self.out_false_pos, self.out_false_pos_lod)
+        }
    def init_test_case(self):
        self.overlap_threshold = 0.3
@@ -67,13 +91,64 @@ class TestDetectionMAPOp(OpTest):
                       [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0],
                       [3, 0.2, 0, 1]]
+        self.class_pos_count = []
+        self.true_pos_lod = [[]]
+        self.true_pos = [[]]
+        self.false_pos_lod = [[]]
+        self.false_pos = [[]]
    def calc_map(self, tf_pos, tf_pos_lod):
        mAP = 0.0
        count = 0
-        class_pos_count = {}
+        def get_input_pos(class_pos_count, true_pos, true_pos_lod, false_pos,
-        true_pos = {}
+                          false_pos_lod):
-        false_pos = {}
+            # Rebuild per-class state from the packed test inputs.
+            #
+            # class_pos_count: sequence whose i-th entry is the count for
+            #     class i.
+            # true_pos / false_pos: flat row lists; true_pos_lod[0] and
+            #     false_pos_lod[0] are offset lists, where rows
+            #     [lod[0][i], lod[0][i + 1]) belong to class i.
+            #
+            # Returns (Counter of counts, defaultdict of true-positive rows,
+            # defaultdict of false-positive rows), all keyed by class index.
+            class_pos_count_dict = collections.Counter()
+            true_pos_dict = collections.defaultdict(list)
+            false_pos_dict = collections.defaultdict(list)
+            for i, count in enumerate(class_pos_count):
+                class_pos_count_dict[i] = count
+            # An offset list with k entries describes k - 1 class segments;
+            # an empty offset list yields no iterations.
+            for i in range(len(true_pos_lod[0]) - 1):
+                start = true_pos_lod[0][i]
+                end = true_pos_lod[0][i + 1]
+                for j in range(start, end):
+                    true_pos_dict[i].append(true_pos[j])
+            for i in range(len(false_pos_lod[0]) - 1):
+                start = false_pos_lod[0][i]
+                end = false_pos_lod[0][i + 1]
+                for j in range(start, end):
+                    false_pos_dict[i].append(false_pos[j])
+            return class_pos_count_dict, true_pos_dict, false_pos_dict
+        def get_output_pos(label_count, true_pos, false_pos):
+            max_label = 0
+            for (label, label_pos_num) in label_count.items():
+                if max_label < label:
+                    max_label = label
+            label_number = max_label + 1
+            out_class_pos_count = []
+            out_true_pos_lod = [0]
+            out_true_pos = []
+            out_false_pos_lod = [0]
+            out_false_pos = []
+            for i in range(label_number):
+                out_class_pos_count.append([label_count[i]])
+                true_pos_list = true_pos[i]
+                out_true_pos += true_pos_list
+                out_true_pos_lod.append(len(out_true_pos))
+                false_pos_list = false_pos[i]
+                out_false_pos += false_pos_list
+                out_false_pos_lod.append(len(out_false_pos))
+            return out_class_pos_count, out_true_pos, [
+                out_true_pos_lod
+            ], out_false_pos, [out_false_pos_lod]
        def get_accumulation(pos_list):
            sorted_list = sorted(pos_list, key=lambda pos: pos[0], reverse=True)
@@ -84,7 +159,9 @@ class TestDetectionMAPOp(OpTest):
                accu_list.append(sum)
            return accu_list
-        label_count = collections.Counter()
+        label_count, true_pos, false_pos = get_input_pos(
+            self.class_pos_count, self.true_pos, self.true_pos_lod,
+            self.false_pos, self.false_pos_lod)
        for (label, difficult, xmin, ymin, xmax, ymax) in self.label:
            if self.evaluate_difficult:
                label_count[label] += 1
@@ -143,8 +220,10 @@ class TestDetectionMAPOp(OpTest):
                mAP += average_precisions
                count += 1
+        self.out_class_pos_count, self.out_true_pos, self.out_true_pos_lod, self.out_false_pos, self.out_false_pos_lod = get_output_pos(
-        if count != 0: mAP /= count
+            label_count, true_pos, false_pos)
+        if count != 0:
+            mAP /= count
        return mAP * 100.0
    def setUp(self):
@@ -174,5 +253,15 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
        self.ap_type = "11point"
+class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
+    # Variant of the base case that supplies non-empty accumulated state.
+    # The base fixture leaves class_pos_count empty; a non-empty value makes
+    # the test feed PosCount / TruePos / FalsePos to the operator as well.
+    def init_test_case(self):
+        super(TestDetectionMAPOpMultiBatch, self).init_test_case()
+        # Entry i is the count for class i.
+        self.class_pos_count = [0, 2, 1]
+        # Offsets [0, 0, 3, 5]: class 0 owns no rows, class 1 owns rows 0-2,
+        # class 2 owns rows 3-4. Each row is [score, flag].
+        self.true_pos_lod = [[0, 0, 3, 5]]
+        self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
+        # Same scores and segmentation as true_pos, with each flag inverted.
+        self.false_pos_lod = [[0, 0, 3, 5]]
+        self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]]
 if __name__ == '__main__':
    unittest.main()