From c22f7fcd17fea1a80a973d7135a37fdd0c619406 Mon Sep 17 00:00:00 2001
From: zhouxiao-coder <zhouxiaocoder@gmail.com>
Date: Thu, 26 Oct 2017 03:57:56 +0800
Subject: [PATCH] add positive_negative_pair_op evaluator

---
 paddle/operators/positive_negative_pair_op.cc | 104 ++++++++++++++++++
 paddle/operators/positive_negative_pair_op.h  |  92 ++++++++++++++++
 .../tests/test_positive_negative_pair_op.py   |  61 ++++++++++
 3 files changed, 257 insertions(+)
 create mode 100644 paddle/operators/positive_negative_pair_op.cc
 create mode 100644 paddle/operators/positive_negative_pair_op.h
 create mode 100644 python/paddle/v2/framework/tests/test_positive_negative_pair_op.py
diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc
new file mode 100644
index 0000000000..5b6581ccac
--- /dev/null
+++ b/paddle/operators/positive_negative_pair_op.cc
@@ -0,0 +1,104 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/positive_negative_pair_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator declaration: validates inputs/outputs and infers output shapes.
+class PositiveNegativePairOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasInput("Score"),
+        "Input(Score) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasInput("Label"),
+        "Input(Label) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasInput("QueryId"),
+        "Input(QueryId) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("PositivePair"),
+        "Output(PositivePair) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("NegativePair"),
+        "Output(NegativePair) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("NeutralPair"),
+        "Output(NeutralPair) of PositiveNegativePairOp should not be null.");
+
+    auto score_dim = ctx->GetInputDim("Score");
+    auto label_dim = ctx->GetInputDim("Label");
+    auto query_dim = ctx->GetInputDim("QueryId");
+
+    PADDLE_ENFORCE(score_dim == label_dim,
+                   "Shape of Score must be the same as Label's shape.");
+    PADDLE_ENFORCE(query_dim == label_dim,
+                   "Shape of QueryId must be the same as Label's shape.");
+
+    ctx->SetOutputDim("PositivePair", {1});
+    ctx->SetOutputDim("NegativePair", {1});
+    ctx->SetOutputDim("NeutralPair", {1});
+  }
+
+ protected:
+  // The operator's data type follows the element type of the Score tensor.
+  framework::DataType IndicateDataType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::ToDataType(ctx.Input<Tensor>("Score")->type());
+  }
+};
+
+class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  PositiveNegativePairOpMaker(framework::OpProto *proto,
+                              framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Score",
+             "(Tensor, float) Output score of the network on <query, document> "
+             "pair.");
+    AddInput("Label",
+             "(Tensor, float) Label of current <query, document> pair.");
+    AddInput("QueryId",
+             "(Tensor, int) query id of current <query, document> pair.");
+    AddOutput("PositivePair",
+              "(float) Number of positive ranking pairs, i.e. the pairs of "
+              "documents that are ranked correctly");
+    AddOutput("NegativePair",
+              "(float) Number of negative ranking pairs, i.e. the pairs of "
+              "documents that are ranked incorrectly");
+    AddOutput("NeutralPair",
+              "(float) Number of neutral ranking pairs. A pair of documents "
+              "(doc#1, doc#2) is classified as \"neutral\" if their scores are "
+              "the same.");
+    AddComment(R"DOC(
+        PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR) model performance. Its outputs are usually
+        further summarized as positive-negative-ratio: PositivePair/NegativePair.
+        Its 3 inputs can be viewed as a series of 3-tuples: (prediction score, golden label, query id).
+        For each unique query id, a list of <score, label> items is collected and positive/negative/neutral pairs are accumulated to its outputs.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;  // short alias for the macros below
+REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair,
+                             ops::PositiveNegativePairOp,
+                             ops::PositiveNegativePairOpMaker);
+REGISTER_OP_CPU_KERNEL(  // only a CPU/float kernel is registered here
+    positive_negative_pair,
+    ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/positive_negative_pair_op.h b/paddle/operators/positive_negative_pair_op.h
new file mode 100644
index 0000000000..a4ff5e3d81
--- /dev/null
+++ b/paddle/operators/positive_negative_pair_op.h
@@ -0,0 +1,92 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <unordered_map>
+#include <vector>
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;        // type of every input/output below
+using LoDTensor = framework::LoDTensor;  // alias not referenced in this header
+
+// Counts positive, negative and neutral document pairs within each query.
+template <typename Place, typename T>
+class PositiveNegativePairKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto score_t = context.Input<Tensor>("Score");
+    auto label_t = context.Input<Tensor>("Label");
+    auto query_t = context.Input<Tensor>("QueryId");
+    auto positive_t = context.Output<Tensor>("PositivePair");
+    auto negative_t = context.Output<Tensor>("NegativePair");
+    auto neutral_t = context.Output<Tensor>("NeutralPair");
+
+    auto score = score_t->data<float>();
+    auto label = label_t->data<float>();
+    auto query = query_t->data<int>();
+
+    T* positive = positive_t->mutable_data<T>(context.GetPlace());
+    T* negative = negative_t->mutable_data<T>(context.GetPlace());
+    T* neutral = neutral_t->mutable_data<T>(context.GetPlace());
+
+    auto score_dim = score_t->dims();
+    PADDLE_ENFORCE_GE(score_dim.size(), 1L,
+                      "Rank of Score must be at least 1.");
+    PADDLE_ENFORCE_LE(score_dim.size(), 2L,
+                      "Rank of Score must be less or equal to 2.");
+    auto batch_size = score_dim[0];
+    auto width = score_dim.size() > 1 ? score_dim[1] : 1;
+
+    // Group items by query: Query => List[<score#0, label#0>, ...]. The inputs
+    // share one shape, so row i starts at offset i * width; the score comes
+    // from the row's last column, label and query id from its first column.
+    std::unordered_map<int, std::vector<std::pair<float, float>>> predictions;
+    for (int64_t i = 0; i < batch_size; ++i) {
+      predictions[query[i * width]].emplace_back(score[i * width + width - 1],
+                                                 label[i * width]);
+    }
+
+    // For each query, classify every unordered pair whose labels differ.
+    T pos = 0, neg = 0, neu = 0;
+    auto evaluate_one_list = [&pos, &neg, &neu](
+        const std::vector<std::pair<float, float>>& vec) {
+      for (auto ite1 = vec.begin(); ite1 != vec.end(); ++ite1) {
+        for (auto ite2 = ite1 + 1; ite2 != vec.end(); ++ite2) {
+          if (ite1->second == ite2->second) {  // labels are equal, ignore.
+            continue;
+          }
+          if (ite1->first == ite2->first) {
+            ++neu;  // tied scores count as neutral only, never as negative
+          } else if ((ite1->first - ite2->first) *
+                         (ite1->second - ite2->second) > 0.0) {
+            ++pos;  // score order agrees with label order
+          } else {
+            ++neg;  // score order contradicts label order
+          }
+        }
+      }
+    };
+    for (const auto& prediction : predictions) {
+      evaluate_one_list(prediction.second);
+    }
+
+    *positive = pos;
+    *negative = neg;
+    *neutral = neu;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py
new file mode 100644
index 0000000000..314c17f00e
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py
@@ -0,0 +1,61 @@
+import unittest
+import itertools
+import numpy as np
+from op_test import OpTest
+
+
+def py_pnpair_op(score, label, query):
+    """NumPy reference: count (positive, negative, neutral) pairs per query.
+
+    Uses the last score column and the first label/query column of each row.
+    """
+    # group <score, label> items by query id
+    predictions = {}
+    for s, l, q in zip(score, label, query):
+        # rows are ndarrays, so index them directly; a `type(s) is list`
+        # test never fires and would leave the first score column in use
+        predictions.setdefault(q[0], []).append((s[-1], l[0]))
+
+    # accumulate statistics over every unordered pair within a query
+    pos, neg, neu = 0, 0, 0
+    for ranks in predictions.values():
+        for (s1, l1), (s2, l2) in itertools.combinations(ranks, 2):
+            if l1 == l2:  # equal labels carry no ranking information
+                continue
+            if s1 == s2:
+                neu += 1
+            elif (s1 - s2) * (l1 - l2) > 0:
+                pos += 1
+            else:
+                neg += 1
+
+    return np.array(pos).astype('float32'), np.array(neg).astype(
+        'float32'), np.array(neu).astype('float32')
+
+
+class TestPositiveNegativePairOp(OpTest):
+    """Checks positive_negative_pair against the NumPy reference."""
+
+    def setUp(self):
+        """Builds random inputs and the expected pair counts."""
+        self.op_type = 'positive_negative_pair'
+        num_items, num_queries = 20, 5
+        shape = (num_items, 1)
+
+        # random scores and labels, query ids drawn from [0, num_queries)
+        score = np.random.normal(size=shape).astype('float32')
+        label = np.random.normal(size=shape).astype('float32')
+        query = np.random.randint(num_queries, size=shape).astype('int32')
+
+        expected = py_pnpair_op(score, label, query)
+        self.inputs = {'Score': score, 'Label': label, 'QueryId': query}
+        self.outputs = dict(
+            zip(('PositivePair', 'NegativePair', 'NeutralPair'), expected))
+
+    def test_check_output(self):
+        """Delegates to OpTest.check_output()."""
+        self.check_output()
+
+
+if __name__ == '__main__':  # run the suite when executed as a script
+    unittest.main()
-- 
GitLab