From c22f7fcd17fea1a80a973d7135a37fdd0c619406 Mon Sep 17 00:00:00 2001
From: zhouxiao-coder
Date: Thu, 26 Oct 2017 03:57:56 +0800
Subject: [PATCH] add positive_negative_pair_op evaluator

---
 paddle/operators/positive_negative_pair_op.cc | 120 +++++++++++++++++++++
 paddle/operators/positive_negative_pair_op.h  | 106 ++++++++++++++++++
 .../tests/test_positive_negative_pair_op.py   |  64 +++++++++++
 3 files changed, 290 insertions(+)
 create mode 100644 paddle/operators/positive_negative_pair_op.cc
 create mode 100644 paddle/operators/positive_negative_pair_op.h
 create mode 100644 python/paddle/v2/framework/tests/test_positive_negative_pair_op.py

diff --git a/paddle/operators/positive_negative_pair_op.cc b/paddle/operators/positive_negative_pair_op.cc
new file mode 100644
index 000000000..5b6581cca
--- /dev/null
+++ b/paddle/operators/positive_negative_pair_op.cc
@@ -0,0 +1,120 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/positive_negative_pair_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator definition for positive_negative_pair.
+//
+// InferShape checks that all three inputs and all three outputs are present
+// and that Score, Label and QueryId share one shape, then gives every output
+// the shape {1}.
+class PositiveNegativePairOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    PADDLE_ENFORCE(
+        ctx->HasInput("Score"),
+        "Input(Score) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasInput("Label"),
+        "Input(Label) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasInput("QueryId"),
+        "Input(QueryId) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("PositivePair"),
+        "Output(PositivePair) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("NegativePair"),
+        "Output(NegativePair) of PositiveNegativePairOp should not be null.");
+    PADDLE_ENFORCE(
+        ctx->HasOutput("NeutralPair"),
+        "Output(NeutralPair) of PositiveNegativePairOp should not be null.");
+
+    auto score_dim = ctx->GetInputDim("Score");
+    auto label_dim = ctx->GetInputDim("Label");
+    auto query_dim = ctx->GetInputDim("QueryId");
+
+    // All three inputs must have identical shapes.
+    // NOTE(review): the kernel accepts a rank-2 Score and reads its last
+    // column while indexing Label and QueryId by the row number alone; with
+    // identical shapes enforced here that only lines up when the second
+    // dimension is 1 -- confirm the intended contract.
+    PADDLE_ENFORCE(score_dim == label_dim,
+                   "Shape of Score must be the same as Label's shape.");
+    PADDLE_ENFORCE(query_dim == label_dim,
+                   "Shape of QueryId must be the same as Label's shape.");
+
+    // Each output holds one accumulated count.
+    ctx->SetOutputDim("PositivePair", {1});
+    ctx->SetOutputDim("NegativePair", {1});
+    ctx->SetOutputDim("NeutralPair", {1});
+  }
+
+ protected:
+  // The kernel data type is taken from the Score input.
+  // NOTE(review): the template argument of Input was missing from the
+  // reviewed copy and is restored as Tensor -- confirm against the commit.
+  framework::DataType IndicateDataType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::ToDataType(ctx.Input<Tensor>("Score")->type());
+  }
+};
+
+// Declares the inputs, outputs and user-facing description of the
+// positive_negative_pair operator.
+class PositiveNegativePairOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  PositiveNegativePairOpMaker(framework::OpProto *proto,
+                              framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Score",
+             "(Tensor, float) Output score of the network on "
+             "pair.");
+    AddInput("Label",
+             "(Tensor, float or int) Label of current pair.");
+    AddInput("QueryId",
+             "(Tensor, int) query id of current pair.");
+    AddOutput("PositivePair",
+              "(float) Number of positive ranking pairs, i.e. the pairs of "
+              "documents that are ranked correctly");
+    AddOutput("NegativePair",
+              "(float) Number of negative ranking pairs, i.e. the pairs of "
+              "documents that are ranked incorrectly");
+    AddOutput("NeutralPair",
+              "(float) Number of neutral ranking pairs. A pair of document "
+              "(doc#1, doc#2) is classified as \"neutral\" if their scores are "
+              "the same.");
+    AddComment(R"DOC(
+    PositiveNegativePairOp can be used to evaluate Learning To Rank(LTR) model performance. Its outputs are usually
+    further summarized as positive-negative-ratio: PositivePair/NegativePair.
+    Its 3 inputs can be viewed as a series of 3 tuples: (prediction score, golden label, query id).
+    For each unique query id, a list of (score, label) entries is collected and positive/negative pairs are accumulated to its output.
+)DOC");
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair,
+                             ops::PositiveNegativePairOp,
+                             ops::PositiveNegativePairOpMaker);
+// NOTE(review): the kernel's template arguments were missing from the
+// reviewed copy; <CPUPlace, float> is assumed -- confirm against the commit.
+REGISTER_OP_CPU_KERNEL(
+    positive_negative_pair,
+    ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/positive_negative_pair_op.h b/paddle/operators/positive_negative_pair_op.h
new file mode 100644
index 000000000..a4ff5e3d8
--- /dev/null
+++ b/paddle/operators/positive_negative_pair_op.h
@@ -0,0 +1,106 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include <unordered_map>
+#include <vector>
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+
+// CPU kernel of positive_negative_pair.
+//
+// Rows are grouped by QueryId. Within a group, every pair of rows whose
+// labels differ is counted exactly once: as neutral when the two scores are
+// equal, as positive when score and label differ in the same direction, and
+// as negative otherwise. Pairs with equal labels are skipped. The three
+// totals are written to PositivePair, NegativePair and NeutralPair.
+//
+// NOTE(review): the template parameter list and every <...> argument in this
+// file were missing from the reviewed copy; they are restored from usage (T
+// for scores, labels and outputs, int for query ids) -- confirm against the
+// original commit.
+template <typename Place, typename T>
+class PositiveNegativePairKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto score_t = context.Input<Tensor>("Score");
+    auto label_t = context.Input<Tensor>("Label");
+    auto query_t = context.Input<Tensor>("QueryId");
+    auto positive_t = context.Output<Tensor>("PositivePair");
+    auto negative_t = context.Output<Tensor>("NegativePair");
+    auto neutral_t = context.Output<Tensor>("NeutralPair");
+
+    auto score = score_t->data<T>();
+    auto label = label_t->data<T>();
+    auto query = query_t->data<int>();
+
+    T* positive = positive_t->mutable_data<T>(context.GetPlace());
+    T* negative = negative_t->mutable_data<T>(context.GetPlace());
+    T* neutral = neutral_t->mutable_data<T>(context.GetPlace());
+
+    auto score_dim = score_t->dims();
+    PADDLE_ENFORCE_GE(score_dim.size(), 1L,
+                      "Rank of Score must be at least 1.");
+    PADDLE_ENFORCE_LE(score_dim.size(), 2L,
+                      "Rank of Score must be less or equal to 2.");
+    auto batch_size = score_dim[0];
+    auto width = score_dim.size() > 1 ? score_dim[1] : 1;
+
+    // Group rows by query id: Query => List[(score, label), ...]. For a
+    // rank-2 Score the last column of each row is used. operator[] creates
+    // the per-query list on first use.
+    std::unordered_map<int, std::vector<std::pair<T, T>>> predictions;
+    for (decltype(batch_size) i = 0; i < batch_size; ++i) {
+      predictions[query[i]].emplace_back(score[i * width + width - 1],
+                                         label[i]);
+    }
+
+    // For each query, accumulate pair counts. Lists are read in place
+    // through const references instead of being copied.
+    T pos = 0, neg = 0, neu = 0;
+    for (const auto& prediction : predictions) {
+      const auto& docs = prediction.second;
+      for (auto ite1 = docs.begin(); ite1 != docs.end(); ++ite1) {
+        for (auto ite2 = ite1 + 1; ite2 != docs.end(); ++ite2) {
+          if (ite1->second == ite2->second) {  // labels are equal, ignore.
+            continue;
+          }
+          if (ite1->first == ite2->first) {
+            // Tied scores count as neutral only, never also as negative.
+            ++neu;
+            continue;
+          }
+          const bool same_order =
+              (ite1->first - ite2->first) * (ite1->second - ite2->second) >
+              0.0;
+          if (same_order) {
+            ++pos;
+          } else {
+            ++neg;
+          }
+        }
+      }
+    }
+
+    *positive = pos;
+    *negative = neg;
+    *neutral = neu;
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py
new file mode 100644
index 000000000..314c17f00
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_positive_negative_pair_op.py
@@ -0,0 +1,64 @@
+import unittest
+import itertools
+import numpy as np
+from op_test import OpTest
+
+
+def py_pnpair_op(score, label, query):
+    """Reference implementation of the positive_negative_pair operator.
+
+    score, label and query are equally long sequences of rows. Rows are
+    grouped by the first element of their query row; within a group the last
+    element of each score row and the first element of each label row are
+    compared pairwise. Returns the (positive, negative, neutral) pair counts
+    as float32 arrays.
+    """
+    # group by query id
+    predictions = {}
+    for s, l, q in zip(score, label, query):
+        predictions.setdefault(q[0], []).append((s[-1], l[0]))
+
+    # accumulate statistics
+    pos, neg, neu = 0, 0, 0
+    for ranks in predictions.values():
+        for (s1, l1), (s2, l2) in itertools.combinations(ranks, 2):
+            if l1 == l2:
+                continue
+            if s1 == s2:
+                neu += 1
+            elif (s1 - s2) * (l1 - l2) > 0:
+                pos += 1
+            else:
+                neg += 1
+
+    return np.array(pos).astype('float32'), np.array(neg).astype(
+        'float32'), np.array(neu).astype('float32')
+
+
+class TestPositiveNegativePairOp(OpTest):
+    def setUp(self):
+        self.op_type = 'positive_negative_pair'
+        batch_size = 20
+        max_query_id = 5
+        # One-decimal scores and integer-valued labels make tied scores and
+        # equal labels likely, so the neutral and skip paths are exercised.
+        score = np.round(np.random.normal(size=(batch_size, 1)),
+                         1).astype('float32')
+        label = np.random.randint(3, size=(batch_size, 1)).astype('float32')
+        query = np.random.randint(
+            max_query_id, size=(batch_size, 1)).astype('int32')
+
+        pos, neg, neu = py_pnpair_op(score, label, query)
+        self.inputs = {'Score': score, 'Label': label, 'QueryId': query}
+        self.outputs = {
+            'PositivePair': pos,
+            'NegativePair': neg,
+            'NeutralPair': neu
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == '__main__':
+    unittest.main()
-- 
GitLab