From 38d6059d1cae77e4de08701dac919e639fc09293 Mon Sep 17 00:00:00 2001 From: "He, Kai" Date: Tue, 8 Sep 2020 12:07:07 +0000 Subject: [PATCH] add precision_recall op --- .../mpc_protocol/aby3_operators.h | 39 ++++ .../paddlefl_mpc/mpc_protocol/mpc_operators.h | 10 ++ core/paddlefl_mpc/operators/CMakeLists.txt | 3 +- .../operators/metrics/precision_recall_op.cc | 170 ++++++++++++++++++ .../operators/metrics/precision_recall_op.h | 68 +++++++ core/privc3/fixedpoint_tensor.h | 18 +- core/privc3/fixedpoint_tensor_imp.h | 170 +++++++++++++++++- core/privc3/fixedpoint_tensor_test.cc | 85 +++++++++ python/paddle_fl/mpc/layers/__init__.py | 4 + python/paddle_fl/mpc/layers/metric_op.py | 135 ++++++++++++++ python/paddle_fl/mpc/metrics.py | 2 +- .../mpc/tests/unittests/run_test_example.sh | 1 + .../mpc/tests/unittests/test_op_metric.py | 120 +++++++++++++ 13 files changed, 819 insertions(+), 6 deletions(-) create mode 100644 core/paddlefl_mpc/operators/metrics/precision_recall_op.cc create mode 100644 core/paddlefl_mpc/operators/metrics/precision_recall_op.h create mode 100644 python/paddle_fl/mpc/layers/metric_op.py create mode 100644 python/paddle_fl/mpc/tests/unittests/test_op_metric.py diff --git a/core/paddlefl_mpc/mpc_protocol/aby3_operators.h b/core/paddlefl_mpc/mpc_protocol/aby3_operators.h index 337d1fb..c129dda 100644 --- a/core/paddlefl_mpc/mpc_protocol/aby3_operators.h +++ b/core/paddlefl_mpc/mpc_protocol/aby3_operators.h @@ -338,6 +338,45 @@ public: x_->inverse_square_root(y_); } + // only support pred for 1 in binary classification for now + void predicts_to_indices(const Tensor* in, + Tensor* out, + float threshold = 0.5) override { + auto x_tuple = from_tensor(in); + auto x_ = std::get<0>(x_tuple).get(); + + auto y_tuple = from_tensor(out); + auto y_ = std::get<0>(y_tuple).get(); + + FixedTensor::preds_to_indices(x_, y_, threshold); + } + + void calc_tp_fp_fn(const Tensor* indices, + const Tensor* labels, + Tensor* out) override { + auto idx_tuple = 
from_tensor(indices); + auto idx = std::get<0>(idx_tuple).get(); + + auto lbl_tuple = from_tensor(labels); + auto lbl = std::get<0>(lbl_tuple).get(); + + auto out_tuple = from_tensor(out); + auto out_ = std::get<0>(out_tuple).get(); + + FixedTensor::calc_tp_fp_fn(idx, lbl, out_); + } + + void calc_precision_recall(const Tensor* tp_fp_fn, + Tensor* out) override { + auto in_tuple = from_tensor(tp_fp_fn); + auto in = std::get<0>(in_tuple).get(); + + PaddleTensor out_(ContextHolder::device_ctx(), *out); + out_.scaling_factor() = ABY3_SCALING_FACTOR; + + FixedTensor::calc_precision_recall(in, &out_); + } + private: template std::tuple< diff --git a/core/paddlefl_mpc/mpc_protocol/mpc_operators.h b/core/paddlefl_mpc/mpc_protocol/mpc_operators.h index 3064e88..309b5f3 100644 --- a/core/paddlefl_mpc/mpc_protocol/mpc_operators.h +++ b/core/paddlefl_mpc/mpc_protocol/mpc_operators.h @@ -83,6 +83,16 @@ public: virtual void max_pooling(const Tensor* in, Tensor* out, Tensor* pos_info) {} virtual void inverse_square_root(const Tensor* in, Tensor* out) = 0; + + virtual void predicts_to_indices(const Tensor* in, + Tensor* out, + float threshold = 0.5) = 0; + + virtual void calc_tp_fp_fn(const Tensor* indices, + const Tensor* labels, + Tensor* out) = 0; + + virtual void calc_precision_recall(const Tensor* tp_fp_fn, Tensor* out) = 0; }; } // mpc diff --git a/core/paddlefl_mpc/operators/CMakeLists.txt b/core/paddlefl_mpc/operators/CMakeLists.txt index 89a2873..7f3e8d6 100644 --- a/core/paddlefl_mpc/operators/CMakeLists.txt +++ b/core/paddlefl_mpc/operators/CMakeLists.txt @@ -1,6 +1,7 @@ aux_source_directory(. 
DIR_SRCS) aux_source_directory(./math MATH_SRCS) -add_library(mpc_ops_o OBJECT ${DIR_SRCS} ${MATH_SRCS}) +aux_source_directory(./metrics METRICS_SRCS) +add_library(mpc_ops_o OBJECT ${DIR_SRCS} ${MATH_SRCS} ${METRICS_SRCS}) add_dependencies(mpc_ops_o fluid_framework gloo) add_library(mpc_ops STATIC $) diff --git a/core/paddlefl_mpc/operators/metrics/precision_recall_op.cc b/core/paddlefl_mpc/operators/metrics/precision_recall_op.cc new file mode 100644 index 0000000..a54f16e --- /dev/null +++ b/core/paddlefl_mpc/operators/metrics/precision_recall_op.cc @@ -0,0 +1,170 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "precision_recall_op.h" +#include "paddle/fluid/framework/op_registry.h" +#include + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +class MpcPrecisionRecallOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE_EQ(ctx->HasInput("Predicts"), true, + platform::errors::InvalidArgument( + "Input(Predicts) should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Labels"), true, + platform::errors::InvalidArgument("Input(Labels) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("BatchMetrics"), true, + platform::errors::InvalidArgument( + "Output(BatchMetrics) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("AccumMetrics"), true, + platform::errors::InvalidArgument( + "Output(AccumMetrics) should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("AccumStatesInfo"), true, + platform::errors::InvalidArgument( + "Output(AccumStatesInfo) should not be null.")); + + int64_t cls_num = + static_cast(ctx->Attrs().Get("class_number")); + + PADDLE_ENFORCE_EQ(cls_num, 1, + platform::errors::InvalidArgument( + "Only support predicts/labels for 1 " + "in binary classification for now.")); + + auto preds_dims = ctx->GetInputDim("Predicts"); + auto labels_dims = ctx->GetInputDim("Labels"); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ(preds_dims, labels_dims, + platform::errors::InvalidArgument( + "The dimension of Input(Predicts) and " + "Input(Labels) should be the same. " + "But received (%d) != (%d)", + preds_dims, labels_dims)); + PADDLE_ENFORCE_EQ( + labels_dims.size(), 2, + platform::errors::InvalidArgument( + "Only support predicts/labels for 1 " + "in binary classification for now. " + "The dimension of Input(Labels) should be equal to 2 " + "(1 for shares). 
But received (%d)", + labels_dims.size())); + } + if (ctx->HasInput("StatesInfo")) { + auto states_dims = ctx->GetInputDim("StatesInfo"); + + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ( + states_dims, framework::make_ddim({2, 3}), + platform::errors::InvalidArgument( + "The shape of Input(StatesInfo) should be [2, 3].")); + } + } + + // Layouts of BatchMetrics and AccumMetrics both are: + // [ + // precision, recall, F1 score, + // ] + ctx->SetOutputDim("BatchMetrics", {3}); + ctx->SetOutputDim("AccumMetrics", {3}); + // Shape of AccumStatesInfo is [2, 3] (leading dim holds the shares) + // The layout of each row is: + // [ TP, FP, FN ] + ctx->SetOutputDim("AccumStatesInfo", {2, 3}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "Predicts"), + ctx.device_context()); + } +}; + +class MpcPrecisionRecallOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Predicts", + "(Tensor, default Tensor) A 1-D tensor with shape N, " + "where N is the batch size. Each element contains the " + "corresponding predicts of an instance which computed by the " + "previous sigmoid operator."); + AddInput("Labels", + "(Tensor, default Tensor) A 1-D tensor with shape N, " + "where N is the batch size. Each element is a label and the " + "value should be in [0, 1]."); + AddInput("StatesInfo", + "(Tensor, default Tensor) A 1-D tensor with shape 3. " + "This input is optional. If provided, current state will be " + "accumulated to this state and the accumulation state will be " + "the output state.") + .AsDispensable(); + AddOutput("BatchMetrics", + "(Tensor, default Tensor) A 1-D tensor with shape {3}. " + "This output tensor contains metrics for current batch data. " + "The layout is [precision, recall, f1 score]."); + AddOutput("AccumMetrics", + "(Tensor, default Tensor) A 1-D tensor with shape {3}. 
" + "This output tensor contains metrics for accumulated data. " + "The layout is [precision, recall, f1 score]."); + AddOutput("AccumStatesInfo", + "(Tensor, default Tensor) A 1-D tensor with shape 3. " + "This output tensor contains " + "accumulated state variables used to compute metrics. The layout " + "for each class is [true positives, false positives, " + "false negatives]."); + AddAttr("class_number", "(int) Number of classes to be evaluated."); + AddAttr("threshold", "(float) Threshold of true predict."); + AddComment(R"DOC( +Precision Recall Operator. +When given Input(Predicts) and Input(Labels), this operator can be used +to compute various metrics including: +1. precision +2. recall +3. f1 score +To compute the above metrics, we need to do statistics for true positives, +false positives and false negatives. +We define state as a 1-D tensor with shape [3]. Each element of a +state contains statistic variables for corresponding class. Layout of each row +is: TP(true positives), FP(false positives), FN(false negatives). +This operator also supports metrics computing for cross-batch situation. To +achieve this, Input(StatesInfo) should be provided. State of current batch +data will be accumulated to Input(StatesInfo) and Output(AccumStatesInfo) +is the accumulation state. +Output(BatchMetrics) is metrics of current batch data while +Output(AccumMetrics) is metrics of accumulation data. 
+)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + mpc_precision_recall, ops::MpcPrecisionRecallOp, ops::MpcPrecisionRecallOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL( + mpc_precision_recall, + ops::MpcPrecisionRecallKernel); diff --git a/core/paddlefl_mpc/operators/metrics/precision_recall_op.h b/core/paddlefl_mpc/operators/metrics/precision_recall_op.h new file mode 100644 index 0000000..599c4f9 --- /dev/null +++ b/core/paddlefl_mpc/operators/metrics/precision_recall_op.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/fluid/framework/op_registry.h" +#include "../mpc_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MpcPrecisionRecallKernel : public MpcOpKernel { + public: + void ComputeImpl(const framework::ExecutionContext& context) const override { + const Tensor* preds = context.Input("Predicts"); + const Tensor* lbls = context.Input("Labels"); + const Tensor* stats = context.Input("StatesInfo"); + Tensor* batch_metrics = context.Output("BatchMetrics"); + Tensor* accum_metrics = context.Output("AccumMetrics"); + Tensor* accum_stats = context.Output("AccumStatesInfo"); + + + float threshold = context.Attr("threshold"); + + Tensor idx; + idx.mutable_data(preds->dims(), context.GetPlace(), 0); + + Tensor batch_stats; + batch_stats.mutable_data(framework::make_ddim({2, 3}), context.GetPlace(), 0); // fixed [2, 3] shape: StatesInfo is dispensable, so stats may be null here + + mpc::MpcInstance::mpc_instance()->mpc_protocol() + ->mpc_operators()->predicts_to_indices(preds, &idx, threshold); + + mpc::MpcInstance::mpc_instance()->mpc_protocol() + ->mpc_operators()->calc_tp_fp_fn(&idx, lbls, &batch_stats); + + batch_metrics->mutable_data(framework::make_ddim({3}), context.GetPlace(), 0); + + mpc::MpcInstance::mpc_instance()->mpc_protocol() + ->mpc_operators()->calc_precision_recall(&batch_stats, batch_metrics); + + if (stats) { + mpc::MpcInstance::mpc_instance()->mpc_protocol() + ->mpc_operators()->add(&batch_stats, stats, accum_stats); + + accum_metrics->mutable_data(framework::make_ddim({3}), context.GetPlace(), 0); + mpc::MpcInstance::mpc_instance()->mpc_protocol() + ->mpc_operators()->calc_precision_recall(accum_stats, accum_metrics); + } +} +}; + +} // namespace operators +} // namespace paddle diff --git a/core/privc3/fixedpoint_tensor.h b/core/privc3/fixedpoint_tensor.h index 2346ad1..a1ce863 100644 --- a/core/privc3/fixedpoint_tensor.h +++ b/core/privc3/fixedpoint_tensor.h @@ 
-16,12 +16,10 @@ #include -#include "boolean_tensor.h" #include "aby3_context.h" #include 
"core/paddlefl_mpc/mpc_protocol/context_holder.h" #include "paddle_tensor.h" #include "boolean_tensor.h" -#include "core/paddlefl_mpc/mpc_protocol/context_holder.h" namespace aby3 { @@ -193,6 +191,20 @@ public: void max_pooling(FixedPointTensor* ret, BooleanTensor* pos = nullptr) const; + // only support pred for 1 in binary classification for now + static void preds_to_indices(const FixedPointTensor* preds, + FixedPointTensor* indices, + float threshold = 0.5); + + static void calc_tp_fp_fn(const FixedPointTensor* indices, + const FixedPointTensor* labels, + FixedPointTensor* tp_fp_fn); + + // clac precision_recall f1_score + // result is a plaintext fixed-point tensor, shape is [3] + static void calc_precision_recall(const FixedPointTensor* tp_fp_fn, + TensorAdapter* ret); + static void truncate(const FixedPointTensor* op, FixedPointTensor* ret, size_t scaling_factor); @@ -217,7 +229,7 @@ private: size_t scaling_factor); // reduce last dim - static void reduce(FixedPointTensor* input, + static void reduce(const FixedPointTensor* input, FixedPointTensor* ret); static size_t party() { diff --git a/core/privc3/fixedpoint_tensor_imp.h b/core/privc3/fixedpoint_tensor_imp.h index 47fc0bf..02fa16c 100644 --- a/core/privc3/fixedpoint_tensor_imp.h +++ b/core/privc3/fixedpoint_tensor_imp.h @@ -847,7 +847,7 @@ void FixedPointTensor::long_div(const FixedPointTensor* rhs, // reduce last dim template -void FixedPointTensor::reduce(FixedPointTensor* input, +void FixedPointTensor::reduce(const FixedPointTensor* input, FixedPointTensor* ret) { //enfoce shape: input->shape[0 ... 
(n-2)] == ret shape auto& shape = input->shape(); @@ -1293,4 +1293,172 @@ void FixedPointTensor::max_pooling(FixedPointTensor* ret, } +template +void FixedPointTensor::preds_to_indices(const FixedPointTensor* preds, + FixedPointTensor* indices, + float threshold) { + // 3 for allocating temp tensor + std::vector>> temp; + for (size_t i = 0; i < 3; ++i) { + temp.emplace_back( + tensor_factory()->template create()); + } + + auto shape_ = preds->shape(); + + // plaintext tensor for threshold + temp[0]->reshape(shape_); + temp[0]->scaling_factor() = N; + assign_to_tensor(temp[0].get(), T(threshold * (T(1) << N))); + + temp[1]->reshape(shape_); + temp[2]->reshape(shape_); + BooleanTensor cmp_res(temp[1].get(), temp[2].get()); + + preds->gt(temp[0].get(), &cmp_res); + + cmp_res.lshift(N, &cmp_res); + + cmp_res.b2a(indices); +} + +template +void FixedPointTensor::calc_tp_fp_fn( + const FixedPointTensor* indices, + const FixedPointTensor* labels, + FixedPointTensor* tp_fp_fn) { + + PADDLE_ENFORCE_EQ(indices->shape().size(), 1, + "multi-classification not support yet"); + + PADDLE_ENFORCE_EQ(tp_fp_fn->shape().size(), 1, + "multi-classification not support yet"); + + PADDLE_ENFORCE_EQ(tp_fp_fn->shape()[0], 3, + "store tp fp fn for binary-classification only"); + + // 4 for allocating temp tensor + std::vector>> temp; + for (size_t i = 0; i < 4; ++i) { + temp.emplace_back( + tensor_factory()->template create()); + } + + auto shape_ = indices->shape(); + std::vector shape_one = {1}; + std::vector shape_3 = {3}; + + temp[0]->reshape(shape_); + temp[1]->reshape(shape_); + + FixedPointTensor true_positive(temp[0].get(), temp[1].get()); + + indices->mul(labels, &true_positive); + + temp[2]->reshape(shape_one); + temp[3]->reshape(shape_one); + + FixedPointTensor scalar(temp[2].get(), temp[3].get()); + + // tp + reduce(&true_positive, &scalar); + + const T& share0 = scalar.share(0)->data()[0]; + const T& share1 = scalar.share(1)->data()[0]; + + T* ret_data0 = 
tp_fp_fn->mutable_share(0)->data(); + T* ret_data1 = tp_fp_fn->mutable_share(1)->data(); + + // assgin tp + ret_data0[0] = share0; + ret_data1[0] = share1; + + // tp + fp + reduce(indices, &scalar); + + // direcrt aby3 sub + ret_data0[1] = share0 - ret_data0[0]; + ret_data1[1] = share1 - ret_data1[0]; + + // tp + fn + reduce(labels, &scalar); + + ret_data0[2] = share0 - ret_data0[0]; + ret_data1[2] = share1 - ret_data1[0]; + +} + +template +void FixedPointTensor::calc_precision_recall( + const FixedPointTensor* tp_fp_fn, + TensorAdapter* ret) { + PADDLE_ENFORCE_EQ(tp_fp_fn->shape().size(), 1, + "multi-classification not support yet"); + + PADDLE_ENFORCE_EQ(tp_fp_fn->shape()[0], 3, + "store tp fp fn for binary-classification only"); + + PADDLE_ENFORCE_EQ(ret->shape().size(), 1, + "multi-classification not support yet"); + + PADDLE_ENFORCE_EQ(ret->shape()[0], 3, + "store precision recall f1-score" + "for binary-classification only"); + // 5 for allocating temp tensor + std::vector>> temp; + for (size_t i = 0; i < 5; ++i) { + temp.emplace_back( + tensor_factory()->template create()); + } + std::vector shape_ = {3}; + + std::vector shape_one = {1}; + + temp[0]->reshape(shape_one); + temp[1]->reshape(shape_one); + FixedPointTensor scalar(temp[0].get(), temp[1].get()); + + temp[2]->reshape(shape_one); + temp[3]->reshape(shape_one); + FixedPointTensor scalar2(temp[2].get(), temp[3].get()); + + auto get = [&tp_fp_fn](size_t idx, FixedPointTensor* dest) { + dest->mutable_share(0)->data()[0] = tp_fp_fn->share(0)->data()[idx]; + dest->mutable_share(1)->data()[0] = tp_fp_fn->share(1)->data()[idx]; + }; + + get(0, &scalar); + get(1, &scalar2); + + // tp + fp + scalar.add(&scalar2, &scalar2); + + scalar.long_div(&scalar2, &scalar2); + + temp[4]->reshape(shape_one); + + scalar2.reveal(temp[4].get()); + + ret->scaling_factor() = N; + ret->data()[0] = temp[4]->data()[0]; + + get(2, &scalar2); + + // tp + fn + scalar.add(&scalar2, &scalar2); + + scalar.long_div(&scalar2, &scalar2); 
+ scalar2.reveal(temp[4].get()); + + ret->data()[1] = temp[4]->data()[0]; + + float precision = 1.0 * ret->data()[0] / (T(1) << N); + float recall = 1.0 * ret->data()[1] / (T(1) << N); + float f1_score = 0.0; + if (precision + recall > 0) { + f1_score = 2 * precision * recall / (precision + recall); + } + + ret->data()[2] = T(f1_score * (T(1) << N)); +} } // namespace aby3 diff --git a/core/privc3/fixedpoint_tensor_test.cc b/core/privc3/fixedpoint_tensor_test.cc index c525205..1d40698 100644 --- a/core/privc3/fixedpoint_tensor_test.cc +++ b/core/privc3/fixedpoint_tensor_test.cc @@ -898,6 +898,40 @@ void test_fixedt_matmul_fixed(size_t p, result->reveal(out); } +void test_fixedt_precision_recall_fixed(size_t p, + double threshold, + std::vector>> in, + TensorAdapter* out) { + std::vector>> temp; + // preds + for (int i = 0; i < 2; i++) { + temp.emplace_back(gen(in[0]->shape())); + } + // labels + for (int i = 0; i < 2; i++) { + temp.emplace_back(gen(in[1]->shape())); + } + // indices + for (int i = 0; i < 2; i++) { + temp.emplace_back(gen(in[0]->shape())); + } + std::vector shape_ = {3}; + // tp fp fn + for (int i = 0; i < 2; i++) { + temp.emplace_back(gen(shape_)); + } + + test_fixedt_gen_shares(p, in, temp); + Fix64N16* preds = new Fix64N16(temp[0].get(), temp[1].get()); + Fix64N16* labels = new Fix64N16(temp[2].get(), temp[3].get()); + Fix64N16* indices = new Fix64N16(temp[4].get(), temp[5].get()); + Fix64N16* tpfpfn = new Fix64N16(temp[6].get(), temp[7].get()); + + Fix64N16::preds_to_indices(preds, indices, threshold); + Fix64N16::calc_tp_fp_fn(indices, labels, tpfpfn); + Fix64N16::calc_precision_recall(tpfpfn, out); +} + TEST_F(FixedTensorTest, matmulfixed) { std::vector shape = {1, 3}; @@ -3559,4 +3593,55 @@ TEST_F(FixedTensorTest, truncate3_msb_correct) { } #endif +TEST_F(FixedTensorTest, precision_recall) { + + std::vector shape = {6}; + std::vector shape_o = {3}; + std::vector in0_val = {0.0, 0.2, 0.4, 0.6, 0.8, 1.0}; + std::vector in1_val = {0, 1, 0, 1, 0 
,1}; + std::vector res_val = {0.5, 1.0/3, 0.4}; + double threshold = 0.7; + std::vector>> in = + {gen(shape), gen(shape)}; + + test_fixedt_gen_paddle_tensor(in0_val, + shape, _cpu_ctx).copy(in[0].get()); + test_fixedt_gen_paddle_tensor(in1_val, + shape, _cpu_ctx).copy(in[1].get()); + + auto out0 = _s_tensor_factory->create(shape_o); + auto out1 = _s_tensor_factory->create(shape_o); + auto out2 = _s_tensor_factory->create(shape_o); + + PaddleTensor result = + test_fixedt_gen_paddle_tensor(res_val, shape_o, _cpu_ctx); + + _t[0] = std::thread([this, in, out0, threshold]() mutable { + g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[0], [&](){ + test_fixedt_precision_recall_fixed(0, threshold, in, out0.get()); + }); + + }); + _t[1] = std::thread([this, in, out1, threshold]() mutable { + g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[1], [&](){ + test_fixedt_precision_recall_fixed(1, threshold, in, out1.get()); + }); + + }); + _t[2] = std::thread([this, in, out2, threshold]() mutable { + g_ctx_holder::template run_with_context(_exec_ctx.get(), _mpc_ctx[2], [&](){ + test_fixedt_precision_recall_fixed(2, threshold, in, out2.get()); + }); + + }); + + _t[0].join(); + _t[1].join(); + _t[2].join(); + + EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), out1.get())); + EXPECT_TRUE(test_fixedt_check_tensor_eq(out1.get(), out2.get())); + EXPECT_TRUE(test_fixedt_check_tensor_eq(out0.get(), &result)); +} + } // namespace aby3 diff --git a/python/paddle_fl/mpc/layers/__init__.py b/python/paddle_fl/mpc/layers/__init__.py index 0fe6035..3f6b0f2 100644 --- a/python/paddle_fl/mpc/layers/__init__.py +++ b/python/paddle_fl/mpc/layers/__init__.py @@ -18,6 +18,7 @@ mpc layers: matrix: 'mul' ml: 'fc', 'relu', 'softmax'(todo) compare:'greater_than', 'greater_equal', 'less_than', 'less_equal', 'equal', 'not_equal' + metric_op:'precision_recall' """ from . import basic @@ -34,6 +35,8 @@ from . import conv from .conv import conv2d from . 
import rnn from .rnn import * +from . import metric_op +from .metric_op import * __all__ = [] __all__ += basic.__all__ @@ -42,3 +45,4 @@ __all__ += matrix.__all__ __all__ += ml.__all__ __all__ += compare.__all__ __all__ += conv.__all__ +__all__ += metric_op.__all__ diff --git a/python/paddle_fl/mpc/layers/metric_op.py b/python/paddle_fl/mpc/layers/metric_op.py new file mode 100644 index 0000000..374748a --- /dev/null +++ b/python/paddle_fl/mpc/layers/metric_op.py @@ -0,0 +1,135 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +mpc metric op layers. +""" +from paddle.fluid.data_feeder import check_type, check_dtype +from paddle.fluid.initializer import Constant +from ..framework import check_mpc_variable_and_dtype +from ..mpc_layer_helper import MpcLayerHelper + +__all__ = ['precision_recall'] + +def precision_recall(input, label, threshold=0.5): + """ + Precision (also called positive predictive value) is the fraction of + relevant instances among the retrieved instances. + Recall (also known as sensitivity) is the fraction of + relevant instances that have been retrieved over the + total amount of relevant instances + F1-score is a measure of a test's accuracy. + It is calculated from the precision and recall of the test. 
+ Refer to: + https://en.wikipedia.org/wiki/Precision_and_recall + https://en.wikipedia.org/wiki/F1_score + + Note that this function computes the metrics only for binary classification task. + Note that in both precision and recall, 0/0 is defined as 0. + + Args: + input (Variable): ciphertext predictions for 1 in binary classification. + label (Variable): labels in ciphertext. + threshold (float): predict threshold. + Returns: + batch_out (Variable): plaintext of batch metrics [precision, recall, f1-score] + Note that values in batch_out are fixed-point numbers. + To get float type values, divide fetched batch_out by + 3 * mpc_data_utils.mpc_one_share (which equals to 2**16). + acc_out (Variable): plaintext of accumulated metrics [precision, recall, f1-score] + To get float type values, divide fetched acc_out by + 3 * mpc_data_utils.mpc_one_share (which equals to 2**16). + + Examples: + .. code-block:: python + import sys + import numpy as np + import paddle.fluid as fluid + import paddle_fl.mpc as pfl_mpc + import mpc_data_utils as mdu + + role = int(sys.argv[1]) + + redis_server = "127.0.0.1" + redis_port = 9937 + loop = 5 + np.random.seed(0) + + input_size = [100] + + threshold = 0.6 + + preds, labels = [], [] + preds_cipher, labels_cipher = [], [] + #simulating mpc share + + share = lambda x: np.array([x * mdu.mpc_one_share] * 2).astype('int64').reshape([2] + input_size) + for _ in range(loop): + + preds.append(np.random.random(input_size)) + labels.append(np.rint(np.random.random(input_size))) + preds_cipher.append(share(preds[-1])) + labels_cipher.append(share(labels[-1])) + + pfl_mpc.init("aby3", role, "localhost", redis_server, redis_port) + x = pfl_mpc.data(name='x', shape=input_size, dtype='int64') + y = pfl_mpc.data(name='y', shape=input_size, dtype='int64') + out0, out1 = pfl_mpc.layers.precision_recall(input=x, label=y, threshold=threshold) + exe = fluid.Executor(place=fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + + for i in range(loop): + 
batch_res, acc_res = exe.run(feed={'x': preds_cipher[i], 'y': labels_cipher[i]}, + fetch_list=[out0, out1]) + fixed_point_one = 3.0 * mdu.mpc_one_share + # result could be verified by calculating metrics with plaintext preds, labels + print(batch_res / fixed_point_one , acc_res / fixed_point_one) + + """ + helper = MpcLayerHelper("precision_recall", **locals()) + + dtype = helper.input_dtype() + + check_dtype(dtype, 'input', ['int64'], 'precision_recall') + check_dtype(label.dtype, 'label', ['int64'], 'precision_recall') # validate the label's own dtype, not the input's + + batch_out = helper.create_mpc_variable_for_type_inference(dtype=input.dtype) + acc_out = helper.create_mpc_variable_for_type_inference(dtype=input.dtype) + + stat = helper.create_global_mpc_variable( + persistable=True, + dtype='int64', shape=[3], + ) + + helper.set_variable_initializer(stat, Constant(value=0)) + + op_type = 'precision_recall' + + helper.append_op( + type='mpc_' + op_type, + inputs={ + "Predicts": input, + "Labels": label, + "StatesInfo": stat, + }, + outputs={ + "BatchMetrics": batch_out, + "AccumMetrics": acc_out, + "AccumStatesInfo": stat, + }, + attrs={ + "threshold": threshold, + "class_number": 1, + }) + + return batch_out, acc_out diff --git a/python/paddle_fl/mpc/metrics.py b/python/paddle_fl/mpc/metrics.py index 0143bf0..8bc0f99 100644 --- a/python/paddle_fl/mpc/metrics.py +++ b/python/paddle_fl/mpc/metrics.py @@ -34,7 +34,7 @@ def _is_numpy_(var): class KSstatistic(MetricBase): """ - The is for binary classification. + The KSstatistic is for binary classification. Refer to https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Kolmogorov%E2%80%93Smirnov_statistic Please notice that the KS statistic is implemented with scipy. 
diff --git a/python/paddle_fl/mpc/tests/unittests/run_test_example.sh b/python/paddle_fl/mpc/tests/unittests/run_test_example.sh index d4d4ff2..30ede58 100644 --- a/python/paddle_fl/mpc/tests/unittests/run_test_example.sh +++ b/python/paddle_fl/mpc/tests/unittests/run_test_example.sh @@ -25,6 +25,7 @@ TEST_MODULES=("test_datautils_aby3" "test_op_batch_norm" "test_op_conv" "test_op_pool" +"test_op_metric" ) # run unittest diff --git a/python/paddle_fl/mpc/tests/unittests/test_op_metric.py b/python/paddle_fl/mpc/tests/unittests/test_op_metric.py new file mode 100644 index 0000000..9260957 --- /dev/null +++ b/python/paddle_fl/mpc/tests/unittests/test_op_metric.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module test metric op. 
+ +""" +import unittest + +import numpy as np +import paddle.fluid as fluid +import paddle_fl.mpc as pfl_mpc + +import test_op_base + + +def precision_recall_naive(input, label, threshold=0.5, stat=None): + pred = input - (threshold - 0.5) + pred = np.maximum(0, pred) + pred = np.minimum(1, pred) + idx = np.rint(pred) + tp = np.sum(idx * label) + fp = np.sum(idx) - tp + fn = np.sum(label) - tp + + def calc_precision(tp, fp): + return tp / (tp + fp) if tp + fp > 0 else 0.0 + + def calc_recall(tp, fn): + return tp / (tp + fn) if tp + fn > 0 else 0.0 + + def calc_f1(precision, recall): + return 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0 + + p_batch, r_batch = calc_precision(tp, fp), calc_recall(tp, fn) + f_batch = calc_f1(p_batch, r_batch) + + p_acc, r_acc, f_acc = p_batch, r_batch, f_batch + + if stat: + tp += stat[0] + fp += stat[1] + fn += stat[2] + + p_acc, r_acc = calc_precision(tp, fp), calc_recall(tp, fn) + f_acc = calc_f1(p_acc, r_acc) + + new_stat = [tp, fp, fn] + + return np.array([p_batch, r_batch, f_batch, p_acc, r_acc, f_acc]), new_stat + + +class TestOpPrecisionRecall(test_op_base.TestOpBase): + + def precision_recall(self, **kwargs): + """ + precision_recall op ut + :param kwargs: + :return: + """ + role = kwargs['role'] + preds = kwargs['preds'] + labels = kwargs['labels'] + loop = kwargs['loop'] + + pfl_mpc.init("aby3", role, "localhost", self.server, int(self.port)) + x = pfl_mpc.data(name='x', shape=self.input_size, dtype='int64') + y = pfl_mpc.data(name='y', shape=self.input_size, dtype='int64') + out0, out1 = pfl_mpc.layers.precision_recall(input=x, label=y, threshold=self.threshold) + exe = fluid.Executor(place=fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + for i in range(loop): + batch_res, acc_res = exe.run(feed={'x': preds[i], 'y': labels[i]}, + fetch_list=[out0, out1]) + + self.assertTrue(np.allclose(batch_res * (2 ** -16), self.exp_res[0][:3], atol=1e-4)) + 
self.assertTrue(np.allclose(acc_res* (2 ** -16), self.exp_res[0][3:], atol=1e-4)) + + def n_batch_test(self, n): + self.input_size = [100] + + self.threshold = np.random.random() + preds, labels = [], [] + self.exp_res = (0, [0] * 3) + share = lambda x: np.array([x * 65536/3] * 2).astype('int64').reshape( + [2] + self.input_size) + + for _ in range(n): + preds.append(np.random.random(self.input_size)) + labels.append(np.rint(np.random.random(self.input_size))) + self.exp_res = precision_recall_naive(preds[-1], labels[-1], + self.threshold, self.exp_res[-1]) + preds[-1] = share(preds[-1]) + labels[-1] = share(labels[-1]) + + ret = self.multi_party_run(target=self.precision_recall, + preds=preds, labels=labels, loop=n) + + self.assertEqual(ret[0], True) + + def test_1(self): + self.n_batch_test(1) + + def test_2(self): + self.n_batch_test(2) + + +if __name__ == '__main__': + unittest.main() + -- GitLab