From 4152d39962972ebb15c9dec8e2072d62429e113a Mon Sep 17 00:00:00 2001 From: xujiaqi01 <173596896@qq.com> Date: Fri, 24 Jul 2020 12:40:38 +0800 Subject: [PATCH] add fleet metric (#25463) * add fleet distributed metrics * test=develop --- python/paddle/fleet/metrics/metric.py | 372 ++++++++++++++++++ .../tests/unittests/test_fleet_metric.py | 113 ++++++ 2 files changed, 485 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_fleet_metric.py diff --git a/python/paddle/fleet/metrics/metric.py b/python/paddle/fleet/metrics/metric.py index 847ddc47ac8..83e0dd2e541 100644 --- a/python/paddle/fleet/metrics/metric.py +++ b/python/paddle/fleet/metrics/metric.py @@ -11,3 +11,375 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +"""Fleet Metrics""" + +import paddle.fluid as fluid +import math +import numpy as np +from paddle.fluid.framework import Variable +from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet as fleet + + +def sum(input, scope=None): + """ + distributed sum in fleet + + Args: + input(numpy.array|Variable|string): output of a layer + scope(Scope): specific scope + + Returns: + global_metric(numpy.array): sum array + + Example: + .. code-block:: python + + # in model.py + input = fluid.layers.cast(some_input, dtype='float32') + cnt = fluid.layers.reduce_sum(input) + global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) + tmp = fluid.layers.elementwise_add(cnt, global_cnt) + fluid.layers.assign(tmp, global_cnt) + + # in train.py, after train or infer + res = np.array(scope.find_var(global_cnt.name).get_tensor()) + print("sum array: ", paddle.fleet.sum(res)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(input, Variable): + input = np.array(scope.find_var(input.name).get_tensor()) + elif isinstance(input, str): + input = np.array(scope.find_var(input).get_tensor()) + old_shape = np.array(input.shape) + output = np.copy(input) * 0 + fleet._role_maker._all_reduce(input, output, mode="sum") + output = output.reshape(old_shape) + return output + + +def max(input, scope=None): + """ + distributed max in fleet + + Args: + input(numpy.array|Variable|string): output of a layer + scope(Scope): specific scope + + Returns: + global_metric(numpy.array): max array + + Example: + .. code-block:: python + + # in model.py + input = fluid.layers.cast(some_input, dtype='float32') + cnt = fluid.layers.reduce_sum(input) + global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) + tmp = fluid.layers.elementwise_max(cnt, global_cnt) + fluid.layers.assign(tmp, global_cnt) + + # in train.py, after train or infer + res = np.array(scope.find_var(global_cnt.name).get_tensor()) + print("max array: ", paddle.fleet.max(res)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(input, Variable): + input = np.array(scope.find_var(input.name).get_tensor()) + elif isinstance(input, str): + input = np.array(scope.find_var(input).get_tensor()) + old_shape = np.array(input.shape) + output = np.copy(input) * 0 + fleet._role_maker._all_reduce(input, output, mode="max") + output = output.reshape(old_shape) + return output + + +def min(input, scope=None): + """ + distributed min in fleet + + Args: + input(numpy.array|Variable|string): output of a layer + scope(Scope): specific scope + + Returns: + global_metric(numpy.array): min array + + Example: + .. code-block:: python + + # in model.py + input = fluid.layers.cast(some_input, dtype='float32') + cnt = fluid.layers.reduce_sum(input) + global_cnt = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) + tmp = fluid.layers.elementwise_min(cnt, global_cnt) + fluid.layers.assign(tmp, global_cnt) + + # in train.py, after train or infer + res = np.array(scope.find_var(global_cnt.name).get_tensor()) + print("min array: ", paddle.fleet.min(res)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(input, Variable): + input = np.array(scope.find_var(input.name).get_tensor()) + elif isinstance(input, str): + input = np.array(scope.find_var(input).get_tensor()) + old_shape = np.array(input.shape) + output = np.copy(input) * 0 + fleet._role_maker._all_reduce(input, output, mode="min") + output = output.reshape(old_shape) + return output + + +def auc(stat_pos, stat_neg, scope=None): + """ + distributed auc in fleet + + Args: + stat_pos(numpy.array|Variable|string): stat_pos in output of fluid.layers.auc + stat_neg(numpy.array|Variable|string): stat_neg in output of fluid.layers.auc + scope(Scope): specific scope + + Returns: + auc_value(float): auc value + + Example: + .. code-block:: python + + # in model.py + similarity_norm = fluid.layers.sigmoid(fluid.layers.clip(output, min=-15.0, max=15.0)) + binary_predict = fluid.layers.concat( + input=[fluid.layers.elementwise_sub(fluid.layers.ceil(similarity_norm), similarity_norm), similarity_norm], axis=1) + self.auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] = + fluid.layers.auc(input=binary_predict, label=label, curve='ROC', num_thresholds=4096) + + # in train.py, after train or infer + pos = np.array(scope.find_var(stat_pos.name).get_tensor()) + neg = np.array(scope.find_var(stat_neg.name).get_tensor()) + print("auc: ", paddle.fleet.auc(pos, neg)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(stat_pos, Variable): + stat_pos = np.array(scope.find_var(stat_pos.name).get_tensor()) + elif isinstance(stat_pos, str): + stat_pos = np.array(scope.find_var(stat_pos).get_tensor()) + if isinstance(stat_neg, Variable): + stat_neg = np.array(scope.find_var(stat_neg.name).get_tensor()) + elif isinstance(stat_neg, str): + stat_neg = np.array(scope.find_var(stat_neg).get_tensor()) + # auc pos bucket shape + old_pos_shape = np.array(stat_pos.shape) + # reshape to one dim + stat_pos = stat_pos.reshape(-1) + global_pos = np.copy(stat_pos) * 0 + # mpi allreduce + fleet._role_maker._all_reduce(stat_pos, global_pos) + # reshape to its original shape + global_pos = global_pos.reshape(old_pos_shape) + + # auc neg bucket + old_neg_shape = np.array(stat_neg.shape) + stat_neg = stat_neg.reshape(-1) + global_neg = np.copy(stat_neg) * 0 + fleet._role_maker._all_reduce(stat_neg, global_neg) + global_neg = global_neg.reshape(old_neg_shape) + + # calculate auc + num_bucket = len(global_pos[0]) + area = 0.0 + pos = 0.0 + neg = 0.0 + new_pos = 0.0 + new_neg = 0.0 + total_ins_num = 0 + for i in range(num_bucket): + index = num_bucket - 1 - i + new_pos = pos + global_pos[0][index] + total_ins_num += global_pos[0][index] + new_neg = neg + global_neg[0][index] + total_ins_num += global_neg[0][index] + area += (new_neg - neg) * (pos + new_pos) / 2 + pos = new_pos + neg = new_neg + + auc_value = None + if pos * neg == 0 or total_ins_num == 0: + auc_value = 0.5 + else: + auc_value = area / (pos * neg) + + fleet._role_maker._barrier_worker() + return auc_value + + +def mae(abserr, total_ins_num, scope=None): + """ + distributed mae in fleet + + Args: + abserr(numpy.array|Variable|string): abserr in output of fluid.contrib.layers.ctr_metric_bundle + total_ins_num(int|float): total train/infer instance count + scope(Scope): specific scope + + Returns: + mae(float): mae value + + Example: + .. code-block:: python + + # in model.py + sqrerr, abserr, prob, q, pos, total = fluid.contrib.layers.ctr_metric_bundle(similarity_norm, fluid.layers.cast(x=label, dtype='float32')) + + # in train.py, after train or infer + res = np.array(scope.find_var(abserr.name).get_tensor()) + print("mae: ", paddle.fleet.mae(res, total_ins_num)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(abserr, Variable): + abserr = np.array(scope.find_var(abserr.name).get_tensor()) + elif isinstance(abserr, str): + abserr = np.array(scope.find_var(abserr).get_tensor()) + old_metric_shape = np.array(abserr.shape) + abserr = abserr.reshape(-1) + global_metric = np.copy(abserr) * 0 + fleet._role_maker._all_reduce(abserr, global_metric) + global_metric = global_metric.reshape(old_metric_shape) + mae_value = global_metric[0] / total_ins_num + return mae_value + + +def rmse(sqrerr, total_ins_num, scope=None): + """ + distributed rmse in fleet + + Args: + sqrerr(numpy.array|Variable|string): sqrerr in output of fluid.contrib.layers.ctr_metric_bundle + total_ins_num(int|float): total train/infer instance count + scope(Scope): specific scope + + Returns: + rmse(float): rmse value + + Example: + .. code-block:: python + + # in model.py + sqrerr, abserr, prob, q, pos, total = fluid.contrib.layers.ctr_metric_bundle(similarity_norm, fluid.layers.cast(x=label, dtype='float32')) + + # in train.py, after train or infer + res = np.array(scope.find_var(sqrerr.name).get_tensor()) + print("rmse: ", paddle.fleet.rmse(res, total_ins_num)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(sqrerr, Variable): + sqrerr = np.array(scope.find_var(sqrerr.name).get_tensor()) + elif isinstance(sqrerr, str): + sqrerr = np.array(scope.find_var(sqrerr).get_tensor()) + old_metric_shape = np.array(sqrerr.shape) + sqrerr = sqrerr.reshape(-1) + global_metric = np.copy(sqrerr) * 0 + fleet._role_maker._all_reduce(sqrerr, global_metric) + global_metric = global_metric.reshape(old_metric_shape) + rmse_value = math.sqrt(global_metric[0] / total_ins_num) + return rmse_value + + +def mse(sqrerr, total_ins_num, scope=None): + """ + distributed mse in fleet + + Args: + sqrerr(numpy.array|Variable|string): sqrerr in output of fluid.contrib.layers.ctr_metric_bundle + total_ins_num(int|float): total train/infer instance count + scope(Scope): specific scope + + Returns: + mse(float): mse value + + Example: + .. code-block:: python + + # in model.py + sqrerr, abserr, prob, q, pos, total = fluid.contrib.layers.ctr_metric_bundle(similarity_norm, fluid.layers.cast(x=label, dtype='float32')) + + # in train.py, after train or infer + metric = np.array(scope.find_var(sqrerr.name).get_tensor()) + print("mse: ", paddle.fleet.mse(metric, total_ins_num)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(sqrerr, Variable): + sqrerr = np.array(scope.find_var(sqrerr.name).get_tensor()) + elif isinstance(sqrerr, str): + sqrerr = np.array(scope.find_var(sqrerr).get_tensor()) + old_metric_shape = np.array(sqrerr.shape) + sqrerr = sqrerr.reshape(-1) + global_metric = np.copy(sqrerr) * 0 + fleet._role_maker._all_reduce(sqrerr, global_metric) + global_metric = global_metric.reshape(old_metric_shape) + mse_value = global_metric[0] / total_ins_num + return mse_value + + +def acc(correct, total, scope=None): + """ + distributed accuracy in fleet + + Args: + correct(numpy.array|Variable|string): correct Variable + total(numpy.array|Variable): total Variable + scope(Scope): specific scope + + Returns: + acc(float): accuracy value + + Example: + .. code-block:: python + + # in model.py + correct = fluid.layers.create_global_var(dtype='float32', shape=[1], value=0) + total = fluid.layers.create_global_var(dtype='float32', shape=[1], value=0) + acc = fluid.layers.acc(predict, label, k=1, correct=correct, total=total) + + global_correct = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) + tmp1 = fluid.layers.elementwise_min(correct, global_correct) + fluid.layers.assign(tmp1, global_correct) + + global_total = fluid.layers.create_global_var(persistable=True, dtype='float32', shape=[1], value=0) + tmp2 = fluid.layers.elementwise_min(total, global_total) + fluid.layers.assign(tmp2, global_total) + + # in train.py, after train or infer + correct_num = np.array(scope.find_var(correct.name).get_tensor()) + total_num = np.array(scope.find_var(total.name).get_tensor()) + print("accuracy: ", paddle.fleet.acc(correct_num, total_num)) + """ + fleet._role_maker._barrier_worker() + if scope is None: + scope = fluid.global_scope() + if isinstance(correct, Variable): + correct = np.array(scope.find_var(correct.name).get_tensor()) + elif isinstance(correct, str): + correct = np.array(scope.find_var(correct).get_tensor()) + if isinstance(total, Variable): + total = np.array(scope.find_var(total.name).get_tensor()) + elif isinstance(total, str): + total = np.array(scope.find_var(total).get_tensor()) + global_correct_num = np.copy(correct) * 0 + global_total_num = np.copy(total) * 0 + fleet._role_maker._all_reduce(correct, global_correct_num) + fleet._role_maker._all_reduce(total, global_total_num) + return float(global_correct_num[0]) / float(global_total_num[0]) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_metric.py b/python/paddle/fluid/tests/unittests/test_fleet_metric.py new file mode 100644 index 00000000000..6e5feece93f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_metric.py @@ -0,0 +1,113 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test fleet metric.""" + +from __future__ import print_function +import numpy as np +import paddle +import paddle.fluid as fluid +import os +import unittest +import paddle.fleet.metrics.metric as metric +from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker +from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet as fleet + + +class TestFleetMetric(unittest.TestCase): + """Test cases for fleet metric.""" + + def setUp(self): + """Set up, set envs.""" + + class FakeFleet: + """Fake fleet only for test.""" + + def __init__(self): + """Init.""" + self.gloo = fluid.core.Gloo() + self.gloo.set_rank(0) + self.gloo.set_size(1) + self.gloo.set_prefix("123") + self.gloo.set_iface("lo") + self.gloo.set_hdfs_store("./tmp_test_metric", "", "") + self.gloo.init() + + def _all_reduce(self, input, output, mode="sum"): + """All reduce using gloo.""" + input_list = [i for i in input] + ans = self.gloo.all_reduce(input_list, mode) + for i in range(len(ans)): + output[i] = 1 + + def _barrier_worker(self): + """Fake barrier worker, do nothing.""" + pass + + self.fleet = FakeFleet() + fleet._role_maker = self.fleet + + def test_metric_1(self): + """Test cases for metrics.""" + train = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(train, startup): + t = fluid.layers.create_global_var( + shape=[1, 1], + value=1, + dtype='int64', + persistable=True, + force_cpu=True) + t1 = fluid.layers.create_global_var( + shape=[1, 1], + value=1, + dtype='int64', + persistable=True, + force_cpu=True) + place = fluid.CPUPlace() + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + metric.sum(t, scope) + metric.max(t, scope) + metric.min(t, scope) + metric.auc(t, t1, scope) + metric.mae(t1, 3, scope) + metric.rmse(t1, 3, scope) + metric.mse(t1, 3, scope) + metric.acc(t, t1, scope) + metric.sum(str(t.name), scope) + metric.max(str(t.name), scope) + metric.min(str(t.name), scope) + metric.auc(str(t1.name), str(t.name), scope) + metric.mae(str(t1.name), 3, scope) + metric.rmse(str(t1.name), 3, scope) + metric.mse(str(t1.name), 3, scope) + metric.acc(str(t.name), str(t1.name), scope) + arr = np.array([1, 2, 3, 4]) + metric.sum(arr) + metric.max(arr) + metric.min(arr) + arr1 = np.array([[1, 2, 3, 4]]) + arr2 = np.array([[1, 2, 3, 4]]) + arr3 = np.array([1, 2, 3, 4]) + metric.auc(arr1, arr2) + metric.mae(arr, 3) + metric.rmse(arr, 3) + metric.mse(arr, 3) + metric.acc(arr, arr3) + + +if __name__ == "__main__": + unittest.main() -- GitLab