auc_metrics.py 7.6 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
T
tangwei 已提交
14

X
xiexionghang 已提交
15 16 17
import math
import numpy as np
import paddle.fluid as fluid
T
tangwei 已提交
18
from .base import Metric
X
xiexionghang 已提交
19 20


T
tangwei 已提交
21
class AUCMetric(Metric):
X
xiexionghang 已提交
22 23 24
    """
    Metric For Paddle Model
    """
T
tangwei 已提交
25 26

    def __init__(self, config, fleet):
X
xiexionghang 已提交
27
        """ """
T
tangwei 已提交
28 29 30
        self.config = config
        self.fleet = fleet

X
xiexionghang 已提交
31
    def clear(self, scope, params):
        """
        Overwrite every metric variable of a group with zeros.

        The group label and metric dict are remembered on the instance and the
        cached result is replaced by an empty dict. Metrics whose variable is
        not found in ``scope`` are skipped.
        Args:
            scope : paddle runtime var container
            params(dict) :
                label : a group name for metric
                metric_dict : current metric_items in group; each item holds
                    its variable under 'var' and may carry a 'data_type'
                    (falls back to 'float32' when absent)
        Return:
            None
        """
        self._label = params['label']
        self._metric_dict = params['metric_dict']
        self._result = {}
        cpu_place = fluid.CPUPlace()
        for item in self._metric_dict.values():
            var_name = item['var'].name
            if scope.find_var(var_name) is None:
                continue
            tensor = scope.var(var_name).get_tensor()
            dtype = item['data_type'] if 'data_type' in item else 'float32'
            # Same dims as the existing tensor, filled with zeros of the requested dtype.
            zeros = np.zeros(tensor._get_dims()).astype(dtype)
            tensor.set(zeros, cpu_place)
T
tangwei 已提交
56

X
xiexionghang 已提交
57
    def get_metric(self, scope, metric_name):
        """
        Reduce the metric variable named metric_name over all workers.

        The tensor is read from ``scope``, flattened, passed through the role
        maker's ``_node_type_comm.Allreduce`` together with a same-sized
        receive buffer, and the received data is reshaped to the original
        shape.
        Return:
            element 0 along the leading axis of the reduced metric
        """
        local_values = np.array(scope.find_var(metric_name).get_tensor())
        original_shape = np.array(local_values.shape)
        send_buffer = local_values.reshape(-1)
        # Multiplying a copy by zero yields a buffer with the send buffer's dtype and length.
        recv_buffer = np.copy(send_buffer) * 0
        communicator = self.fleet._role_maker._node_type_comm
        communicator.Allreduce(send_buffer, recv_buffer)
        return recv_buffer.reshape(original_shape)[0]
T
tangwei 已提交
70

X
xiexionghang 已提交
71
    def get_global_metrics(self, scope, metric_dict):
        """
        Reduce every metric in metric_dict over all workers.

        The role maker's ``_barrier_worker()`` is called once before any
        metric is reduced. A metric whose variable is not found in ``scope``
        is reported as None instead of being reduced.
        Return:
            dict : {metric_name : metric_result}
        """
        self.fleet._role_maker._barrier_worker()
        reduced = {}
        for name, item in metric_dict.items():
            var_name = item['var'].name
            if scope.find_var(var_name) is not None:
                reduced[name] = self.get_metric(scope, var_name)
            else:
                reduced[name] = None
        return reduced

    def calculate_auc(self, global_pos, global_neg):
        """
        Compute AUC from per-bucket positive and negative counts.

        Buckets are visited from the last index down to the first while
        running totals of positives and negatives are kept. Each bucket adds a
        trapezoid to the area: the negatives gained in that bucket times the
        mean of the positive total before and after it.
        Args:
            global_pos : indexable per-bucket positive counts
            global_neg : indexable per-bucket negative counts, read at the
                same indices as global_pos
        Return:
            area / (positives * negatives), or 0.5 when that product or the
            total instance count is zero
        """
        bucket_count = len(global_pos)
        area = 0.0
        seen_pos = 0.0
        seen_neg = 0.0
        instance_total = 0
        for index in range(bucket_count - 1, -1, -1):
            next_pos = seen_pos + global_pos[index]
            instance_total += global_pos[index]
            next_neg = seen_neg + global_neg[index]
            instance_total += global_neg[index]
            area += (next_neg - seen_neg) * (seen_pos + next_pos) / 2
            seen_pos, seen_neg = next_pos, next_neg
        if seen_pos * seen_neg == 0 or instance_total == 0:
            return 0.5
        return area / (seen_pos * seen_neg)

    def calculate_bucket_error(self, global_pos, global_neg):
        """
        Compute the impression-weighted relative ctr error over bucket windows.

        Bucket ``i`` stands for the predicted ctr ``i / num_bucket``.
        Consecutive buckets are pooled into a window. A new window starts when
        a bucket's ctr is more than 0.01 away from the ctr that opened the
        current window, or right after a window has been scored. A window is
        scored once sqrt((1 - adjust_ctr) / (adjust_ctr * impressions)) falls
        below 0.05, where adjust_ctr is the impression-weighted mean predicted
        ctr of the window. Its contribution is
        |actual_ctr / adjust_ctr - 1| weighted by the window's impressions.
        Args:
            global_pos : indexable per-bucket click (positive) counts
            global_neg : indexable per-bucket non-click (negative) counts
        Return:
            weighted error sum divided by the scored impressions, or 0.0 when
            no window was scored
        """
        max_span = 0.01
        error_bound = 0.05
        bucket_count = len(global_pos)

        window_start_ctr = -1.0
        window_shows = 0.0
        window_ctr_sum = 0.0
        window_clicks = 0.0
        weighted_error = 0.0
        scored_shows = 0.0

        for idx in range(bucket_count):
            clicks = global_pos[idx]
            shows = clicks + global_neg[idx]
            bucket_ctr = float(idx) / bucket_count
            if abs(bucket_ctr - window_start_ctr) > max_span:
                # Too far from the window's first bucket: open a fresh window here.
                window_start_ctr = bucket_ctr
                window_shows = window_ctr_sum = window_clicks = 0.0
            window_shows += shows
            window_ctr_sum += bucket_ctr * shows
            window_clicks += clicks
            if window_shows == 0:
                continue
            expected_ctr = window_ctr_sum / window_shows
            if expected_ctr == 0:
                continue
            spread = math.sqrt(
                (1 - expected_ctr) / (expected_ctr * window_shows))
            if spread < error_bound:
                observed_ctr = window_clicks / window_shows
                deviation = abs(observed_ctr / expected_ctr - 1)
                weighted_error += deviation * window_shows
                scored_shows += window_shows
                # -1 is farther than max_span from any bucket ctr, so the
                # next bucket always starts a new window.
                window_start_ctr = -1

        if scored_shows > 0:
            return weighted_error / scored_shows
        return 0.0
T
tangwei 已提交
160

X
xiexionghang 已提交
161
    def calculate(self, scope, params):
        """
        Reduce the group's metrics over all workers and derive summary values.

        The raw metrics come from ``get_global_metrics``. Derived entries are
        added to the same dict only when their inputs are present:
            auc, bucket_error : from 'stat_pos' and 'stat_neg'
            actual_ctr : 'pos_ins_num' / 'total_ins_num'
            mae : 'abserr' / 'total_ins_num'
            rmse : sqrt('sqrerr' / 'total_ins_num')
            predict_ctr : 'prob' / 'total_ins_num'
            copc : actual_ctr / predict_ctr, only when actual_ctr exists and
                abs(predict_ctr) > 1e-6
            mean_q : 'q' / 'total_ins_num'
        When 'total_ins_num' is 0 every derived entry is set to 0 instead.
        Args:
            scope : paddle runtime var container
            params(dict) :
                label : a group name for metric
                metric_dict : current metric_items in group
        Return:
            dict : the reduced metrics plus the derived entries; the same
            dict is cached on the instance as ``self._result``
        """
        self._label = params['label']
        self._metric_dict = params['metric_dict']
        self.fleet._role_maker._barrier_worker()
        result = self.get_global_metrics(scope, self._metric_dict)

        if result['total_ins_num'] == 0:
            # No instances were seen: report zeros rather than divide by zero.
            for key in ('auc', 'bucket_error', 'actual_ctr', 'predict_ctr',
                        'mae', 'rmse', 'copc', 'mean_q'):
                result[key] = 0
            self._result = result
            return self._result

        total_ins_num = result['total_ins_num']
        if 'stat_pos' in result and 'stat_neg' in result:
            stat_pos = result['stat_pos']
            stat_neg = result['stat_neg']
            result['auc'] = self.calculate_auc(stat_pos, stat_neg)
            # bucket_error must come from calculate_bucket_error; it used to
            # be filled with a second calculate_auc call by mistake.
            result['bucket_error'] = self.calculate_bucket_error(
                stat_pos, stat_neg)
        if 'pos_ins_num' in result:
            result['actual_ctr'] = result['pos_ins_num'] / total_ins_num
        if 'abserr' in result:
            result['mae'] = result['abserr'] / total_ins_num
        if 'sqrerr' in result:
            result['rmse'] = math.sqrt(result['sqrerr'] / total_ins_num)
        if 'prob' in result:
            result['predict_ctr'] = result['prob'] / total_ins_num
            # copc needs actual_ctr, which only exists when 'pos_ins_num' was
            # collected; skip it otherwise instead of raising KeyError.
            if 'actual_ctr' in result and abs(result['predict_ctr']) > 1e-6:
                result['copc'] = result['actual_ctr'] / result['predict_ctr']
        if 'q' in result:
            result['mean_q'] = result['q'] / total_ins_num

        self._result = result
        return result

    def get_result(self):
        """
        Return the result dict currently cached on the instance.
        Return:
            dict : whatever clear() or calculate() last stored in ``_result``
        """
        cached = self._result
        return cached

    def get_result_to_string(self):
        """
        Format the cached result as a single human-readable line.

        Derived metrics that are absent from the result are printed as 0.0
        rather than raising KeyError. This matters because calculate() only
        adds an entry when its inputs exist, e.g. 'copc' is left out when
        abs(predict_ctr) <= 1e-6. A metric stored as None is printed as 0.0
        as well.
        Return:
            str : label followed by AUC, BUCKET_ERROR, MAE, RMSE, actual and
            predicted ctr, COPC, mean q value and the instance count
        """
        result = self.get_result()

        def metric_value(key):
            # '%.6f' cannot format a missing or None metric; fall back to 0.0.
            value = result.get(key)
            return 0.0 if value is None else value

        result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
                     "Actural_CTR=%.6f Predicted_CTR=%.6f COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % \
                     (self._label, metric_value('auc'), metric_value('bucket_error'),
                      metric_value('mae'), metric_value('rmse'),
                      metric_value('actual_ctr'), metric_value('predict_ctr'),
                      metric_value('copc'), metric_value('mean_q'),
                      result.get('total_ins_num', 0))
        return result_str