auc_metrics.py 7.8 KB
Newer Older
T
tangwei 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
T
tangwei 已提交
14

X
xiexionghang 已提交
15
import math
T
tangwei 已提交
16

X
xiexionghang 已提交
17 18
import numpy as np
import paddle.fluid as fluid
T
tangwei 已提交
19

20
from paddlerec.core.metric import Metric
X
xiexionghang 已提交
21 22


T
tangwei 已提交
23
class AUCMetric(Metric):
X
xiexionghang 已提交
24
    """
T
tangwei 已提交
25
    Metric For Fluid Model
X
xiexionghang 已提交
26
    """
T
tangwei 已提交
27 28

    def __init__(self, config, fleet):
X
xiexionghang 已提交
29
        """ """
T
tangwei 已提交
30 31 32
        self.config = config
        self.fleet = fleet

X
xiexionghang 已提交
33
    def clear(self, scope, params):
        """
        Reset every metric variable of a group to zeros.
        Args:
            scope : paddle runtime var container
            params(dict) :
                label : a group name for metric
                metric_dict : current metric_items in group
        Return:
            None
        """
        self._label = params['label']
        self._metric_dict = params['metric_dict']
        self._result = {}
        cpu_place = fluid.CPUPlace()
        for name in self._metric_dict:
            item = self._metric_dict[name]
            # Metrics whose variable is not in the scope are left alone.
            if scope.find_var(item['var'].name) is not None:
                tensor = scope.var(item['var'].name).get_tensor()
                # 'float32' unless the metric item names its own data_type.
                dtype = item['data_type'] if 'data_type' in item else 'float32'
                zeros = np.zeros(tensor._get_dims()).astype(dtype)
                tensor.set(zeros, cpu_place)
T
tangwei 已提交
58

X
xiexionghang 已提交
59
    def get_metric(self, scope, metric_name):
        """
        Reduce the metric named metric_name from all workers.
        Args:
            scope : var container holding the metric variable
            metric_name(str) : name of the variable looked up in scope
        Return:
            element 0 (along the first axis) of the reduced array
        """
        local = np.array(scope.find_var(metric_name).get_tensor())
        shape = np.array(local.shape)
        flat = local.reshape(-1)
        # Zeroed copy of the flattened metric, used as the receive buffer.
        reduced = np.copy(flat) * 0
        comm = self.fleet._role_maker._node_type_comm
        comm.Allreduce(flat, reduced)
        return reduced.reshape(shape)[0]
T
tangwei 已提交
72

X
xiexionghang 已提交
73
    def get_global_metrics(self, scope, metric_dict):
        """
        Reduce every metric in metric_dict from all workers.
        Args:
            scope : var container holding the metric variables
            metric_dict(dict) : {metric_name : metric item with a 'var' entry}
        Return:
            dict : {metric_name : metric_result}; the result is None for a
                metric whose variable is not found in scope
        """
        # Wait for all workers before reading the metric variables.
        self.fleet._role_maker._barrier_worker()
        reduced = {}
        for key in metric_dict:
            item = metric_dict[key]
            if scope.find_var(item['var'].name) is not None:
                reduced[key] = self.get_metric(scope, item['var'].name)
            else:
                reduced[key] = None
        return reduced

    def calculate_auc(self, global_pos, global_neg):
        """
        Compute AUC from per-bucket positive and negative counts.

        Buckets are visited from the last index down to the first while
        running positive/negative totals are kept; every bucket adds one
        trapezoid to the accumulated area.
        Args:
            global_pos : per-bucket positive counts
            global_neg : per-bucket negative counts, indexed like global_pos
        Return:
            area / (pos * neg), or 0.5 when either total is zero
        """
        area = 0.0
        pos_total = 0.0
        neg_total = 0.0
        ins_total = 0
        for index in range(len(global_pos) - 1, -1, -1):
            pos_before, neg_before = pos_total, neg_total
            pos_total = pos_before + global_pos[index]
            ins_total += global_pos[index]
            neg_total = neg_before + global_neg[index]
            ins_total += global_neg[index]
            # Trapezoid: width is the new negatives, height the mean of the
            # positive totals before and after this bucket.
            area += (neg_total - neg_before) * (pos_before + pos_total) / 2
        if pos_total * neg_total == 0 or ins_total == 0:
            return 0.5
        return area / (pos_total * neg_total)

    def calculate_bucket_error(self, global_pos, global_neg):
        """
        Compute the impression-weighted relative ctr error over pooled buckets.

        Bucket i stands for the predicted ctr i / num_bucket. Buckets are
        pooled in index order; a pool restarts when the current bucket's ctr
        is more than 0.01 away from the ctr the pool started at. Once a
        pool's estimated relative error drops below 0.05, its
        |actual_ctr / predicted_ctr - 1| is added to the total, weighted by
        the pool's impressions, and the next bucket starts a new pool.
        Args:
            global_pos : per-bucket click (positive) counts
            global_neg : per-bucket non-click (negative) counts
        Return:
            weighted mean relative error, or 0.0 when no pool qualified
        """
        max_span = 0.01
        error_bound = 0.05
        bucket_total = len(global_pos)
        anchor_ctr = -1.0
        shows = 0.0
        weighted_ctr = 0.0
        clicks = 0.0
        weighted_error = 0.0
        counted_shows = 0.0
        for bucket in range(bucket_total):
            bucket_click = global_pos[bucket]
            bucket_show = global_pos[bucket] + global_neg[bucket]
            bucket_ctr = float(bucket) / bucket_total
            if abs(bucket_ctr - anchor_ctr) > max_span:
                # Too far from where the pool started: begin a new pool here.
                anchor_ctr = bucket_ctr
                shows = 0.0
                weighted_ctr = 0.0
                clicks = 0.0
            shows += bucket_show
            weighted_ctr += bucket_ctr * bucket_show
            clicks += bucket_click
            if shows == 0:
                continue
            expected_ctr = weighted_ctr / shows
            if expected_ctr != 0:
                spread = math.sqrt(
                    (1 - expected_ctr) / (expected_ctr * shows))
                if spread < error_bound:
                    observed_ctr = clicks / shows
                    weighted_error += abs(observed_ctr / expected_ctr - 1) * shows
                    counted_shows += shows
                    # Force the next bucket to open a fresh pool.
                    anchor_ctr = -1
        if counted_shows > 0:
            return weighted_error / counted_shows
        return 0.0
T
tangwei 已提交
163

X
xiexionghang 已提交
164
    def calculate(self, scope, params):
        """
        Reduce the raw metrics of a group from all workers and derive the
        final statistics from them.
        Args:
            scope : paddle runtime var container
            params(dict) :
                label : a group name for metric
                metric_dict : current metric_items in group
        Return:
            dict : the reduced raw metrics plus 'auc', 'bucket_error',
                'actual_ctr', 'predict_ctr', 'mae', 'rmse', 'copc' and
                'mean_q'. A derived entry is 0 when its inputs are not
                available or when no instance was counted.
        """
        self._label = params['label']
        self._metric_dict = params['metric_dict']
        self.fleet._role_maker._barrier_worker()
        result = self.get_global_metrics(scope, self._metric_dict)

        derived_keys = ('auc', 'bucket_error', 'actual_ctr', 'predict_ctr',
                        'mae', 'rmse', 'copc', 'mean_q')

        def _available(key):
            # get_global_metrics stores None for a metric whose variable is
            # missing from the scope; treat that the same as "not present".
            return result.get(key) is not None

        total_ins_num = result.get('total_ins_num')
        if total_ins_num is None or total_ins_num == 0:
            # Nothing to normalise by: report zeros for every statistic.
            if total_ins_num is None:
                result['total_ins_num'] = 0
            for key in derived_keys:
                result[key] = 0
            self._result = result
            return self._result

        if _available('stat_pos') and _available('stat_neg'):
            result['auc'] = self.calculate_auc(result['stat_pos'],
                                               result['stat_neg'])
            # Was calculate_auc in the original, which made bucket_error a
            # copy of auc.
            result['bucket_error'] = self.calculate_bucket_error(
                result['stat_pos'], result['stat_neg'])
        if _available('pos_ins_num'):
            result['actual_ctr'] = result['pos_ins_num'] / total_ins_num
        if _available('abserr'):
            result['mae'] = result['abserr'] / total_ins_num
        if _available('sqrerr'):
            result['rmse'] = math.sqrt(result['sqrerr'] / total_ins_num)
        if _available('prob'):
            result['predict_ctr'] = result['prob'] / total_ins_num
            # copc needs both ctr values and a non-vanishing denominator.
            if abs(result['predict_ctr']) > 1e-6 and \
                    _available('actual_ctr'):
                result['copc'] = result['actual_ctr'] / result['predict_ctr']
        if _available('q'):
            result['mean_q'] = result['q'] / total_ins_num

        # Keep the result shape identical to the zero-instance branch so
        # consumers such as __str__ can rely on every derived key.
        for key in derived_keys:
            if result.get(key) is None:
                result[key] = 0
        self._result = result
        return result

    def get_result(self):
        """
        Return:
            the result object currently stored on self._result
        """
        latest = self._result
        return latest

T
for mat  
tangwei 已提交
208
    def __str__(self):
X
xiexionghang 已提交
209
        """ """
X
xiexionghang 已提交
210
        result = self.get_result()
T
tangwei 已提交
211 212 213 214 215
        result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
                     "Actural_CTR=%.6f Predicted_CTR=%.6f COPC=%.6f MEAN Q_VALUE=%.6f Ins number=%s" % \
                     (self._label, result['auc'], result['bucket_error'], result['mae'], result['rmse'],
                      result['actual_ctr'],
                      result['predict_ctr'], result['copc'], result['mean_q'], result['total_ins_num'])
X
xiexionghang 已提交
216
        return result_str