metrics.py 19.3 KB
Newer Older
C
chenxuyi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
C
chenxuyi 已提交
14
"""predefined metrics"""
C
chenxuyi 已提交
15 16 17

import sys
import os
C
chenxuyi 已提交
18 19
import six

C
chenxuyi 已提交
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
import numpy as np
import itertools
import logging

import paddle.fluid as F
import paddle.fluid.layers as L
import sklearn.metrics

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Names exported by a star-import of this module.
# NOTE(review): several classes defined in this file (e.g. Ppl, MSE, Cosine,
# MacroF1, Auc, PNRatio, PrecisionAtK) are not listed -- confirm that this
# omission is intentional.
__all__ = [
    'Metrics', 'F1', 'Recall', 'Precision', 'Mrr', 'Mean', 'Acc', 'ChunkF1',
    'RecallAtPrecision'
]


class Metrics(object):
    """Metrics base class.

    Subclasses in this file follow a common protocol:

    * ``tensor`` -- property returning the values the metric consumes;
    * ``update(args)`` -- accumulate one batch of those values;
    * ``eval()`` -- reduce everything accumulated so far to the final score;
    * ``reset()`` -- drop everything accumulated so far.

    The base implementations are no-ops that return ``None``.
    """

    def __init__(self):
        """Create the metric with an empty accumulator."""
        self.saver = []

    def reset(self):
        """Discard everything accumulated so far."""
        self.saver = []

    @property
    def tensor(self):
        """Values this metric consumes; the base class has none."""
        return None

    def update(self, *args):
        """Accumulate one batch; a no-op in the base class."""
        return None

    def eval(self):
        """Compute the final score; the base class has no score."""
        return None


class Mean(Metrics):
    """Arithmetic mean over every element passed to ``update``.

    Args:
        t: the value to average; it is stored as-is and handed back through
            ``tensor``.
    """

    def __init__(self, t):
        """Remember the tracked value and start with an empty accumulator."""
        self.t = t
        self.reset()

    def reset(self):
        """Discard all accumulated elements."""
        self.saver = np.array([])

    @property
    def tensor(self):
        """One-element tuple holding the tracked value."""
        return (self.t, )

    def update(self, args):
        """Append one batch to the accumulator.

        Args:
            args: a one-element sequence whose item supports ``reshape``
                (presumably the evaluated numpy value of ``tensor`` --
                confirm against the caller). It is flattened before being
                appended.
        """
        t, = args
        self.saver = np.concatenate([self.saver, t.reshape([-1])])

    def eval(self):
        """Return the mean of all accumulated elements."""
        return self.saver.mean()


class Ppl(Mean):
    """Exponential of the mean accumulated value.

    Presumably used as perplexity over per-element losses -- the code itself
    only computes ``exp(mean)``.
    """

    def eval(self):
        """Return ``exp`` of the mean of all accumulated elements."""
        return np.exp(self.saver.mean())


class Acc(Mean):
    """Mean of the element-wise equality between prediction and label.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted values with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Build the equality tensor that is averaged by ``Mean``."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))
        self.eq = L.equal(pred, label)
        self.reset()

    @property
    def tensor(self):
        """One-element tuple holding the equality tensor."""
        return (self.eq, )


class MSE(Mean):
    """Mean of the squared difference between prediction and label.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted values with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Build the squared-error expression that is averaged by ``Mean``."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        diff = pred - label
        self.mse = diff * diff
        self.reset()

    @property
    def tensor(self):
        """One-element tuple holding the squared-error expression."""
        return (self.mse, )


class Cosine(Mean):
    """Mean of ``L.cos_sim(label, pred)``.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted values with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Build the cosine-similarity tensor that is averaged by ``Mean``."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.cos = L.cos_sim(label, pred)
        self.reset()

    @property
    def tensor(self):
        """One-element tuple holding the cosine-similarity tensor."""
        return (self.cos, )


M
Meiyim 已提交
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
class MacroF1(Metrics):
    """Macro-averaged F1 computed with ``sklearn.metrics.f1_score``.

    Labels and predictions are cast to booleans when accumulated, so the
    score is computed over two classes (``False`` / ``True``).

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted values with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Store the inputs and start with empty accumulators."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.label = label
        self.pred = pred
        self.reset()

    def reset(self):
        """Discard all accumulated labels and predictions."""
        # np.bool_ is the numpy boolean scalar type; the plain `np.bool`
        # alias no longer exists in recent numpy releases.
        self.label_saver = np.array([], dtype=np.bool_)
        self.pred_saver = np.array([], dtype=np.bool_)

    @property
    def tensor(self):
        """``(label, pred)`` in the order ``update`` expects them."""
        return self.label, self.pred

    def update(self, args):
        """Append one batch of ``(label, pred)`` values.

        Args:
            args: pair of array-likes supporting ``reshape``/``astype``;
                both are flattened and cast to booleans.

        Raises:
            ValueError: if the flattened label and prediction differ in size.
        """
        label, pred = args
        label = label.reshape([-1]).astype(np.bool_)
        pred = pred.reshape([-1]).astype(np.bool_)
        if label.shape != pred.shape:
            raise ValueError(
                'Metrics precesion: input not match: label:%s pred:%s' %
                (label.shape, pred.shape))
        self.label_saver = np.concatenate([self.label_saver, label])
        self.pred_saver = np.concatenate([self.pred_saver, pred])

    def eval(self):
        """Return the macro-averaged F1 over everything accumulated."""
        return sklearn.metrics.f1_score(
            self.label_saver, self.pred_saver, average='macro')


C
chenxuyi 已提交
192
class Precision(Metrics):
    """Binary precision: true positives / predicted positives.

    Labels and predictions are cast to booleans when accumulated.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted values with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Store the inputs and start with empty accumulators."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.label = label
        self.pred = pred
        self.reset()

    def reset(self):
        """Discard all accumulated labels and predictions."""
        # np.bool_ is the numpy boolean scalar type; the plain `np.bool`
        # alias no longer exists in recent numpy releases.
        self.label_saver = np.array([], dtype=np.bool_)
        self.pred_saver = np.array([], dtype=np.bool_)

    @property
    def tensor(self):
        """``(label, pred)`` in the order ``update`` expects them."""
        return self.label, self.pred

    def update(self, args):
        """Append one batch of ``(label, pred)`` values.

        Args:
            args: pair of array-likes supporting ``reshape``/``astype``;
                both are flattened and cast to booleans.

        Raises:
            ValueError: if the flattened label and prediction differ in size.
        """
        label, pred = args
        label = label.reshape([-1]).astype(np.bool_)
        pred = pred.reshape([-1]).astype(np.bool_)
        if label.shape != pred.shape:
            raise ValueError(
                'Metrics precesion: input not match: label:%s pred:%s' %
                (label.shape, pred.shape))
        self.label_saver = np.concatenate([self.label_saver, label])
        self.pred_saver = np.concatenate([self.pred_saver, pred])

    def eval(self):
        """Return precision, or 0 when nothing was predicted positive."""
        tp = (self.label_saver & self.pred_saver).astype(np.int64).sum()
        p = self.pred_saver.astype(np.int64).sum()
        if p == 0:
            # Avoid a NaN result (and a numpy warning) on 0 / 0.
            return np.float64(0.)
        # Force floating-point division regardless of interpreter version.
        return np.float64(tp) / p


class Recall(Precision):
    """Binary recall: true positives / labelled positives.

    Accumulation is inherited from ``Precision``; only the final reduction
    differs.
    """

    def eval(self):
        """Return recall, or 0 when there is no positive label."""
        tp = (self.label_saver & self.pred_saver).astype(np.int64).sum()
        t = self.label_saver.astype(np.int64).sum()
        if t == 0:
            # Avoid a NaN result (and a numpy warning) on 0 / 0.
            return np.float64(0.)
        # Force floating-point division regardless of interpreter version.
        return np.float64(tp) / t


class F1(Precision):
    """Binary F1 score with additive smoothing.

    Accumulation is inherited from ``Precision``. Each denominator is
    offset by ``1e-6`` so that empty or all-negative inputs give 0 rather
    than a division by zero.
    """

    def eval(self):
        """Return ``2 * P * R / (P + R)`` using smoothed denominators."""
        tp = (self.label_saver & self.pred_saver).astype(np.int64).sum()
        t = self.label_saver.astype(np.int64).sum()
        p = self.pred_saver.astype(np.int64).sum()
        precision = tp / (p + 1.e-6)
        recall = tp / (t + 1.e-6)
        return 2 * precision * recall / (precision + recall + 1.e-6)


class Auc(Metrics):
    """Area under the ROC curve, computed with scikit-learn.

    Predictions are accumulated as ``float32`` scores and labels as
    booleans.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, label, pred):
        """Store the inputs and start with empty accumulators."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.pred = pred
        self.label = label
        self.reset()

    def reset(self):
        """Discard all accumulated scores and labels."""
        self.pred_saver = np.array([], dtype=np.float32)
        # np.bool_ is the numpy boolean scalar type; the plain `np.bool`
        # alias no longer exists in recent numpy releases.
        self.label_saver = np.array([], dtype=np.bool_)

    @property
    def tensor(self):
        """``[pred, label]`` -- note that the prediction comes first."""
        return [self.pred, self.label]

    def update(self, args):
        """Append one batch of ``(pred, label)`` values.

        Args:
            args: pair of array-likes supporting ``reshape``/``astype``, in
                the same order as ``tensor``; both are flattened.
        """
        pred, label = args
        pred = pred.reshape([-1]).astype(np.float32)
        label = label.reshape([-1]).astype(np.bool_)
        self.pred_saver = np.concatenate([self.pred_saver, pred])
        self.label_saver = np.concatenate([self.label_saver, label])

    def eval(self):
        """Return the ROC AUC over everything accumulated."""
        fpr, tpr, _ = sklearn.metrics.roc_curve(
            self.label_saver.astype(np.int64), self.pred_saver)
        return sklearn.metrics.auc(fpr, tpr)


class RecallAtPrecision(Auc):
    """Recall at the first precision-recall point above a target precision.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.
        precision: target precision; a curve point qualifies when its
            precision is strictly greater than this value.
    """

    def __init__(self, label, pred, precision=0.9):
        """Store the inputs and the target precision."""
        super(RecallAtPrecision, self).__init__(label, pred)
        self.precision = precision

    def eval(self):
        """Return the recall of the first qualifying curve point.

        The points are scanned in the order returned by
        ``sklearn.metrics.precision_recall_curve``. Returns ``0.0`` when no
        point has precision strictly above the target.
        """
        # Keep one score per label: the last column when several scores per
        # label were accumulated. A local is used so that the accumulator
        # itself is left untouched by evaluation.
        scores = self.pred_saver.reshape([self.label_saver.size, -1])[:, -1]
        precision, recall, _ = sklearn.metrics.precision_recall_curve(
            self.label_saver, scores)
        for p, r in zip(precision, recall):
            if p > self.precision:
                return r
        return 0.0


class PrecisionAtThreshold(Auc):
    """Precision when scores above a fixed threshold count as positive.

    Args:
        label: reference values; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.
        threshold: a score strictly greater than this is a positive.
    """

    def __init__(self, label, pred, threshold=0.5):
        """Store the inputs and the decision threshold."""
        # Two-argument form, matching the other subclasses in this file.
        super(PrecisionAtThreshold, self).__init__(label, pred)
        self.threshold = threshold

    def eval(self):
        """Return true positives / predicted positives (smoothed by 1e-6)."""
        infered = self.pred_saver > self.threshold
        correct_num = np.array(infered & self.label_saver).sum()
        infer_num = infered.sum()
        return correct_num / (infer_num + 1.e-6)


class Mrr(Metrics):
    """Mean reciprocal rank, grouped by query id.

    Within each query the candidates are ranked by descending score; the
    query contributes ``1 / rank`` of its first candidate with a non-zero
    label, or 0 when it has none.

    Args:
        qid: query ids.
        label: relevance labels; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, qid, label, pred):
        """Store the inputs and start with empty accumulators."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.qid = qid
        self.label = label
        self.pred = pred
        self.reset()

    def reset(self):
        """Discard all accumulated query ids, labels and scores."""
        self.qid_saver = np.array([], dtype=np.int64)
        self.label_saver = np.array([], dtype=np.int64)
        self.pred_saver = np.array([], dtype=np.float32)

    @property
    def tensor(self):
        """``[qid, label, pred]`` in the order ``update`` expects them."""
        return [self.qid, self.label, self.pred]

    def update(self, args):
        """Append one batch of ``(qid, label, pred)`` values.

        Args:
            args: triple of arrays; each is flattened before being stored.

        Raises:
            ValueError: if the three arrays differ in their first dimension.
        """
        qid, label, pred = args
        if not (qid.shape[0] == label.shape[0] == pred.shape[0]):
            raise ValueError(
                'Mrr dimention not match: qid[%s] label[%s], pred[%s]' %
                (qid.shape, label.shape, pred.shape))
        self.qid_saver = np.concatenate(
            [self.qid_saver, qid.reshape([-1]).astype(np.int64)])
        self.label_saver = np.concatenate(
            [self.label_saver, label.reshape([-1]).astype(np.int64)])
        self.pred_saver = np.concatenate(
            [self.pred_saver, pred.reshape([-1]).astype(np.float32)])

    def eval(self):
        """Return the mean reciprocal rank as ``np.float32``.

        Returns 0 when nothing has been accumulated.
        """

        def _qid_of(record):
            return record[0]

        def _reciprocal_rank(group):
            # Rank by descending score and stop at the first relevant item.
            ranked = sorted(group, key=lambda rec: rec[2], reverse=True)
            for rank, (_, lab, _) in enumerate(ranked):
                if lab != 0:
                    return 1. / (rank + 1.)
            return 0.

        # groupby only merges adjacent records, hence the sort by qid first.
        records = sorted(
            zip(self.qid_saver, self.label_saver, self.pred_saver),
            key=_qid_of)
        mrr_for_qid = [
            _reciprocal_rank(group)
            for _, group in itertools.groupby(
                records, key=_qid_of)
        ]
        if not mrr_for_qid:
            # No query seen: avoid dividing by zero.
            return np.float32(0.)
        return np.float32(sum(mrr_for_qid) / len(mrr_for_qid))


class ChunkF1(Metrics):
    """Chunk-level F1 for sequence labelling.

    Tag encoding, as read by ``_extract_bio_chunk``: ``tag // 2`` is the
    chunk type, ``tag % 2 == 0`` begins a chunk, ``tag % 2 == 1`` continues
    one, and ``num_label - 1`` is the "outside" tag.

    Args:
        label: reference tag ids.
        pred: predicted tag ids.
        seqlen: length of each sequence.
        num_label: number of distinct tags; the last one is the null tag.
    """

    def __init__(self, label, pred, seqlen, num_label):
        """Store the inputs and zero the chunk counters."""
        self.label = label
        self.pred = pred
        self.seqlen = seqlen
        self.null_index = num_label - 1
        self.reset()

    def _extract_bio_chunk(self, seq):
        """Split a tag sequence into chunks.

        Args:
            seq: sequence of integer tag ids.

        Returns:
            List of dicts ``{"st": start, "en": end_exclusive, "type": t}``
            in order of appearance.
        """
        chunks = []
        cur_chunk = None

        for index, tag in enumerate(seq):
            if tag == self.null_index:
                # The outside tag closes any open chunk.
                if cur_chunk is not None:
                    chunks.append(cur_chunk)
                    cur_chunk = None
                continue

            tag_type, tag_pos = divmod(tag, 2)
            # A new chunk starts on a begin tag, on a continue tag with no
            # open chunk, or on a continue tag of a different type.
            starts_new = (tag_pos == 0 or cur_chunk is None or
                          cur_chunk["type"] != tag_type)
            if starts_new:
                if cur_chunk is not None:
                    chunks.append(cur_chunk)
                cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
            else:
                cur_chunk["en"] = index + 1

        if cur_chunk is not None:
            chunks.append(cur_chunk)
        return chunks

    def reset(self):
        """Zero the label / prediction / correct chunk counters."""
        self.label_cnt = 0
        self.pred_cnt = 0
        self.correct_cnt = 0

    @property
    def tensor(self):
        """``[pred, label, seqlen]`` -- note that the prediction is first."""
        return [self.pred, self.label, self.seqlen]

    def update(self, args):
        """Count predicted, labelled and exactly matching chunks in a batch.

        Args:
            args: triple ``(pred, label, seqlen)`` of arrays; each one is
                flattened and cast to ``int32``.
        """
        pred, label, seqlen = args
        pred = pred.reshape([-1]).astype(np.int32).tolist()
        label = label.reshape([-1]).astype(np.int32).tolist()
        seqlen = seqlen.reshape([-1]).astype(np.int32).tolist()

        # The flattened arrays are indexed as if every row were padded to
        # the longest sequence of the batch.
        # NOTE(review): this breaks if rows are padded to a larger fixed
        # width -- confirm against the data pipeline.
        max_len = max([0] + seqlen)

        for i, cur_len in enumerate(seqlen):
            # Skip the first and last position of each sequence
            # (presumably special boundary tokens -- TODO confirm).
            seq_st = i * max_len + 1
            seq_en = seq_st + (cur_len - 2)
            pred_chunks = self._extract_bio_chunk(pred[seq_st:seq_en])
            label_chunks = self._extract_bio_chunk(label[seq_st:seq_en])
            self.pred_cnt += len(pred_chunks)
            self.label_cnt += len(label_chunks)

            # Both lists are ordered by start offset: walk them in lockstep
            # and count chunks with identical start, end and type.
            pred_index = 0
            label_index = 0
            while label_index < len(label_chunks) and pred_index < len(
                    pred_chunks):
                pred_chunk = pred_chunks[pred_index]
                label_chunk = label_chunks[label_index]
                if pred_chunk['st'] < label_chunk['st']:
                    pred_index += 1
                elif pred_chunk['st'] > label_chunk['st']:
                    label_index += 1
                else:
                    if pred_chunk['en'] == label_chunk['en'] \
                            and pred_chunk['type'] == label_chunk['type']:
                        self.correct_cnt += 1
                    pred_index += 1
                    label_index += 1

    def eval(self):
        """Return chunk-level F1 as ``np.float32`` (0 with no match)."""
        if self.pred_cnt == 0:
            precision = 0.0
        else:
            precision = 1.0 * self.correct_cnt / self.pred_cnt

        if self.label_cnt == 0:
            recall = 0.0
        else:
            recall = 1.0 * self.correct_cnt / self.label_cnt

        if self.correct_cnt == 0:
            f1 = 0.0
        else:
            f1 = 2 * precision * recall / (precision + recall)

        return np.float32(f1)


class PNRatio(Metrics):
    """Ratio of correctly ordered to wrongly ordered pairs, per query.

    For every pair of items of the same query whose labels differ, the pair
    is "positive" when the scores are ordered like the labels and
    "negative" when they are ordered the opposite way; pairs with equal
    scores count as neither.

    Args:
        qid: query ids.
        label: relevance labels; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, qid, label, pred):
        """Store the inputs and start with an empty accumulator."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.qid = qid
        self.label = label
        self.pred = pred
        self.reset()

    def reset(self):
        """Discard all accumulated ``(label, score)`` pairs."""
        # Maps qid -> list of (label, score) tuples.
        self.saver = {}

    @property
    def tensor(self):
        """``[qid, label, pred]`` in the order ``update`` expects them."""
        return [self.qid, self.label, self.pred]

    def update(self, args):
        """Append one batch of ``(qid, label, pred)`` values.

        Args:
            args: triple of arrays; each is flattened before being stored.

        Raises:
            ValueError: if the three arrays differ in their first dimension.
        """
        qid, label, pred = args
        if not (qid.shape[0] == label.shape[0] == pred.shape[0]):
            raise ValueError('dimention not match: qid[%s] label[%s], pred[%s]'
                             % (qid.shape, label.shape, pred.shape))
        qid = qid.reshape([-1]).tolist()
        label = label.reshape([-1]).tolist()
        pred = pred.reshape([-1]).tolist()
        assert len(qid) == len(label) == len(pred)
        for q, lab, score in zip(qid, label, pred):
            self.saver.setdefault(q, []).append((lab, score))

    def eval(self):
        """Return positive / negative pair count as ``np.float32``.

        Returns 0 when there is no negative pair.
        """
        p = 0
        n = 0
        for outputs in self.saver.values():
            # combinations yields every (i, j) pair with i < j.
            for (l1, p1), (l2, p2) in itertools.combinations(outputs, 2):
                if l1 > l2:
                    if p1 > p2:
                        p += 1
                    elif p1 < p2:
                        n += 1
                elif l1 < l2:
                    if p1 < p2:
                        p += 1
                    elif p1 > p2:
                        n += 1
        # `1.0 *` forces floating-point division on every interpreter.
        pn = 1.0 * p / n if n > 0 else 0.0
        return np.float32(pn)


class BinaryPNRatio(PNRatio):
    """Positive / negative pair ratio for binary labels.

    Within each query, items with ``label == 1`` form the positive set and
    all others the negative set; every positive is compared with every
    negative. Accumulation is inherited from ``PNRatio``.
    """

    def __init__(self, qid, label, pred):
        """Delegate to ``PNRatio``; no extra state is kept."""
        super(BinaryPNRatio, self).__init__(qid, label, pred)

    def eval(self):
        """Return positive / negative pair count as ``np.float32``.

        Returns 0 when there is no negative pair.
        """
        p = 0
        n = 0
        for outputs in self.saver.values():
            pos_set = []
            neg_set = []
            for label, score in outputs:
                if label == 1:
                    pos_set.append(score)
                else:
                    neg_set.append(score)

            for ps in pos_set:
                for ns in neg_set:
                    # Ties are counted as neither positive nor negative.
                    if ps > ns:
                        p += 1
                    elif ps < ns:
                        n += 1
        # `1.0 *` forces floating-point division on every interpreter.
        pn = 1.0 * p / n if n > 0 else 0.0
        return np.float32(pn)


class PrecisionAtK(Metrics):
    """Fraction of queries with a ``label == 1`` item among their top ``k``.

    Items of each query are ranked by descending score; a query counts as
    a hit when at least one of its first ``k`` items has label 1.

    Args:
        qid: query ids.
        label: relevance labels; must expose ``.shape``.
        pred: predicted scores with the same ``.shape`` as ``label``.
        k: number of top-ranked items inspected per query.

    Raises:
        ValueError: if ``label.shape != pred.shape``.
    """

    def __init__(self, qid, label, pred, k=1):
        """Store the inputs and start with an empty accumulator."""
        if label.shape != pred.shape:
            # Report the shapes, which is what the message promises.
            raise ValueError(
                'expect label shape == pred shape, got: label.shape=%s, pred.shape = %s'
                % (repr(label.shape), repr(pred.shape)))

        self.qid = qid
        self.label = label
        self.pred = pred
        self.k = k
        self.reset()

    def reset(self):
        """Discard all accumulated ``(label, score)`` pairs."""
        # Maps qid -> list of (label, score) tuples.
        self.saver = {}

    @property
    def tensor(self):
        """``[qid, label, pred]`` in the order ``update`` expects them."""
        return [self.qid, self.label, self.pred]

    def update(self, args):
        """Append one batch of ``(qid, label, pred)`` values.

        Args:
            args: triple of arrays; each is flattened before being stored.

        Raises:
            ValueError: if the three arrays differ in their first dimension.
        """
        qid, label, pred = args
        if not (qid.shape[0] == label.shape[0] == pred.shape[0]):
            raise ValueError('dimention not match: qid[%s] label[%s], pred[%s]'
                             % (qid.shape, label.shape, pred.shape))
        qid = qid.reshape([-1]).tolist()
        label = label.reshape([-1]).tolist()
        pred = pred.reshape([-1]).tolist()

        assert len(qid) == len(label) == len(pred)
        for q, lab, score in zip(qid, label, pred):
            self.saver.setdefault(q, []).append((lab, score))

    def eval(self):
        """Return hits / number of queries as ``np.float32``.

        Returns 0 when nothing has been accumulated.
        """
        total = len(self.saver)
        if total == 0:
            # No query seen: avoid dividing by zero.
            return np.float32(0.)

        right = 0
        for items in self.saver.values():
            ranked = sorted(items, key=lambda x: x[1], reverse=True)
            # max(..., 0) keeps a non-positive k from selecting any item.
            top_k = ranked[:max(self.k, 0)]
            if any(lab == 1 for lab, _ in top_k):
                right += 1

        return np.float32(1.0 * right / total)


#class SemanticRecallMetrics(Metrics):
#    def __init__(self, qid, vec, type_id):
#        self.qid = qid
#        self.vec = vec
#        self.type_id = type_id
#        self.reset()
#
#    def reset(self):
#        self.saver = []
#
#    @property
#    def tensor(self):
#        return [self.qid, self.vec, self.type_id]
#
#    def update(self, args):
#        qid, vec, type_id = args
#        self.saver.append((qid, vec, type_id))
#
#    def eval(self):
#        dic = {}
#        for qid, vec, type_id in self.saver():
#            dic.setdefault(i, {}).setdefault(k, []).append(vec)
#        
#        for qid in dic:
#            assert len(dic[qid]) == 3
#            qvec = np.arrray(dic[qid][0])
#            assert len(qvec) == 1
#            ptvec = np.array(dic[qid][1])
#            ntvec = np.array(dic[qid][2])
#
#            np.matmul(qvec, np.transpose(ptvec))
#            np.matmul(qvec, np.transpose(ntvec))
#