evaluate.py 2.0 KB
Newer Older
0
0YuanZhang0 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.                                                                                                      
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for auto dialogue evaluation"""
Y
Yibing Liu 已提交
15 16 17 18 19

import sys
import numpy as np
import pandas as pd

L
lilu 已提交
20

Y
Yibing Liu 已提交
21 22 23 24 25
def get_p_at_n_in_m(data, n, m, ind):
    """
    Tell whether the row at ``ind`` scores within the top ``n`` of its window.

    The window is ``data[ind:ind + m]``; only element 0 of each row (the
    score) is read here. Presumably the row at ``ind`` is the positive
    candidate and the rest of the window are negatives -- confirm against
    the caller.

    Args:
        data: indexable, sliceable sequence of rows whose element 0 is a
            comparable score.
        n: 1-based rank cut-off inside the window.
        m: number of rows, starting at ``ind``, that make up the window.
        ind: index of the reference row.

    Returns:
        1 if the n-th highest score in the window is less than or equal to
        the reference row's score, otherwise 0.

    Raises:
        IndexError: if ``ind`` is out of range, or the window holds fewer
            than ``n`` rows.
    """
    reference_score = data[ind][0]

    window_scores = [row[0] for row in data[ind:ind + m]]
    window_scores.sort(reverse=True)

    # "<=" means a tie with the n-th best score still counts as a hit.
    nth_best = window_scores[n - 1]
    return 1 if nth_best <= reference_score else 0


def evaluate_Recall(data):
    """
    Evaluate Recall

    ``data`` is consumed in consecutive groups of 10 rows. The first row of
    every group must carry the value 1 at index 1; it is the reference row
    handed to ``get_p_at_n_in_m``. Rows left over after the last complete
    group are ignored.

    Args:
        data: sized, indexable sequence of rows; index 0 is read as the
            score (by ``get_p_at_n_in_m``) and index 1 as the label.

    Returns:
        dict with keys ``'1_in_2'``, ``'1_in_10'``, ``'2_in_10'`` and
        ``'5_in_10'``; each value is the fraction of groups for which
        ``get_p_at_n_in_m`` returned 1 with that (n, m) setting.

    Raises:
        ValueError: if ``data`` holds fewer than 10 rows, so there is no
            complete group to average over.
        AssertionError: if the first row of a group is not labelled 1.
    """
    group_size = 10
    # (result key, n, m) for every reported metric, in output order.
    metrics = (
        ('1_in_2', 1, 2),
        ('1_in_10', 1, 10),
        ('2_in_10', 2, 10),
        ('5_in_10', 5, 10),
    )

    length = len(data) // group_size
    print('length=%s' % length)

    if length == 0:
        # Without this guard the averaging below fails with a bare
        # ZeroDivisionError that says nothing about the real problem.
        raise ValueError(
            'evaluate_Recall needs at least %d rows, got %d'
            % (group_size, len(data)))

    hits = {key: 0.0 for key, _, _ in metrics}

    for ind in range(0, length * group_size, group_size):
        # Raised explicitly instead of via ``assert`` so that the check is
        # not stripped when Python runs with -O; the exception type is kept.
        if data[ind][1] != 1:
            raise AssertionError(
                'row %d should start a group with label 1' % ind)

        for key, n, m in metrics:
            hits[key] += get_p_at_n_in_m(data, n, m, ind)

    recall_dict = {key: hits[key] / length for key, _, _ in metrics}

    return recall_dict


def evaluate_cor(pred, true):
    """
    Compute the Spearman correlation between ``pred`` and ``true``.

    Both inputs are placed side by side as the columns of a DataFrame, so
    they must be acceptable to ``pd.DataFrame`` as equally sized columns.

    Args:
        pred: predicted values.
        true: reference values.

    Returns:
        The off-diagonal entry of the 2x2 Spearman correlation matrix of
        the two columns.
    """
    paired = pd.DataFrame({'pred': pred, 'true': true})
    spearman = paired.corr(method='spearman')
    # Row 'true' of column 'pred': the correlation between the two inputs.
    return spearman.loc['true', 'pred']