cal_pos_neg.py 2.3 KB
Newer Older
Y
add  
yinhaofeng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
#!/usr/bin/python  
#-*- coding:utf-8 -*-  
############################  
#File Name: cal_pos_neg.py
#Author: youqiheng 
#Mail: youqiheng@baidu.com
#Created Time: 2018-04-15 21:59:45
############################ 
"""
docstring
"""

import os
import sys

# Number of input lines between two progress messages.
PROGRESS_INTERVAL = 500000


def count_pairs(score_list, label_list):
    """Classify every differently-labeled pair of one query group.

    Pairs whose labels are equal are ignored. For the remaining pairs the
    sign of (score_i - score_j) * (label_i - label_j) decides the bucket.

    Args:
        score_list: scores of the group, in input order.
        label_list: integer labels, parallel to score_list.

    Returns:
        A tuple (pos, neg, equal): pairs whose score order agrees with the
        label order, pairs whose score order disagrees, and pairs with
        tied scores.
    """
    pos = neg = equal = 0
    samples = list(zip(score_list, label_list))
    for idx, (score_a, label_a) in enumerate(samples):
        for score_b, label_b in samples[idx + 1:]:
            if label_a == label_b:
                continue
            agreement = (score_a - score_b) * (label_a - label_b)
            if agreement < 0:
                neg += 1
            elif agreement > 0:
                pos += 1
            else:
                equal += 1
    return pos, neg, equal


def _accumulate(stats, score_list, label_list):
    """Add the pair counts of one finished query group to stats."""
    pos, neg, equal = count_pairs(score_list, label_list)
    stats["pos_num"] += pos
    stats["neg_num"] += neg
    stats["equal_num"] += equal
    stats["pair_num"] += pos + neg + equal


def _format_ratio(numerator, denominator):
    """Return numerator/denominator as text, or "n/a" when undefined."""
    if denominator == 0:
        return "n/a"
    return str(1.0 * numerator / denominator)


def compute_stats(lines, progress_every=PROGRESS_INTERVAL):
    """Accumulate pair statistics over an iterable of input lines.

    Each line is expected to look like (tab-separated):
        0       12.786960       1
        0       -1.480890       0
    i.e. query id, score, integer label. Lines that do not split into
    exactly three columns are skipped. Consecutive lines with the same
    query id form one group.

    Args:
        lines: iterable of text lines (for example an open file).
        progress_every: print a progress line each time this many lines
            have been read; a falsy value disables progress output.

    Returns:
        A dict with the integer counters "pos_num", "neg_num",
        "equal_num", "pair_num" and "query_num".

    Raises:
        ValueError: if a three-column line has a non-numeric score or a
            non-integer label.
    """
    stats = {
        "pos_num": 0,
        "neg_num": 0,
        "equal_num": 0,
        "pair_num": 0,
        "query_num": 0,
    }
    score_list = []
    label_list = []
    # None can never equal a query id read from the file, so the first
    # group is always counted, whatever its id is.
    last_query = None

    for cnt, line in enumerate(lines, 1):
        cols = line.strip().split("\t")
        if progress_every and cnt % progress_every == 0:
            # The ratio only covers groups that are already complete; it
            # is reported as "n/a" while no reversed pair has been seen.
            print("cnt: %s %s" % (
                cnt, _format_ratio(stats["pos_num"], stats["neg_num"])))
        if len(cols) != 3:
            continue

        cur_query = cols[0]
        if cur_query != last_query:
            # A new query id closes the previous group.
            stats["query_num"] += 1
            _accumulate(stats, score_list, label_list)
            score_list = []
            label_list = []
            last_query = cur_query

        score_list.append(round(float(cols[1]), 6))
        label_list.append(int(cols[2]))

    # The last group is not followed by a query change; flush it here.
    _accumulate(stats, score_list, label_list)
    return stats


def main(argv):
    """Run the script: read argv[1] and print the statistics.

    Args:
        argv: command-line arguments, argv[0] being the program name.

    Returns:
        The process exit status: -1 when the input path is missing,
        0 otherwise.
    """
    if len(argv) < 2:
        print("usage:python %s input" % (argv[0]))
        return -1

    with open(argv[1]) as fin:
        stats = compute_stats(fin)

    pos_num = stats["pos_num"]
    neg_num = stats["neg_num"]
    equal_num = stats["equal_num"]

    # The ratios are undefined without at least one reversed pair, so the
    # detailed report is only printed when neg_num is positive.
    if neg_num > 0:
        print("pnr: %s" % (1.0 * pos_num / neg_num))
        print("query_num: %s" % stats["query_num"])
        print("pair_num: %s %s" % (
            pos_num + neg_num + equal_num, stats["pair_num"]))
        print("equal_num: %s" % equal_num)
        # Label text means "rate of correctly ordered pairs".
        print("正序率: %s" % (1.0 * pos_num / (pos_num + neg_num)))
    print("%s %s" % (pos_num, neg_num))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))