diff --git a/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/scripts/macro_avg.py b/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/scripts/macro_avg.py
index 768a1678aa812c1c96affd5c467253d01bee998a..dc0f9ff3a11a8e9572c6daba2415162a9a54a392 100644
--- a/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/scripts/macro_avg.py
+++ b/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/scripts/macro_avg.py
@@ -1,13 +1,13 @@
 import numpy as np
 import argparse
+import json
 import re
 
 def extract_score(line):
-    prog = re.compile(r'{"f1": (-?\d+\.?\d*e?-?\d*?), "exact_match": (-?\d+\.?\d*e?-?\d*?)}')
-    result = prog.match(line)
-    f1 = float(result.group(1))
-    em = float(result.group(2))
-    return f1, em
+    score_json = json.loads(line)
+    f1 = score_json['f1']
+    em = score_json['exact_match']
+    return float(f1), float(em)
 
 
 if __name__ == '__main__':
@@ -16,7 +16,7 @@ if __name__ == '__main__':
     parser.add_argument('input_file', help='Score file')
     args = parser.parse_args()
     with open(args.input_file) as fin:
-        lines = map(str.strip, fin.readlines())
+        lines = list(map(str.strip, fin.readlines()))
     in_domain_scores = {}
     for dataset_id in range(0, 12, 2):
         f1, em = extract_score(lines[dataset_id+1])
@@ -25,5 +25,5 @@ if __name__ == '__main__':
     for dataset_id in range(12, 24, 2):
         f1, em = extract_score(lines[dataset_id+1])
         out_of_domain_scores[lines[dataset_id]] = f1
-    print('In domain avg: {}'.format(np.mean(in_domain_scores.values())))
-    print('Out of domain avg: {}'.format(np.mean(out_of_domain_scores.values())))
+    print('In domain avg: {}'.format(sum(in_domain_scores.values()) / len(in_domain_scores.values())))
+    print('Out of domain avg: {}'.format(sum(out_of_domain_scores.values()) / len(out_of_domain_scores.values())))