# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division

import sys
import os
import numpy as np
import datetime
import logging
from collections import defaultdict
import pickle

logger = logging.getLogger(__name__)


class MetricsCalculator(object):
    def __init__(self, name, mode, **metrics_args):
        """
        metrics args:
            num_test_clips, number of clips sampled from each video at test time
            dataset_size, total number of videos in the dataset
            filename_gt, a file where each line stores the ground truth of one video
            checkpoint_dir, directory where the test results are stored
            num_classes, number of classes in the dataset
        """
        self.name = name
        self.mode = mode  # 'train', 'val', 'test'
        self.metrics_args = metrics_args

        self.num_test_clips = metrics_args['num_test_clips']
        self.dataset_size = metrics_args['dataset_size']
        self.filename_gt = metrics_args['filename_gt']
        self.checkpoint_dir = metrics_args['checkpoint_dir']
        self.num_classes = metrics_args['num_classes']
        self.reset()

    def reset(self):
        logger.info('Resetting {} metrics...'.format(self.mode))
        self.aggr_acc1 = 0.0
        self.aggr_acc5 = 0.0
        self.aggr_loss = 0.0
        self.aggr_batch_size = 0
        self.seen_inds = defaultdict(int)
        self.results = []

    def calculate_metrics(self, loss, pred, labels):
        pass

    def accumulate(self, loss, pred, labels):
        # Column 0 of `labels` holds the video id of each clip in the batch.
        labels = labels.astype(int)
        labels = labels[:, 0]
        for i in range(pred.shape[0]):
            probs = pred[i, :].tolist()
            vid = labels[i]
            self.seen_inds[vid] += 1
            if self.seen_inds[vid] > self.num_test_clips:
                logger.warning('Video id {} has already been seen. Skip.'.format(vid))
                continue
            save_pairs = [vid, probs]
            self.results.append(save_pairs)
        logger.info("({0} / {1}) videos".format(
            len(self.seen_inds), self.dataset_size))

    def finalize_metrics(self):
        if self.filename_gt is not None:
            evaluate_results(self.results, self.filename_gt, self.dataset_size,
                             self.num_classes, self.num_test_clips)

        # save temporary file
        if not os.path.isdir(self.checkpoint_dir):
            os.makedirs(self.checkpoint_dir)
        pkl_path = os.path.join(self.checkpoint_dir, "results_probs.pkl")
        # pickle requires a binary file handle ('wb'), not text mode ('w')
        with open(pkl_path, 'wb') as f:
            pickle.dump(self.results, f)
        logger.info('Temporary file saved to: {}'.format(pkl_path))


def read_groundtruth(filename_gt):
    # Each line is expected to hold the ground-truth label in its second
    # whitespace-separated column.
    labels = []
    with open(filename_gt, 'r') as f:
        for line in f:
            rows = line.split()
            labels.append(int(rows[1]))
    return labels


def evaluate_results(results, filename_gt, test_dataset_size, num_classes,
                     num_test_clips):
    gt_labels = read_groundtruth(filename_gt)
    sample_num = test_dataset_size
    class_num = num_classes
    sample_video_times = num_test_clips
    counts = np.zeros(sample_num, dtype=np.int32)
    probs = np.zeros((sample_num, class_num))

    assert len(gt_labels) == sample_num
    # clip_accuracy: the average accuracy over all clips (e.g. 10 * 19761 clips)
    # clip1_accuracy: the 1st clip's accuracy (starting from frame 0)
    clip_accuracy = 0
    clip1_accuracy = 0
    clip1_count = 0
    seen_inds = defaultdict(int)

    # evaluate
    for entry in results:
        vid = entry[0]
        prob = np.array(entry[1])
        probs[vid] += prob[0:class_num]
        counts[vid] += 1

        idx = prob.argmax()
        if idx == gt_labels[vid]:
            # clip accuracy
            clip_accuracy += 1

        # clip1 accuracy
        seen_inds[vid] += 1
        if seen_inds[vid] == 1:
            clip1_count += 1
            if idx == gt_labels[vid]:
                clip1_accuracy += 1

    # sanity check
    max_clips = 0
    min_clips = sys.maxsize
    count_empty = 0
    count_corrupted = 0
    for i in range(sample_num):
        max_clips = max(max_clips, counts[i])
        min_clips = min(min_clips, counts[i])
        if counts[i] != sample_video_times:
            count_corrupted += 1
            logger.warning('Id: {} count: {}'.format(i, counts[i]))
        if counts[i] == 0:
            count_empty += 1

    logger.info('Num of empty videos: {}'.format(count_empty))
    logger.info('Num of corrupted videos: {}'.format(count_corrupted))
    logger.info('Max num of clips in a video: {}'.format(max_clips))
    logger.info('Min num of clips in a video: {}'.format(min_clips))

    # clip1 accuracy for sanity (print clip1 first as it is lowest)
    logger.info('Clip1 accuracy: {:.2f} percent ({}/{})'.format(
        100. * clip1_accuracy / clip1_count, clip1_accuracy, clip1_count))

    # clip accuracy for sanity
    logger.info('Clip accuracy: {:.2f} percent ({}/{})'.format(
        100. * clip_accuracy / len(results), clip_accuracy, len(results)))

    # compute video-level accuracy from the per-video accumulated clip probabilities
    accuracy = 0
    accuracy_top5 = 0
    for i in range(sample_num):
        prob = probs[i]

        # top-1
        idx = prob.argmax()
        if idx == gt_labels[i] and counts[i] > 0:
            accuracy += 1

        # top-5
        ids = np.argsort(prob)[::-1]
        for j in range(5):
            if ids[j] == gt_labels[i] and counts[i] > 0:
                accuracy_top5 += 1
                break

    accuracy = float(accuracy) / float(sample_num)
    accuracy_top5 = float(accuracy_top5) / float(sample_num)

    logger.info('-' * 80)
    logger.info('top-1 accuracy: {:.2f} percent'.format(accuracy * 100))
    logger.info('top-5 accuracy: {:.2f} percent'.format(accuracy_top5 * 100))
    logger.info('-' * 80)
    return
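

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): the argument
# values below -- clip counts, dataset size, class count, and the checkpoint
# directory -- are hypothetical assumptions chosen only to make the example
# self-contained. `pred` is a (batch_size, num_classes) array of per-clip
# class probabilities; column 0 of `labels` holds each clip's video id.
# Passing filename_gt=None skips the file-based evaluation and only dumps the
# accumulated results to results_probs.pkl.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    calculator = MetricsCalculator(
        name='kinetics',
        mode='test',
        num_test_clips=2,  # clips sampled per video at test time
        dataset_size=3,    # total number of videos in this toy test set
        filename_gt=None,  # no ground-truth file in this sketch
        checkpoint_dir='checkpoints_demo',
        num_classes=5)
    rng = np.random.RandomState(0)
    for _ in range(calculator.num_test_clips):
        pred = rng.rand(3, 5)                 # per-clip class probabilities
        labels = np.arange(3).reshape(-1, 1)  # column 0: video ids 0, 1, 2
        calculator.accumulate(loss=None, pred=pred, labels=labels)
    calculator.finalize_metrics()             # writes results_probs.pkl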