#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

import numpy as np
import datetime
import logging
import json
import os
import operator

logger = logging.getLogger(__name__)


class MetricsCalculator():
    def __init__(self, name='TALL', mode='train'):
        """Create a metrics accumulator and initialise its per-mode state.

        Args:
            name: model name; in 'test'/'infer' mode it becomes the prefix
                of the results file name built by ``reset``.
            mode: one of 'train', 'valid', 'test' or 'infer'.
        """
        self.name, self.mode = name, mode
        # reset() allocates whichever accumulators the chosen mode needs.
        self.reset()

    def reset(self):
        """Re-initialise the accumulators that belong to the current mode.

        'train' and 'valid' get a zeroed ``aggr_loss``; 'test' and 'infer'
        get empty ``result_dict`` / ``save_res`` dictionaries and the name of
        the results file. Any other mode leaves the instance untouched.
        """
        logger.info('Resetting {} metrics...'.format(self.mode))
        if self.mode in ('train', 'valid'):
            self.aggr_loss = 0.0
            return
        if self.mode in ('test', 'infer'):
            self.result_dict = {}
            self.save_res = {}
            self.out_file = ''.join(
                [self.name, '_', self.mode, '_res_', '.json'])

    def nms_temporal(self, x1, x2, sim, overlap):
        """Greedy temporal non-maximum suppression over scored segments.

        The highest-scoring remaining segment is picked repeatedly; every
        other remaining segment whose IoU with the picked one is strictly
        greater than ``overlap`` is discarded.

        Args:
            x1: sequence of segment start values.
            x2: sequence of segment end values (same length as ``sim``).
            sim: sequence of scores, one per segment.
            overlap: IoU threshold above which a segment is suppressed.

        Returns:
            List of indices into the inputs for the kept segments, in the
            order they were picked (highest remaining score first).

        Raises:
            AssertionError: if ``x1`` or ``x2`` differ in length from ``sim``.
        """
        pick = []
        assert len(x1) == len(sim)
        assert len(x2) == len(sim)
        if len(x1) == 0:
            return pick

        # Segment lengths. Built as a list because it is indexed below;
        # on Python 3 ``map`` returns a one-shot iterator that cannot be
        # subscripted.
        lengths = [end - start for start, end in zip(x1, x2)]

        # Indices sorted by ascending score (stable), so the best is last.
        order = sorted(range(len(sim)), key=lambda idx: sim[idx])

        while len(order) > 0:
            best = order[-1]
            pick.append(best)

            survivors = []
            for j in order[:-1]:
                inter = max(0.0, min(x2[best], x2[j]) - max(x1[best], x1[j]))
                iou = inter / (lengths[best] + lengths[j] - inter)
                if iou <= overlap:
                    survivors.append(j)
            order = survivors
        return pick

    def calculate_IoU(self, i0, i1):
        """Return the temporal intersection-over-union of two intervals.

        The denominator is the span from the earliest start to the latest
        end of the two intervals.

        Args:
            i0: ``(start, end)`` pair.
            i1: ``(start, end)`` pair.

        Returns:
            The overlap length divided by the enclosing span. Disjoint
            intervals yield 0.0 rather than a negative value, and a
            non-positive enclosing span yields 0.0 instead of dividing by
            zero.
        """
        span = max(i0[1], i1[1]) - min(i0[0], i1[0])
        if span <= 0:
            return 0.0
        # Clamp at zero, matching the intersection handling in nms_temporal.
        inter = max(0.0, min(i0[1], i1[1]) - max(i0[0], i1[0]))
        return 1.0 * inter / span

    def compute_IoU_recall_top_n_forreg(self, top_n, iou_thresh,
                                        sentence_image_mat,
                                        sentence_image_reg_mat, sclips):
        correct_num = 0.0
        for k in range(sentence_image_mat.shape[0]):
            gt = sclips[k]
            gt_start = float(gt.split("_")[1])
            gt_end = float(gt.split("_")[2])
            sim_v = [v for v in sentence_image_mat[k]]
            starts = [s for s in sentence_image_reg_mat[k, :, 0]]
            ends = [e for e in sentence_image_reg_mat[k, :, 1]]
            picks = self.nms_temporal(starts, ends, sim_v, iou_thresh - 0.05)
            if top_n < len(picks):
                picks = picks[0:top_n]
            for index in picks:
                pred_start = sentence_image_reg_mat[k, index, 0]
                pred_end = sentence_image_reg_mat[k, index, 1]
                iou = self.calculate_IoU((gt_start, gt_end),
                                         (pred_start, pred_end))
                if iou >= iou_thresh:
                    correct_num += 1
                    break
        return correct_num

    def accumulate(self, fetch_list):
        """Fold one batch of fetched results into the running state.

        In 'valid' mode the mean of ``fetch_list[0]`` is added to
        ``aggr_loss``. In 'test'/'infer' mode every sample of
        ``fetch_list[1]`` is appended to ``result_dict`` under its movie
        name. Other modes are ignored.

        Args:
            fetch_list: ``[loss]`` in 'valid' mode; in 'test'/'infer' mode
                ``[outputs, samples]`` where each sample is indexable as
                (start, end, k, t, clip_sentences, clip_featmaps, movie_name).
        """
        if self.mode == 'valid':
            self.aggr_loss += np.array(fetch_list[0]).mean()
            return
        if self.mode not in ('test', 'infer'):
            return

        outputs = fetch_list[0]
        # Read every field up front so a malformed sample fails before
        # result_dict is modified.
        samples = [(s[0], s[1], s[2], s[3], s[4], s[5], s[6])
                   for s in fetch_list[1]]

        for start, end, k, t, sentences, featmaps, movie_name in samples:
            # NOTE(review): the whole ``outputs`` object is stored with each
            # sample rather than a per-sample slice - confirm evaluation
            # always runs with a batch size of 1.
            record = [outputs, start, end, k, t]
            # The first two slots of a movie's list hold its sentence and
            # feature-map lists; per-sample records follow.
            self.result_dict.setdefault(
                movie_name, [sentences, featmaps]).append(record)

    def accumulate_infer_results(self, fetch_list):
        """Do nothing; kept as a placeholder.

        Per the original note, inference accumulation is "the same as test":
        ``accumulate`` already handles both 'test' and 'infer' modes.
        """
        return None

    def finalize_metrics(self, savedir):
        # init
        IoU_thresh = [0.1, 0.3, 0.5, 0.7]
        all_correct_num_10 = [0.0] * 5
        all_correct_num_5 = [0.0] * 5
        all_correct_num_1 = [0.0] * 5
        all_retrievd = 0.0

        idx = 0
        all_number = len(self.result_dict)
        for movie_name in self.result_dict.keys():
            idx += 1
            logger.info('{} / {}'.format('%d' % idx, '%d' % all_number))

            movie_clip_sentences = self.result_dict[movie_name][0]
            movie_clip_featmaps = self.result_dict[movie_name][1]

            ls = len(movie_clip_sentences)
            lf = len(movie_clip_featmaps)
            sentence_image_mat = np.zeros([ls, lf])
            sentence_image_reg_mat = np.zeros([ls, lf, 2])

            movie_res = self.result_dict[movie_name][2:]
            for item_res in movie_res:
                outputs, start, end, k, t = item_res

                outputs = np.squeeze(outputs)
                sentence_image_mat[k, t] = outputs[0]
                reg_end = end + outputs[2]
                reg_start = start + outputs[1]

                sentence_image_reg_mat[k, t, 0] = reg_start
                sentence_image_reg_mat[k, t, 1] = reg_end

            sclips = [b[0] for b in movie_clip_sentences]

            for i in range(len(IoU_thresh)):
                IoU = IoU_thresh[i]
                correct_num_10 = self.compute_IoU_recall_top_n_forreg(
                    10, IoU, sentence_image_mat, sentence_image_reg_mat, sclips)
                correct_num_5 = self.compute_IoU_recall_top_n_forreg(
                    5, IoU, sentence_image_mat, sentence_image_reg_mat, sclips)
                correct_num_1 = self.compute_IoU_recall_top_n_forreg(
                    1, IoU, sentence_image_mat, sentence_image_reg_mat, sclips)

                logger.info(
                    movie_name +
                    " IoU= {}, R@10: {}; IoU= {}, R@5: {}; IoU= {}, R@1: {}".
                    format('%s' % str(IoU), '%s' % str(correct_num_10 / len(
                        sclips)), '%s' % str(IoU), '%s' % str(
                            correct_num_5 / len(sclips)), '%s' % str(IoU), '%s'
                           % str(correct_num_1 / len(sclips))))

                all_correct_num_10[i] += correct_num_10
                all_correct_num_5[i] += correct_num_5
                all_correct_num_1[i] += correct_num_1

            all_retrievd += len(sclips)

        for j in range(len(IoU_thresh)):
            logger.info(
                " IoU= {}, R@10: {}; IoU= {}, R@5: {}; IoU= {}, R@1: {}".format(
                    '%s' % str(IoU_thresh[j]), '%s' % str(all_correct_num_10[
                        j] / all_retrievd), '%s' % str(IoU_thresh[j]), '%s' %
                    str(all_correct_num_5[j] / all_retrievd), '%s' % str(
                        IoU_thresh[j]), '%s' % str(all_correct_num_1[j] /
                                                   all_retrievd)))

        self.R1_IOU5 = all_correct_num_1[2] / all_retrievd
        self.R5_IOU5 = all_correct_num_5[2] / all_retrievd

        self.save_res["best_R1_IOU5"] = self.R1_IOU5
        self.save_res["best_R5_IOU5"] = self.R5_IOU5

        self.filepath = os.path.join(savedir, self.out_file)
        with open(self.filepath, 'w') as f:
            f.write(
                json.dumps(
                    {
                        'version': 'VERSION 1.0',
                        'results': self.save_res,
                        'external_data': {}
                    },
                    indent=2))
            logger.info('results has been saved into file: {}'.format(
                self.filepath))

    def finalize_infer_metrics(self, savedir):
        idx = 0
        all_number = len(self.result_dict)
        res = dict()
        for movie_name in self.result_dict.keys():
            res[movie_name] = []
            idx += 1
            logger.info('{} / {}'.format('%d' % idx, '%d' % all_number))

            movie_clip_sentences = self.result_dict[movie_name][0]
            movie_clip_featmaps = self.result_dict[movie_name][1]

            ls = len(movie_clip_sentences)
            lf = len(movie_clip_featmaps)
            sentence_image_mat = np.zeros([ls, lf])
            sentence_image_reg_mat = np.zeros([ls, lf, 2])

            movie_res = self.result_dict[movie_name][2:]
            for item_res in movie_res:
                outputs, start, end, k, t = item_res

                outputs = np.squeeze(outputs)
                sentence_image_mat[k, t] = outputs[0]
                reg_end = end + outputs[2]
                reg_start = start + outputs[1]

                sentence_image_reg_mat[k, t, 0] = reg_start
                sentence_image_reg_mat[k, t, 1] = reg_end

            sclips = [b[0] for b in movie_clip_sentences]
            IoU = 0.5  #pre-define
            for k in range(sentence_image_mat.shape[0]):
                #ground_truth for compare
                gt = sclips[k]
                gt_start = float(gt.split("_")[1])
                gt_end = float(gt.split("_")[2])

                sim_v = [v for v in sentence_image_mat[k]]
                starts = [s for s in sentence_image_reg_mat[k, :, 0]]
                ends = [e for e in sentence_image_reg_mat[k, :, 1]]
                picks = self.nms_temporal(starts, ends, sim_v, IoU - 0.05)

                if 1 < len(picks):  #top1
                    picks = picks[0:1]

                for index in picks:
                    pred_start = sentence_image_reg_mat[k, index, 0]
                    pred_end = sentence_image_reg_mat[k, index, 1]
                    res[movie_name].append((k, pred_start, pred_end))

                logger.info(
                    'movie_name: {}, sentence_id: {}, pred_start_time: {}, pred_end_time: {}, gt_start_time: {}, gt_end_time: {}'.
                    format('%s' % movie_name, '%s' % str(k), '%s' % str(
                        pred_start), '%s' % str(pred_end), '%s' % str(gt_start),
                           '%s' % str(gt_end)))

        self.filepath = os.path.join(savedir, self.out_file)
        with open(self.filepath, 'w') as f:
            f.write(
                json.dumps(
                    {
                        'version': 'VERSION 1.0',
                        'results': res,
                        'external_data': {}
                    },
                    indent=2))
            logger.info('results has been saved into file: {}'.format(
                self.filepath))

    def get_computed_metrics(self):
        """Return the ``save_res`` dictionary (the same object, not a copy)."""
        computed = self.save_res
        return computed