import time
import math
import cv2
import numpy as np

import paddle.fluid as fluid
from paddle.fluid import dygraph

from pytracking.tracker.base.basetracker import BaseTracker
from ltr.data.anchor import Anchors


class SiamRPN(BaseTracker):
    def initialize_features(self):
        if not getattr(self, 'features_initialized', False):
            self.params.features.initialize()
        self.features_initialized = True

    def initialize(self, image, state, *args, **kwargs):
        # Initialize some stuff
        self.frame_num = 1

        # Initialize features
        self.initialize_features()

        self.time = 0
        tic = time.time()

        # Get position and size
        # self.pos: target center (y, x)
        self.pos = np.array(
            [
                state[1] + state[3] // 2,
                state[0] + state[2] // 2
            ],
            dtype=np.float32)
        self.target_sz = np.array([state[3], state[2]], dtype=np.float32)

        # Set search area
        context = self.params.context_amount * np.sum(self.target_sz)
        self.z_sz = np.sqrt(np.prod(self.target_sz + context))
        self.x_sz = round(self.z_sz * (self.params.instance_size / self.params.exemplar_size))

        self.score_size = (self.params.instance_size - self.params.exemplar_size) // \
            self.params.anchor_stride + 1 + self.params.base_size
        self.anchor_num = len(self.params.anchor_ratios) * len(self.params.anchor_scales)
        hanning = np.hanning(self.score_size)
        window = np.outer(hanning, hanning)
        self.window = np.tile(window.flatten(), self.anchor_num)
        self.anchors = self.generate_anchor(self.score_size)

        # Convert image
        self.avg_color = np.mean(image, axis=(0, 1))
        with dygraph.guard():
            exemplar_image = self._crop_and_resize(
                image,
                self.pos,
                self.z_sz,
                out_size=self.params.exemplar_size,
                pad_color=self.avg_color)
            
            # get template
            self.params.features.features[0].net.template(exemplar_image)

        self.time += time.time() - tic
    
    def track(self, image):
        self.frame_num += 1

        # Convert image
        image = np.asarray(image)

        with dygraph.guard():
            # search images
            instance_image = self._crop_and_resize(
                image,
                self.pos,
                self.x_sz,
                out_size=self.params.instance_size,
                pad_color=self.avg_color)

            # predict
            output = self.params.features.features[0].net.track(instance_image)
            score = self._convert_score(output['cls'])
            pred_bbox = self._convert_bbox(output['loc'], self.anchors)
        
        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        # scale penalty
        scale_z = self.params.exemplar_size / self.z_sz
        s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                     (sz(self.target_sz[1]*scale_z, self.target_sz[0]*scale_z)))

        # aspect ratio penalty
        r_c = change((self.target_sz[1]/self.target_sz[0]) /
                     (pred_bbox[2, :]/pred_bbox[3, :]))
        penalty = np.exp(-(r_c * s_c - 1) * self.params.penalty_k)
        pscore = penalty * score

        # window penalty
        pscore = pscore * (1 - self.params.window_influence) + \
            self.window * self.params.window_influence
        best_idx = np.argmax(pscore)

        bbox = pred_bbox[:, best_idx] / scale_z
        lr = penalty[best_idx] * score[best_idx] * self.params.lr

        cx = bbox[0] + self.pos[1]
        cy = bbox[1] + self.pos[0]

        # smooth bbox
        width = self.target_sz[1] * (1 - lr) + bbox[2] * lr
        height = self.target_sz[0] * (1 - lr) + bbox[3] * lr

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width, height, image.shape[:2])
        
        # update state
        self.pos = np.array([cy, cx])
        self.target_sz = np.array([height, width])
        context = self.params.context_amount * np.sum(self.target_sz)
        self.z_sz = np.sqrt(np.prod(self.target_sz + context))
        self.x_sz = round(self.z_sz * (self.params.instance_size / self.params.exemplar_size))

        # Return new state
        yx = self.pos - self.target_sz / 2
        new_state = np.array([yx[1], yx[0], self.target_sz[1], self.target_sz[0]], 'float32')

        return new_state.tolist()

    def generate_anchor(self, score_size):
        anchors = Anchors(
            self.params.anchor_stride,
            self.params.anchor_ratios,
            self.params.anchor_scales)
        anchor = anchors.anchors
        x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3]
        anchor = np.stack([(x1+x2)*0.5, (y1+y2)*0.5, x2-x1, y2-y1], 1)
        total_stride = anchors.stride
        anchor_num = anchor.shape[0]
        anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4))
        ori = - (score_size // 2) * total_stride
        xx, yy = np.meshgrid(
            [ori + total_stride * dx for dx in range(score_size)],
            [ori + total_stride * dy for dy in range(score_size)])
        xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \
            np.tile(yy.flatten(), (anchor_num, 1)).flatten()
        anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32)
        return anchor

    def _crop_and_resize(self, image, center, size, out_size, pad_color):
        # convert box to corners (0-indexed)
        size = round(size)
        corners = np.concatenate(
            (
                np.floor(center - (size + 1) / 2 + 0.5),
                np.floor(center - (size + 1) / 2 + 0.5) + size
            ))
        corners = np.round(corners).astype(int)

        # pad image if necessary
        pads = np.concatenate((-corners[:2], corners[2:] - image.shape[:2]))
        npad = max(0, int(pads.max()))
        if npad > 0:
            image = cv2.copyMakeBorder(
                image,
                npad,
                npad,
                npad,
                npad,
                cv2.BORDER_CONSTANT,
                value=pad_color)

        # crop image patch
        corners = (corners + npad).astype(int)
        patch = image[corners[0]:corners[2], corners[1]:corners[3]]

        # resize to out_size
        patch = cv2.resize(patch, (out_size, out_size))

        patch = patch.transpose(2, 0, 1)
        patch = patch[np.newaxis, :, :, :]
        patch = patch.astype(np.float32)
        patch = fluid.dygraph.to_variable(patch)
        return patch

    def _convert_bbox(self, delta, anchor):
        delta = fluid.layers.transpose(delta, [1, 2, 3, 0])
        delta = fluid.layers.reshape(delta, [4, -1]).numpy()

        delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0]
        delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1]
        delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2]
        delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3]
        return delta

    def _convert_score(self, score):
        score = fluid.layers.transpose(score, [1, 2, 3, 0])
        score = fluid.layers.reshape(score, [2, -1])
        score = fluid.layers.transpose(score, [1, 0])
        score = fluid.layers.softmax(score, axis=1)[:, 1].numpy()
        return score

    def _bbox_clip(self, cx, cy, width, height, boundary):
        cx = max(0, min(cx, boundary[1]))
        cy = max(0, min(cy, boundary[0]))
        width = max(10, min(width, boundary[1]))
        height = max(10, min(height, boundary[0]))
        return cx, cy, width, height