# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""operator dsl function:encode"""

import akg
import akg.lang.cce

from akg.tvm.hybrid import script
import akg.tvm
import akg.topi


COORDINATES_LEN = 4
COORDINATES_PAD_LEN = 8


def bouding_box_encode(anchor_box, groundtruth_box, anchor_samples, scale_factors, epsilon=1e-5):
    """
    Encode the ground-truth box selected for each anchor relative to that anchor.

    For every (batch, anchor) pair the ground-truth box whose index is stored in
    ``anchor_samples`` is picked, and the offsets below are computed, where the
    ``_a`` suffix denotes the anchor and ``h``/``w`` already include ``epsilon``:

        ty = (ycenter - ycenter_a) / h_a
        tx = (xcenter - xcenter_a) / w_a
        th = log(h / h_a)
        tw = log(w / w_a)

    Args:
        anchor_box: akg.tvm.Tensor of shape (num_anchors, COORDINATES_PAD_LEN) and
            dtype float16. Entries 0..3 of the last axis are read as
            ymin, xmin, ymax, xmax.
        groundtruth_box: akg.tvm.Tensor of shape
            (batch_size, num_groundtruth, COORDINATES_PAD_LEN) with the same dtype
            as anchor_box. Entries 0..3 of the last axis are read as
            ymin, xmin, ymax, xmax.
        anchor_samples: akg.tvm.Tensor of shape (batch_size, num_anchors) and dtype
            int32; each element is the index (along axis 1 of groundtruth_box) of
            the ground-truth box assigned to that anchor.
        scale_factors: Tuple or list of COORDINATES_LEN scalars applied to
            (ty, tx, th, tw) in that order. An empty/None value disables scaling.
        epsilon: Value added to every height and width before the division and
            the logarithm. Default to be 1e-5.

    Returns:
        akg.tvm.Tensor of shape (batch_size, num_anchors, COORDINATES_LEN) holding
        (ty, tx, th, tw) along the last axis.

    Raises:
        AssertionError: if ranks, shapes or dtypes of the inputs are inconsistent.
        RuntimeError: if the dtype of anchor_box is not float16.
    """
    # check shapes
    anchor_box_shape = [x.value for x in anchor_box.shape]
    groundtruth_box_shape = [x.value for x in groundtruth_box.shape]
    anchor_samples_shape = [x.value for x in anchor_samples.shape]
    # NOTE(review): check_shape is not imported in the visible part of this file;
    # confirm it is in scope (e.g. provided by a validation utility module).
    for shape in (anchor_box_shape, groundtruth_box_shape, anchor_samples_shape):
        check_shape(shape)
    # ranks required by the indexing and the hybrid script below
    assert len(anchor_box_shape) == 2, \
        "anchor_box must be 2-D, but got shape %s" % (anchor_box_shape,)
    assert len(groundtruth_box_shape) == 3, \
        "groundtruth_box must be 3-D, but got shape %s" % (groundtruth_box_shape,)
    assert len(anchor_samples_shape) == 2, \
        "anchor_samples must be 2-D, but got shape %s" % (anchor_samples_shape,)
    # num anchors
    assert anchor_box_shape[0] == anchor_samples_shape[1], \
        "number of anchors mismatch: anchor_box has %s, anchor_samples has %s" \
        % (anchor_box_shape[0], anchor_samples_shape[1])
    # batch size
    assert groundtruth_box_shape[0] == anchor_samples_shape[0], \
        "batch size mismatch: groundtruth_box has %s, anchor_samples has %s" \
        % (groundtruth_box_shape[0], anchor_samples_shape[0])
    assert not scale_factors or len(scale_factors) == COORDINATES_LEN, \
        "scale_factors must be empty or contain %d values" % COORDINATES_LEN
    assert anchor_box_shape[-1] == COORDINATES_PAD_LEN, \
        "last dimension of anchor_box must be %d" % COORDINATES_PAD_LEN
    assert groundtruth_box_shape[-1] == COORDINATES_PAD_LEN, \
        "last dimension of groundtruth_box must be %d" % COORDINATES_PAD_LEN

    # check dtypes; (vextract instruction only support for float16)
    check_list = ["float16"]
    assert anchor_box.dtype == groundtruth_box.dtype, \
        "anchor_box and groundtruth_box must have the same dtype"
    dtype = anchor_box.dtype
    if dtype.lower() not in check_list:
        raise RuntimeError("bouding_box_encode only support %s while dtype is %s" % (",".join(check_list), dtype))
    assert anchor_samples.dtype == "int32", "anchor_samples must be int32"

    # extract coordinate for anchor
    # The combiner returns its second operand and the identity is 0; the reduction
    # runs over the padded coordinate axis of each anchor row.
    # NOTE(review): presumably this only serves to feed a whole padded row to the
    # extractN intrinsics -- confirm against the vextract lowering.
    reducer = akg.tvm.comm_reducer(lambda x, y: y, lambda t: akg.tvm.const(0, dtype=t), name="reducer")
    anchor_coordinate_shape = (anchor_box_shape[0],)
    k0 = akg.tvm.reduce_axis((0, COORDINATES_PAD_LEN), name='k0')
    ymin_a = akg.tvm.compute(anchor_coordinate_shape, lambda j0: reducer(
        akg.lang.cce.extract0(anchor_box[j0, k0]), axis=k0), name="ymin_a")
    k1 = akg.tvm.reduce_axis((0, COORDINATES_PAD_LEN), name='k1')
    xmin_a = akg.tvm.compute(anchor_coordinate_shape, lambda j1: reducer(
        akg.lang.cce.extract1(anchor_box[j1, k1]), axis=k1), name="xmin_a")
    k2 = akg.tvm.reduce_axis((0, COORDINATES_PAD_LEN), name='k2')
    ymax_a = akg.tvm.compute(anchor_coordinate_shape, lambda j2: reducer(
        akg.lang.cce.extract2(anchor_box[j2, k2]), axis=k2), name="ymax_a")
    k3 = akg.tvm.reduce_axis((0, COORDINATES_PAD_LEN), name='k3')
    xmax_a = akg.tvm.compute(anchor_coordinate_shape, lambda j3: reducer(
        akg.lang.cce.extract3(anchor_box[j3, k3]), axis=k3), name="xmax_a")
    # get center coordinates and sizes for anchor
    width_a_raw = akg.lang.cce.vsub(xmax_a, xmin_a)
    height_a_raw = akg.lang.cce.vsub(ymax_a, ymin_a)
    height_a_half = akg.lang.cce.vmuls(height_a_raw, akg.tvm.const(0.5, dtype))
    ycenter_a_raw = akg.lang.cce.vadd(ymin_a, height_a_half)
    width_a_half = akg.lang.cce.vmuls(width_a_raw, akg.tvm.const(0.5, dtype))
    xcenter_a_raw = akg.lang.cce.vadd(xmin_a, width_a_half)

    # extract coordinate for anchor_sample
    # Gathers, for every (batch, anchor), the ground-truth box whose index is given
    # by anchor_samples, laid out as (coordinate, batch, anchor).
    # NOTE: an element is only written when the sampled index lies in
    # [0, num_groundtruth); otherwise the output element is never assigned.
    @script
    def hy_func_extract_sample(anchor_samples_hy, groundtruth_box_hy):
        batch_size, num_anchor = anchor_samples_hy.shape
        _, num_groundtruth, _ = groundtruth_box_hy.shape
        output = output_tensor((COORDINATES_PAD_LEN, batch_size, num_anchor), groundtruth_box_hy.dtype)
        for i in range(batch_size):
            for j in range(num_anchor):
                # COORDINATES_PAD_LEN should be replaced by COORDINATES_LEN once that has been validated
                for m in range(COORDINATES_PAD_LEN):
                    # the loop over k should be replaced by indexing with anchor_samples_hy[i, j] directly,
                    # but using anchor_samples_hy[i, j] as an index currently has some problem
                    for k in range(num_groundtruth):
                        if k == anchor_samples_hy[i, j]:
                            output[m, i, j] = groundtruth_box_hy[i, k, m]
        return output
    anchor_samples_box_extract = hy_func_extract_sample(anchor_samples, groundtruth_box)
    ymin = akg.tvm.compute(anchor_samples_shape, lambda *indice: anchor_samples_box_extract[0, indice[0], indice[1]], name="ymin")
    xmin = akg.tvm.compute(anchor_samples_shape, lambda *indice: anchor_samples_box_extract[1, indice[0], indice[1]], name="xmin")
    ymax = akg.tvm.compute(anchor_samples_shape, lambda *indice: anchor_samples_box_extract[2, indice[0], indice[1]], name="ymax")
    xmax = akg.tvm.compute(anchor_samples_shape, lambda *indice: anchor_samples_box_extract[3, indice[0], indice[1]], name="xmax")
    # get center coordinates and sizes for anchor_sample
    width = akg.lang.cce.vsub(xmax, xmin)
    height = akg.lang.cce.vsub(ymax, ymin)
    height_half = akg.lang.cce.vmuls(height, akg.tvm.const(0.5, dtype))
    ycenter = akg.lang.cce.vadd(ymin, height_half)
    width_half = akg.lang.cce.vmuls(width, akg.tvm.const(0.5, dtype))
    xcenter = akg.lang.cce.vadd(xmin, width_half)

    # encode
    # per-anchor quantities are broadcast to (batch_size, num_anchors)
    height_a = akg.topi.broadcast_to(height_a_raw, anchor_samples_shape)
    width_a = akg.topi.broadcast_to(width_a_raw, anchor_samples_shape)
    ycenter_a = akg.topi.broadcast_to(ycenter_a_raw, anchor_samples_shape)
    xcenter_a = akg.topi.broadcast_to(xcenter_a_raw, anchor_samples_shape)
    # epsilon is added to every size before it is used as a divisor or log argument
    epsilon_ = akg.lang.cce.broadcast(akg.tvm.const(epsilon, dtype), anchor_samples_shape)
    h_a = akg.lang.cce.vadd(height_a, epsilon_)
    w_a = akg.lang.cce.vadd(width_a, epsilon_)
    h = akg.lang.cce.vadd(height, epsilon_)
    w = akg.lang.cce.vadd(width, epsilon_)
    # divisions are expressed as multiplication by the reciprocal
    xc_sub_xc_a = akg.lang.cce.vsub(xcenter, xcenter_a)
    w_a_rec = akg.lang.cce.vrec(w_a)
    h_a_rec = akg.lang.cce.vrec(h_a)
    tx = akg.lang.cce.vmul(xc_sub_xc_a, w_a_rec)
    yc_sub_yc_a = akg.lang.cce.vsub(ycenter, ycenter_a)
    ty = akg.lang.cce.vmul(yc_sub_yc_a, h_a_rec)
    h_div_h_a = akg.lang.cce.vmul(h, h_a_rec)
    th = akg.lang.cce.vlog(h_div_h_a)
    w_div_w_a = akg.lang.cce.vmul(w, w_a_rec)
    tw = akg.lang.cce.vlog(w_div_w_a)
    if scale_factors:
        ty = akg.lang.cce.vmuls(ty, akg.tvm.const(scale_factors[0], dtype))
        tx = akg.lang.cce.vmuls(tx, akg.tvm.const(scale_factors[1], dtype))
        th = akg.lang.cce.vmuls(th, akg.tvm.const(scale_factors[2], dtype))
        tw = akg.lang.cce.vmuls(tw, akg.tvm.const(scale_factors[3], dtype))
    # stack the four offsets along a new last axis: (batch_size, num_anchors, 4)
    return akg.topi.stack([ty, tx, th, tw], axis=-1)