# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools

import paddle.fluid as fluid
from paddle.fluid.initializer import Constant, Normal, Xavier
from paddle.fluid.param_attr import ParamAttr

from operations import *


class Cell(object):
    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction,
                 reduction_prev):
        print(C_prev_prev, C_prev, C)

        if reduction_prev:
            self.preprocess0 = functools.partial(FactorizedReduce, C_out=C)
        else:
            self.preprocess0 = functools.partial(
                ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)
        self.preprocess1 = functools.partial(
            ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)
        if reduction:
            op_names, indices = zip(*genotype.reduce)
            concat = genotype.reduce_concat
        else:
            op_names, indices = zip(*genotype.normal)
            concat = genotype.normal_concat
        print(op_names, indices, concat, reduction)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = []
        for name, index in zip(op_names, indices):
            stride = 2 if reduction and index < 2 else 1
            op = functools.partial(OPS[name], C=C, stride=stride, affine=True)
            self._ops += [op]
        self._indices = indices

    def forward(self, s0, s1, drop_prob, is_train, name):
        self.training = is_train
        preprocess0_name = name + 'preprocess0.'
        preprocess1_name = name + 'preprocess1.'
        s0 = self.preprocess0(s0, name=preprocess0_name)
        s1 = self.preprocess1(s1, name=preprocess1_name)
        out = [s0, s1]
        for i in range(self._steps):
            h1 = out[self._indices[2 * i]]
            h2 = out[self._indices[2 * i + 1]]
            op1 = self._ops[2 * i]
            op2 = self._ops[2 * i + 1]
            h3 = op1(h1, name=name + '_ops.' + str(2 * i) + '.')
            h4 = op2(h2, name=name + '_ops.' + str(2 * i + 1) + '.')
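            # Drop path regularization, as in DARTS: each non-identity edge
            # output is randomly zeroed with probability `drop_prob` during
            # training. The `h3 != h1` / `h4 != h2` checks below compare
            # Variable objects, so an identity (skip) edge, which returns its
            # input unchanged, is exempt; 'upscale_in_train' rescales the
            # kept activations so no correction is needed at inference.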
            if self.training and drop_prob > 0.:
                if h3 != h1:
                    h3 = fluid.layers.dropout(
                        h3,
                        drop_prob,
                        dropout_implementation='upscale_in_train')
                if h4 != h2:
                    h4 = fluid.layers.dropout(
                        h4,
                        drop_prob,
                        dropout_implementation='upscale_in_train')
            s = h3 + h4
            out += [s]
        return fluid.layers.concat([out[i] for i in self._concat], axis=1)


def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
    relu_a = fluid.layers.relu(input)
    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3)
    conv2d_a = fluid.layers.conv2d(
        pool_a,
        128,
        1,
        name=aux_name + '.features.2',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.2.weight'),
        bias_attr=False)
    bn_a_name = aux_name + '.features.3'
    bn_a = fluid.layers.batch_norm(
        conv2d_a,
        act='relu',
        name=bn_a_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_a_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_a_name + '.bias'),
        moving_mean_name=bn_a_name + '.running_mean',
        moving_variance_name=bn_a_name + '.running_var')
    conv2d_b = fluid.layers.conv2d(
        bn_a,
        768,
        2,
        name=aux_name + '.features.5',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.5.weight'),
        bias_attr=False)
    bn_b_name = aux_name + '.features.6'
    bn_b = fluid.layers.batch_norm(
        conv2d_b,
        act='relu',
        name=bn_b_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_b_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_b_name + '.bias'),
        moving_mean_name=bn_b_name + '.running_mean',
        moving_variance_name=bn_b_name + '.running_var')
    fc_name = aux_name + '.classifier'
    fc = fluid.layers.fc(bn_b,
                         num_classes,
                         name=fc_name,
                         param_attr=ParamAttr(
                             initializer=Normal(scale=1e-3),
                             name=fc_name + '.weight'),
                         bias_attr=ParamAttr(
                             initializer=Constant(0.),
                             name=fc_name + '.bias'))
    return fc


def StemConv(input, C_out, kernel_size, padding):
    conv_a = fluid.layers.conv2d(
        input,
        C_out,
        kernel_size,
        padding=padding,
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0), name='stem.0.weight'),
        bias_attr=False)
    bn_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem.1.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem.1.bias'),
        moving_mean_name='stem.1.running_mean',
        moving_variance_name='stem.1.running_var')
    return bn_a


class NetworkCIFAR(object):
    def __init__(self, C, class_num, layers, auxiliary, genotype):
        self.class_num = class_num
        self._layers = layers
        self._auxiliary = auxiliary

        stem_multiplier = 3
        self.drop_path_prob = 0
        C_curr = stem_multiplier * C

        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = []
        reduction_prev = False
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
                        reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
            if i == 2 * layers // 3:
                C_to_auxiliary = C_prev

    def forward(self, init_channel, is_train):
        self.training = is_train
        self.logits_aux = None
        num_channel = init_channel * 3
        s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
        s1 = s0
        for i, cell in enumerate(self.cells):
            name = 'cells.' + str(i) + '.'
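            # Each cell consumes the outputs of the two preceding cells as
            # its inputs (s0, s1) and produces the next s1. After the second
            # reduction cell, at roughly 2/3 of the depth, an auxiliary
            # classifier is attached during training when `self._auxiliary`
            # is set.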
            s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                      name)
            if i == 2 * self._layers // 3:
                if self._auxiliary and self.training:
                    self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
        out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
        self.logits = fluid.layers.fc(out,
                                      size=self.class_num,
                                      param_attr=ParamAttr(
                                          initializer=Normal(scale=1e-3),
                                          name='classifier.weight'),
                                      bias_attr=ParamAttr(
                                          initializer=Constant(0.),
                                          name='classifier.bias'))
        return self.logits, self.logits_aux

    def build_input(self, image_shape, batch_size, is_train):
        if is_train:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1],
                        [-1, 1], [-1, 1],
                        [-1, batch_size, self.class_num - 1]],
                lod_levels=[0, 0, 0, 0, 0, 0, 0],
                dtypes=[
                    "float32", "int64", "int64", "float32", "int32", "int32",
                    "float32"
                ],
                use_double_buffer=True,
                name='train_reader')
        else:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='test_reader')
        return py_reader

    def train_model(self, py_reader, init_channels, aux, aux_w, batch_size,
                    loss_lambda):
        self.image, self.ya, self.yb, self.lam, self.label_reshape, \
            self.non_label_reshape, self.rad_var = fluid.layers.read_file(
                py_reader)
        self.logits, self.logits_aux = self.forward(init_channels, True)
        self.mixup_loss = self.mixup_loss(aux, aux_w)
        self.lrc_loss = self.lrc_loss(batch_size)
        return self.mixup_loss + loss_lambda * self.lrc_loss

    def test_model(self, py_reader, init_channels):
        self.image, self.ya = fluid.layers.read_file(py_reader)
        self.logits, _ = self.forward(init_channels, False)
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss = fluid.layers.cross_entropy(prob, self.ya)
        acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1)
        acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5)
        return loss, acc_1, acc_5

    def mixup_loss(self, auxiliary, auxiliary_weight):
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss_a = fluid.layers.cross_entropy(prob, self.ya)
        loss_b = fluid.layers.cross_entropy(prob, self.yb)
        loss_a_mean = fluid.layers.reduce_mean(loss_a)
        loss_b_mean = fluid.layers.reduce_mean(loss_b)
        loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean
        if auxiliary:
            prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
            loss_a_aux = fluid.layers.cross_entropy(prob_aux, self.ya)
            loss_b_aux = fluid.layers.cross_entropy(prob_aux, self.yb)
            loss_a_aux_mean = fluid.layers.reduce_mean(loss_a_aux)
            loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux)
            loss_aux = self.lam * loss_a_aux_mean + (
                1 - self.lam) * loss_b_aux_mean
            loss = loss + auxiliary_weight * loss_aux
        return loss

    def lrc_loss(self, batch_size):
        y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1))
        label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1])
        non_label_reshape = fluid.layers.squeeze(
            self.non_label_reshape, axes=[1])
        label_reshape.stop_gradient = True
        non_label_reshape.stop_gradient = True

        y_diff_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                   label_reshape)
        y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                       non_label_reshape)
        y_diff_label = fluid.layers.reshape(
            y_diff_label_reshape, shape=(-1, batch_size, 1))
        y_diff_non_label = fluid.layers.reshape(
            y_diff_non_label_reshape,
            shape=(-1, batch_size, self.class_num - 1))
        y_diff_ = y_diff_non_label - y_diff_label

        y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0])
        rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0])
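        # A sketch of the intent, as far as the graph shows: `self.rad_var`
        # holds Rademacher-style random variables supplied by the train
        # reader (shape [-1, batch_size, class_num - 1]). Each sample is
        # paired with the per-class logit gaps (non-label minus label
        # logit), the products are summed over batch and classes, and the
        # absolute sums are averaged, giving an empirical estimate of the
        # local Rademacher complexity used as a regularizer.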
        rad_y_diff_trans = rad_var_trans * y_diff_
        lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1])
        lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size *
                                                      (self.class_num - 1))
        lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_)

        return lrc_loss_mean
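
# --------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original file).
# It assumes a `genotypes` module defining the searched architecture, as in
# the DARTS repo; the channel count, depth, batch size, and loss weights
# below are illustrative values, not prescribed ones.
#
#   import genotypes
#
#   model = NetworkCIFAR(C=36, class_num=10, layers=20, auxiliary=True,
#                        genotype=genotypes.DARTS)
#   train_reader = model.build_input([3, 32, 32], batch_size=64,
#                                    is_train=True)
#   loss = model.train_model(train_reader, init_channels=36, aux=True,
#                            aux_w=0.4, batch_size=64, loss_lambda=0.7)
# --------------------------------------------------------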