# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
from paddle import nn
from paddle.autograd import PyLayer
from paddle.nn import functional as F
from paddle.nn.layer import Conv1D
from paddle.nn.layer.common import Linear


class BinaryQuantizer(PyLayer):
    """Binarize a tensor with sign() and back-propagate with a
    straight-through estimator (gradient passes only where |input| < 1)."""

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        out = paddle.sign(input)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        input = ctx.saved_tensor()[0]
        # Clone before masking so the incoming gradient is not modified in place.
        grad_input = grad_output.clone()
        grad_input[input >= 1] = 0
        grad_input[input <= -1] = 0
        return grad_input


class BiLinear(Linear):
    """Linear layer whose weights and activations are binarized with sign(),
    rescaled by a per-layer scale factor initialized on the first forward pass."""

    def __init__(self,
                 in_features,
                 out_features,
                 weight_attr=None,
                 bias_attr=None,
                 name=None):
        super(BiLinear, self).__init__(
            in_features,
            out_features,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name=name)
        self.scale_weight_init = False
        self.scale_weight = paddle.create_parameter(shape=[1], dtype='float32')

    def forward(self, input):
        ba = input
        bw = self.weight
        bw = bw - bw.mean()

        if not self.scale_weight_init:
            # Initialize the scale so the binarized output std matches the
            # full-precision output std on the first batch.
            scale_weight = F.linear(ba, bw).std() / F.linear(
                paddle.sign(ba), paddle.sign(bw)).std()
            if paddle.isnan(scale_weight):
                scale_weight = bw.std() / paddle.sign(bw).std()

            self.scale_weight.set_value(scale_weight)
            self.scale_weight_init = True

        ba = BinaryQuantizer.apply(ba)
        bw = BinaryQuantizer.apply(bw)
        bw = bw * self.scale_weight

        out = F.linear(x=ba, weight=bw, bias=self.bias, name=self.name)
        return out


class BiConv1D(Conv1D):
    """Conv1D layer whose weights and activations are binarized with sign(),
    rescaled by a per-layer scale factor initialized on the first forward pass."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 padding_mode='zeros',
                 weight_attr=None,
                 bias_attr=None,
                 data_format="NCL"):
        super(BiConv1D, self).__init__(in_channels, out_channels, kernel_size,
                                       stride, padding, dilation, groups,
                                       padding_mode, weight_attr, bias_attr,
                                       data_format)
        self.scale_weight_init = False
        self.scale_weight = paddle.create_parameter(shape=[1], dtype='float32')

    def forward(self, input):
        ba = input
        bw = self.weight
        bw = bw - bw.mean()

        padding = 0
        if self._padding_mode != "zeros":
            # Non-zero padding modes are applied explicitly, then the
            # convolution itself runs without padding.
            ba = F.pad(ba,
                       self._reversed_padding_repeated_twice,
                       mode=self._padding_mode,
                       data_format=self._data_format)
        else:
            padding = self._padding

        if not self.scale_weight_init:
            # Initialize the scale so the binarized convolution matches the
            # std of the full-precision convolution on the first batch.
            scale_weight = F.conv1d(
                ba, bw, bias=self.bias, padding=padding,
                stride=self._stride, dilation=self._dilation,
                groups=self._groups, data_format=self._data_format).std() / \
                F.conv1d(
                    paddle.sign(ba), paddle.sign(bw), bias=self.bias,
                    padding=padding, stride=self._stride,
                    dilation=self._dilation, groups=self._groups,
                    data_format=self._data_format).std()

            if paddle.isnan(scale_weight):
                scale_weight = bw.std() / paddle.sign(bw).std()

            self.scale_weight.set_value(scale_weight)
            self.scale_weight_init = True

        ba = BinaryQuantizer.apply(ba)
        bw = BinaryQuantizer.apply(bw)
        bw = bw * self.scale_weight

        return F.conv1d(
            ba,
            bw,
            bias=self.bias,
            padding=padding,
            stride=self._stride,
            dilation=self._dilation,
            groups=self._groups,
            data_format=self._data_format)


def _to_bi_function(model, fp_layers=[]):
    """Recursively replace Linear/Conv1D sub-layers with BiLinear/BiConv1D and
    ReLU with Hardtanh, skipping layers whose id() appears in ``fp_layers``
    (those stay in full precision)."""
    for name, layer in model.named_children():
        if id(layer) in fp_layers:
            continue
        if isinstance(layer, Linear):
            new_layer = BiLinear(layer.weight.shape[0], layer.weight.shape[1],
                                 layer._weight_attr, layer._bias_attr,
                                 layer.name)
            new_layer.weight = layer.weight
            new_layer.bias = layer.bias
            model._sub_layers[name] = new_layer
        elif isinstance(layer, Conv1D):
            new_layer = BiConv1D(layer._in_channels, layer._out_channels,
                                 layer._kernel_size, layer._stride,
                                 layer._padding, layer._dilation,
                                 layer._groups, layer._padding_mode,
                                 layer._param_attr, layer._bias_attr,
                                 layer._data_format)
            new_layer.weight = layer.weight
            new_layer.bias = layer.bias
            model._sub_layers[name] = new_layer
        elif isinstance(layer, nn.ReLU):
            model._sub_layers[name] = nn.Hardtanh()
        else:
            model._sub_layers[name] = _to_bi_function(layer, fp_layers)
    return model
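

# --- Usage sketch (illustrative assumption, not part of the original module) ---
# A minimal example of how _to_bi_function might be applied: binarize the
# Linear layers of a small model while keeping the final projection in full
# precision via ``fp_layers``. The model below is hypothetical.
#
#     model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))
#     model = _to_bi_function(model, fp_layers=[id(model[2])])
#     out = model(paddle.randn([8, 16]))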