# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.common_ops_import import LayerHelper, check_type, in_dygraph_mode
from paddle.fluid import core
from paddle.fluid.backward import _append_grad_suffix_
from paddle.fluid.framework import Variable
from paddle.utils import flatten, map_structure

# NOTE(MarioLulab): Borrowed from `python/paddle/static/nn/control_flow.py`
from .control_flow import BlockGuard, copy_var_to_parent_block


class StaticPyLayerBlockGuard(BlockGuard):
    def __init__(self, block_manager):
        check_type(
            block_manager,
            "block",
            StaticPyLayerBlock,
            "StaticPyLayerBlockGuard",
        )
        super().__init__(block_manager.helper.main_program)
        self.block_manager = block_manager

    def __enter__(self):
        super().__enter__()
        return self.block_manager

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.block_manager.complete()
        return super().__exit__(exc_type, exc_val, exc_tb)


class StaticPyLayerBlock:
    def __init__(self, inputs, name=None):
        for each_input in inputs:
            check_type(each_input, "input", Variable, "StaticPyLayerBlock")

        # used to specify the `Input` of the `pylayer` op
        self.fwd_inputs = inputs

        # used to specify the `Out` of the `pylayer` op
        self.fwd_outputs = []

        self.helper = LayerHelper("static_pylayer_block", name=name)
        self.fwd_op_id = None
        self._forward_block_id = None
        self._backward_block_id = None
        self.var_old_to_new = {}

    def block(self, is_backward_block=False):
        self.is_backward_block = is_backward_block
        return StaticPyLayerBlockGuard(self)

    @property
    def forward_block_index(self):
        return self._forward_block_id

    @property
    def backward_block_index(self):
        return self._backward_block_id

    @property
    def fwd_op_index(self):
        return self.fwd_op_id

    def complete_forward_block(self):
        inside_block = self.helper.main_program.current_block()
        parent_block = self.helper.main_program.block(inside_block.parent_idx)
        self._forward_block_id = inside_block.idx

        step_scope = parent_block.create_var(
            type=core.VarDesc.VarType.STEP_SCOPES
        )
        pylayer_op = parent_block.append_op(
            type='pylayer',
            inputs={
                'Input': self.fwd_inputs,
            },
            outputs={"Out": self.fwd_outputs, "Scope": [step_scope]},
            attrs={
                'blocks': [inside_block],
            },
        )
        self.fwd_op_id = pylayer_op.idx

    def complete_backward_block(self):
        inside_block = self.helper.main_program.current_block()
        parent_block = self.helper.main_program.block(inside_block.parent_idx)
        self._backward_block_id = inside_block.idx

        # set OpRole to `backward`
        for op in inside_block.ops:
            op_role_attr_name = (
                core.op_proto_and_checker_maker.kOpRoleAttrName()
            )
            backward = core.op_proto_and_checker_maker.OpRole.Backward
            op.desc._set_attr(op_role_attr_name, backward)
        inside_block._set_forward_block_idx(self.forward_block_index)

        # NOTE(MarioLulab): The var names in the inside block are renamed so that
        # `inside_grads` and `outside_grads` can be associated at runtime in
        # `RunImpl` of the pylayer op.
        for old_var_name, new_var_name in self.var_old_to_new.items():
            # TODO(MarioLulab): need to remove recursively in ``sub_block``
            #
            # NOTE(MarioLulab): Block._rename_var is not used here because
            # `old_var_name` does not correspond to a Variable instance in the
            # Block, so Block._rename_var would raise a ValueError.
            inside_block.desc._rename_var(
                old_var_name.encode(), new_var_name.encode()
            )

        # update the `blocks` attr by appending the backward block
        forward_block_desc = parent_block.program.block(
            self.forward_block_index
        ).desc
        backward_block_desc = inside_block.desc
        parent_block.ops[self.fwd_op_index].desc.set_blocks_attr(
            "blocks", [forward_block_desc, backward_block_desc]
        )

    def complete(self):
        if not self.is_backward_block:
            return self.complete_forward_block()
        else:
            return self.complete_backward_block()


# TODO(MarioLulab):
# Need to support non-Variable in ``inputs``
def static_pylayer(forward_fn, inputs, backward_fn=None, name=None):
    """
    This API returns ``forward_fn(inputs)``, and two sub-blocks are created based
    on the logic of ``forward_fn`` and ``backward_fn``, with the operator
    ``pylayer`` holding information about the two blocks.

    ``forward_fn`` and ``backward_fn`` should return a nested structure of tensors.
    A nested structure of tensors in PaddlePaddle is a tensor, a tuple of tensors,
    or a list of tensors.

    Note:
        1. If ``backward_fn`` is not None, the user needs to keep the number of inputs
           to ``forward_fn`` the same as the number of outputs of ``backward_fn``, and
           the number of outputs of ``forward_fn`` the same as the number of inputs to
           ``backward_fn``.

        2. If ``backward_fn`` is None, the ``stop_gradient`` attribute of every Variable
           in ``inputs`` is expected to be True; otherwise the backward pass might
           produce unexpected results.

        3. This API can only be used under static graph mode.

    Args:
        forward_fn (callable): A callable to be performed in the forward pass.
        inputs (list[Variable]): The list of input Variables to ``forward_fn``.
        backward_fn (callable, optional): A callable to be performed in the backward pass.
        name (str, optional): The default value is ``None``. Normally users don't have
            to set this parameter.

    Returns:
        Variable|list(Variable)|tuple(Variable): returns the output of ``forward_fn(inputs)``

    Examples:
        .. code-block:: python

            import paddle
            import numpy as np

            #
            # pseudocode:
            # y = exp(x)
            # dx = 2 * exp(dy)
            #

            paddle.enable_static()

            def forward_fn(x):
                return paddle.exp(x)

            def backward_fn(dy):
                return 2 * paddle.exp(dy)

            main_program = paddle.static.Program()
            start_program = paddle.static.Program()

            place = paddle.CPUPlace()
            exe = paddle.static.Executor(place)
            with paddle.static.program_guard(main_program, start_program):
                data = paddle.static.data(name="X", shape=[None, 5], dtype="float32")
                data.stop_gradient = False
                ret = paddle.static.nn.static_pylayer(forward_fn, [data], backward_fn)
                data_grad = paddle.static.gradients([ret], data)[0]

            exe = paddle.static.Executor(place)
            exe.run(start_program)
            x = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32)
            x, x_grad, y = exe.run(
                main_program,
                feed={"X": x},
                fetch_list=[
                    data.name,
                    data_grad.name,
                    ret.name
                ],
            )

            # x is Numpy
            # x.data = [[1.0, 2.0, 3.0, 4.0, 5.0]]
            # x.shape = [1, 5]
            # y is Numpy
            # y.data = [[2.7182817, 7.389056, 20.085537, 54.59815, 148.41316]]
            # y.shape = [1, 5]
            # x_grad is Numpy
            # x_grad.data = [[5.4365635, 5.4365635, 5.4365635, 5.4365635, 5.4365635]]
            # x_grad.shape = [1, 5]
    """
    assert (
        in_dygraph_mode() is False
    ), "please use PyLayer instead of static_pylayer in dygraph mode"

    assert isinstance(inputs, list)
    if backward_fn is None:
        for input_var in inputs:
            if input_var.stop_gradient is False:
                raise ValueError(
                    "The ``stop_gradient`` attribute of all inputs to ``forward_fn`` is expected to be True when ``backward_fn`` is None, but {}.stop_gradient is {}".format(
                        input_var.name, input_var.stop_gradient
                    )
                )

    check_type(name, "name", (str, type(None)), "fluid.layers.static_pylayer")
    helper = LayerHelper('static_pylayer', **locals())
    copy_to_parent_func = lambda var: copy_var_to_parent_block(var, helper)

    assert forward_fn is not None and callable(forward_fn)
    pylayer_block_manager = StaticPyLayerBlock(inputs)
    with pylayer_block_manager.block(is_backward_block=False) as mgr:
        origin_output = forward_fn(*inputs)
        if origin_output is not None:
            output = map_structure(copy_to_parent_func, origin_output)
            mgr.fwd_outputs = flatten(output)
        else:
            mgr.fwd_outputs = []

    current_block = helper.main_program.current_block()
    current_block._sync_with_cpp()

    if backward_fn is not None:
        assert callable(backward_fn)
        if origin_output is None:
            output = []

        # **Create the backward input** from the output of the op to build the
        # backward block, and then delete it.
        grad_var_ins = []
        for fwd_var in flatten(output):
            fwd_var_name = fwd_var.name
            bwd_var_name = _append_grad_suffix_(fwd_var_name)
            var = current_block.create_var(name=bwd_var_name)
            if not current_block.desc.has_var_recursive(fwd_var_name.encode()):
                raise ValueError(
                    "Grad var {}: cannot find its related forward var {}".format(
                        bwd_var_name, fwd_var_name
                    )
                )
            var.desc.set_dtype(fwd_var.dtype)
            var.desc.set_shape(fwd_var.shape)
            grad_var_ins.append(var)

        assert isinstance(grad_var_ins, list)
        with pylayer_block_manager.block(is_backward_block=True) as mgr:
            grad_origin_output = backward_fn(*grad_var_ins)
            if grad_origin_output is not None:
                flat_grad_origin = flatten(grad_origin_output)

                # NOTE(MarioLulab): ``current_block`` was defined outside
                forward_input_names = current_block.ops[
                    pylayer_block_manager.fwd_op_index
                ].desc.input_arg_names()
                assert len(forward_input_names) == len(
                    flat_grad_origin
                ), f"The number of inputs to ``forward_fn`` must be the same as the number of outputs of ``backward_fn``, but got {len(forward_input_names)} and {len(flat_grad_origin)}"

                for bwd_output_name, fwd_input_name in zip(
                    flat_grad_origin, forward_input_names
                ):
                    # NOTE(MarioLulab): `flat_grad_origin` holds the Variables inside the backward block,
                    # which correspond one by one to the gradients of the inputs to the forward function,
                    # so we need to establish a link between `flat_grad_origin` and the Variables outside
                    # the backward block that represent the gradients of the inputs to the forward function.
                    # The approach taken here is to rename each Variable in `flat_grad_origin` to the
                    # corresponding forward input name with the "@GRAD" suffix, and to align the order of
                    # `Out@GRAD` in the `pylayer_grad` op with `flat_grad_origin`. At runtime, `RunImpl` of
                    # the `pylayer_grad` op looks up the inside grad in the scope by the forward input name
                    # with the "@GRAD" suffix, and assigns `inside_grads` to `outside_grads`.
                    #
                    # Example:
                    #   After running the code below to create the forward and backward blocks:
                    #
                    #       out = forward_fn(x, y)                  # create forward block
                    #       x_grad, y_grad = backward_fn(out_grad)  # create backward block
                    #
                    #   x.name is "X", y.name is "Y", and out.name is "tmp_0", but x_grad.name is
                    #   "_generate_0" and y_grad.name is "_generate_1". We rename x_grad to "X@GRAD"
                    #   and y_grad to "Y@GRAD" inside the backward block.
                    #   One thing to keep in mind is that we assume there was no Variable named "X@GRAD"
                    #   inside the backward block before performing the rename.
                    # TODO(MarioLulab): Validate whether the assumption above is too strong.

                    # attach the old var name to the new one
                    bwd_out_new = _append_grad_suffix_(
                        fwd_input_name
                    )  # "X" => "X@GRAD"
                    mgr.var_old_to_new[
                        bwd_output_name.name
                    ] = bwd_out_new  # e.g. "tmp_0.mean_0": "X@GRAD"

        # **Delete the backward input**
        for bwd_var in grad_var_ins:
            current_block._remove_var(bwd_var.name)

    if origin_output is None:
        return None

    return output
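

# NOTE: The block below is an illustrative sketch only, not part of the library.
# It assumes the public alias ``paddle.static.nn.static_pylayer`` used in the
# docstring above and demonstrates the ``backward_fn=None`` case, where every
# input must keep ``stop_gradient=True`` (see Note 2 in the docstring).
if __name__ == "__main__":
    import numpy as np

    import paddle

    paddle.enable_static()

    demo_main = paddle.static.Program()
    demo_startup = paddle.static.Program()
    with paddle.static.program_guard(demo_main, demo_startup):
        demo_x = paddle.static.data(name="X", shape=[None, 5], dtype="float32")
        # ``demo_x.stop_gradient`` is True by default, which is required here
        # because no ``backward_fn`` is provided.
        demo_out = paddle.static.nn.static_pylayer(
            lambda x: paddle.exp(x), [demo_x]
        )

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(demo_startup)
    (y,) = exe.run(
        demo_main,
        feed={"X": np.ones([1, 5], dtype=np.float32)},
        fetch_list=[demo_out.name],
    )
    # ``y`` is a numpy array holding exp(1.0) for every element of the input.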