# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import six
import numpy as np
import unittest

import paddle.fluid as fluid
from paddle.fluid.dygraph.jit import dygraph_to_static_graph

PLACE = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace(
)


class SubNetWithDict(fluid.dygraph.Layer):
    def __init__(self, hidden_size=16, output_size=16):
        super(SubNetWithDict, self).__init__()

        init_weight = lambda x: fluid.ParamAttr(initializer=fluid.initializer.Constant(x))

        self.q_fc = fluid.dygraph.Linear(
            input_dim=hidden_size,
            output_dim=output_size,
            bias_attr=False,
            param_attr=init_weight(0.6))
        self.k_fc = fluid.dygraph.Linear(
            input_dim=hidden_size,
            output_dim=output_size,
            bias_attr=False,
            param_attr=init_weight(0.5))
        self.v_fc = fluid.dygraph.Linear(
            input_dim=hidden_size,
            output_dim=output_size,
            bias_attr=False,
            param_attr=init_weight(0.2))

    @dygraph_to_static_graph
    def forward(self, input, cache=None):
        input = fluid.dygraph.to_variable(input)

        q = self.q_fc(input)
        k = self.k_fc(input)
        v = self.v_fc(input)

        if cache is not None:
            cache_k, cache_v = cache["k"], cache["v"]
            k = 0.1 * cache_k + k
            v = 0.2 * cache_v + v
            cache["k"], cache["v"] = k, v

        weight = fluid.layers.matmul(x=q, y=k, transpose_y=True)
        weight = fluid.layers.softmax(weight)
        out = fluid.layers.matmul(weight, v)

        return out


class MainNetWithDict(fluid.dygraph.Layer):
    def __init__(self, batch_size=64, hidden_size=16, output_size=16):
        super(MainNetWithDict, self).__init__()
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.sub_net = SubNetWithDict(hidden_size, output_size)

    @dygraph_to_static_graph
    def forward(self, input, max_len=4):
        input = fluid.dygraph.to_variable(input)
        cache = {
            "k": fluid.layers.fill_constant(
                shape=[self.batch_size, self.output_size],
                dtype='float32',
                value=0),
            "v": fluid.layers.fill_constant(
                shape=[self.batch_size, self.output_size],
                dtype='float32',
                value=0),
        }
        max_len = input.shape[0] if input.shape[0] != max_len else max_len
        out = input
        for i in range(max_len):
            out = self.sub_net(out, cache)
            cache = self.update_cache(cache)

        return out

    def update_cache(self, cache):
        for k, val in six.iteritems(cache):
            cache[k] = fluid.layers.softmax(val)

        return cache


class TestNetWithDict(unittest.TestCase):
    """
    TestCase for the transformation from control flow `if/else`
    dependent on tensor in Dygraph into Static `fluid.layers.cond`.
    """

    def setUp(self):
        self.x = np.random.random([10, 16]).astype('float32')
        self.batch_size = self.x.shape[0]

    def _run_static(self):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            net = MainNetWithDict(batch_size=self.batch_size)
            # Transform into static graph
            out = net(self.x)
            exe = fluid.Executor(PLACE)
            exe.run(fluid.default_startup_program())
            ret = exe.run(main_program, fetch_list=out)
            return ret[0]

    def _run_dygraph(self):
        with fluid.dygraph.guard(PLACE):
            net = MainNetWithDict(batch_size=self.batch_size)
            ret = net(self.x)
            return ret.numpy()

    def test_ast_to_func(self):
        self.assertTrue((self._run_dygraph() == self._run_static()).all())


if __name__ == '__main__':
    unittest.main()