#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import unittest
import warnings
import numpy as np
import random
import six
import struct
import time
import itertools
import collections
from collections import defaultdict

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.backward import append_backward
from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
from paddle.fluid.framework import Program, OpProtoHolder, Variable
from testsuite import create_op, set_input, append_input_output, append_loss_ops
from paddle.fluid import unique_name
from white_list import op_accuracy_white_list, check_shape_white_list, compile_vs_runtime_white_list, no_check_set_white_list
from white_list import op_threshold_white_list, no_grad_set_white_list
from op_test import OpTest, _set_use_system_allocator, get_numeric_gradient


class XPUOpTest(OpTest):
    @classmethod
    def setUpClass(cls):
        '''Fix random seeds to remove randomness from tests'''
        cls._np_rand_state = np.random.get_state()
        cls._py_rand_state = random.getstate()
        cls.call_once = False
        cls.dtype = np.float32
        cls.outputs = {}
        cls.input_shape_is_large = True

        np.random.seed(123)
        random.seed(124)

        cls._use_system_allocator = _set_use_system_allocator(True)

    @classmethod
    def tearDownClass(cls):
        """Restore random seeds"""
        np.random.set_state(cls._np_rand_state)
        random.setstate(cls._py_rand_state)

        _set_use_system_allocator(cls._use_system_allocator)

        def is_empty_grad_op(op_type):
            all_op_kernels = core._get_all_register_op_kernels()
            grad_op = op_type + '_grad'
            if grad_op in all_op_kernels.keys():
                if is_mkldnn_op_test():
                    grad_op_kernels = all_op_kernels[grad_op]
                    for grad_op_kernel in grad_op_kernels:
                        if 'MKLDNN' in grad_op_kernel:
                            return False
                else:
                    return False
            return True

        def is_xpu_op_test():
            return True

        def is_mkldnn_op_test():
            return False

        if not hasattr(cls, "op_type"):
            raise AssertionError(
                "This test do not have op_type in class attrs, "
                "please set self.__class__.op_type=the_real_op_type manually.")

        # case in NO_FP64_CHECK_GRAD_CASES and op in NO_FP64_CHECK_GRAD_OP_LIST should be fixed
        if not hasattr(cls, "no_need_check_grad") \
            and not is_empty_grad_op(cls.op_type):
            if cls.dtype is not None and \
                cls.dtype != np.float32:
                raise AssertionError("This test of %s op needs check_grad." %
                                     cls.op_type)

    def try_call_once(self, data_type):
        if not self.call_once:
            self.call_once = True
            if data_type is not None and \
                data_type != np.float32:
                raise AssertionError("Unsupport data type %s in xpu" %
                                     data_type)
            self.dtype = data_type

    def check_output_with_place(self,
                                place,
                                atol=0.001,
                                no_check_set=None,
                                equal_nan=False,
                                check_dygraph=True,
                                inplace_atol=None):
        self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs)
        if self.dtype == np.float64 and \
            self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_OUTPUT_THRESHOLD_OP_LIST:
            atol = 0

        if self.is_bfloat16_op():
            check_dygraph = False
            if hasattr(self, 'force_fp32_output') and getattr(
                    self, 'force_fp32_output'):
                atol = 1e-2
            else:
                atol = 2

        if no_check_set is not None:
            if self.op_type not in no_check_set_white_list.no_check_set_white_list:
                raise AssertionError(
                    "no_check_set of op %s must be set to None." % self.op_type)

        if check_dygraph:
            dygraph_outs = self._calc_dygraph_output(
                place, no_check_set=no_check_set)
        outs, fetch_list = self._calc_output(place, no_check_set=no_check_set)
        for out_name, out_dup in Operator.get_op_outputs(self.op_type):
            if out_name not in self.outputs:
                continue
            if no_check_set is not None and out_name in no_check_set:
                continue

            def find_imperative_actual(target_name, dygraph_outs, place):
                with fluid.dygraph.base.guard(place=place):
                    for name in dygraph_outs:
                        if name == target_name:
                            return dygraph_outs[name][0]
                        var_list = dygraph_outs[name]
                        for i, var in enumerate(var_list):
                            if var.name == target_name:
                                return dygraph_outs[name][i]
                    self.assertTrue(False, "Found failed {} {}".format(
                        dygraph_outs.keys(), target_name))

            def find_actual(target_name, fetch_list):
                found = [
                    i for i, var_name in enumerate(fetch_list)
                    if var_name == target_name
                ]
                self.assertTrue(
                    len(found) == 1, "Found {} {}".format(
                        len(found), target_name))
                return found[0]

            if out_dup:
                sub_out = self.outputs[out_name]
                if not isinstance(sub_out, list):
                    raise AssertionError("sub_out type %s is not list",
                                         type(sub_out))
                for item in sub_out:
                    sub_out_name, expect = item[0], item[1]
                    if check_dygraph:
                        imperative_actual = find_imperative_actual(
                            sub_out_name, dygraph_outs, place)
                        imperative_actual_t = np.array(imperative_actual.value()
                                                       .get_tensor())
                    idx = find_actual(sub_out_name, fetch_list)
                    actual = outs[idx]
                    actual_t = np.array(actual)
                    expect_t = expect[0] \
                        if isinstance(expect, tuple) else expect
                    self.assertTrue(
                        np.allclose(
                            actual_t, expect_t, atol=atol, equal_nan=equal_nan),
                        "Output (" + sub_out_name + ") has diff at " +
                        str(place))
                    if check_dygraph:
                        self.assertTrue(
                            np.allclose(
                                imperative_actual_t,
                                expect_t,
                                atol=atol,
                                equal_nan=equal_nan),
                            "Output (" + sub_out_name + ") has diff at " +
                            str(place) + " in dygraph mode")
                    if isinstance(expect, tuple):
                        self.assertListEqual(
                            actual.recursive_sequence_lengths(), expect[1],
                            "Output (" + sub_out_name +
                            ") has different lod at " + str(place))
                        if check_dygraph:
                            self.assertListEqual(
                                imperative_actual.value().get_tensor()
                                .recursive_sequence_lengths(), expect[1],
                                "Output (" + out_name +
                                ") has different lod at " + str(place) +
                                " in dygraph mode")
            else:
                if check_dygraph:
                    imperative_actual = find_imperative_actual(
                        out_name, dygraph_outs, place)
                    imperative_actual_t = np.array(imperative_actual.value()
                                                   .get_tensor())
                idx = find_actual(out_name, fetch_list)
                actual = outs[idx]
                actual_t = np.array(actual)
                expect = self.outputs[out_name]
                expect_t = expect[0] if isinstance(expect, tuple) else expect
                self.assertTrue(
                    np.allclose(
                        actual_t, expect_t, atol=atol, equal_nan=equal_nan),
                    "Output (" + out_name + ") has diff at " + str(place) +
                    "\nExpect " + str(expect_t) + "\n" + "But Got" +
                    str(actual_t) + " in class " + self.__class__.__name__ + " "
                    + str(atol) + " " + str(expect_t - actual_t))
                if check_dygraph:
                    if six.moves.reduce(
                            lambda x, y: x * y, imperative_actual_t.shape,
                            1) == 0 and six.moves.reduce(
                                lambda x, y: x * y, expect_t.shape, 1) == 0:
                        pass
                    else:
                        self.assertTrue(
                            np.allclose(
                                imperative_actual_t,
                                expect_t,
                                atol=atol,
                                equal_nan=equal_nan),
                            "Output (" + out_name + ") has diff at " +
                            str(place) + "\nExpect " + str(expect_t) + "\n" +
                            "But Got" + str(imperative_actual_t) + " in class "
                            + self.__class__.__name__)
                if isinstance(expect, tuple):
                    self.assertListEqual(actual.recursive_sequence_lengths(),
                                         expect[1], "Output (" + out_name +
                                         ") has different lod at " + str(place))
                    if check_dygraph:
                        self.assertListEqual(
                            imperative_actual.value().get_tensor()
                            .recursive_sequence_lengths(), expect[1],
                            "Output (" + out_name + ") has different lod at " +
                            str(place) + " in dygraph mode")

        # Note(zhiqiu): inplace_atol should be only set when op doesn't ensure
        # computational consistency.
        # For example, group_norm uses AtomicAdd on CUDAPlace, which do not ensure
        # computation order when multiple threads write the same address. So the
        # result of group_norm is non-deterministic when datatype is float.
        # When inplace_atol is not None, the inplace check uses numpy.allclose
        # to check inplace result instead of numpy.array_equal.
        if inplace_atol is not None:
            warnings.warn(
                "inplace_atol should only be set when op doesn't ensure computational consistency, please check it!"
            )
        # Check inplace for given op, its grad op, its grad_grad op, etc.
        # No effect on original OpTest
        # Currently not support ParallelExecutor on XPUPlace.
        if not paddle.is_compiled_with_xpu():
            self.check_inplace_output_with_place(
                place, no_check_set=no_check_set, inplace_atol=inplace_atol)

        if check_dygraph:
            return outs
        else:
            return outs

    def check_grad_with_place(self,
                              place,
                              inputs_to_check,
                              output_names,
                              no_grad_set=None,
                              numeric_grad_delta=0.005,
                              in_place=False,
                              max_relative_error=0.005,
                              user_defined_grads=None,
                              check_dygraph=True):
        place = paddle.XPUPlace(0)
        a1 = self.get_grad_with_place(
            place, inputs_to_check, output_names, no_grad_set=no_grad_set)
        a2 = self.get_grad_with_place(
            place, inputs_to_check, output_names, no_grad_set=no_grad_set)
        a3 = self.get_grad_with_place(
            paddle.CPUPlace(),
            inputs_to_check,
            output_names,
            no_grad_set=no_grad_set)
        self._assert_is_close(a1, a2, inputs_to_check, 0.00000001,
                              "Gradient Check On two xpu")
        self._assert_is_close(a1, a3, inputs_to_check, 0.001,
                              "Gradient Check On cpu & xpu")

    def get_grad_with_place(self,
                            place,
                            inputs_to_check,
                            output_names,
                            no_grad_set=None,
                            numeric_grad_delta=0.005,
                            in_place=False,
                            max_relative_error=0.005,
                            user_defined_grads=None,
                            check_dygraph=True):
        self.scope = core.Scope()
        op_inputs = self.inputs if hasattr(self, "inputs") else dict()
        op_outputs = self.outputs if hasattr(self, "outputs") else dict()
        op_attrs = self.attrs if hasattr(self, "attrs") else dict()

        self._check_grad_helper()
        if self.dtype == np.float64 and \
            self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_GRAD_THRESHOLD_OP_LIST:
            numeric_grad_delta = 1e-5
            max_relative_error = 1e-7

        cache_list = None
        if hasattr(self, "cache_name_list"):
            cache_list = self.cache_name_list

        # oneDNN numeric gradient should use CPU kernel
        use_onednn = False
        if "use_mkldnn" in op_attrs and op_attrs["use_mkldnn"] == True:
            op_attrs["use_mkldnn"] = False
            use_onednn = True

        self.op = create_op(
            self.scope,
            self.op_type,
            op_inputs,
            op_outputs,
            op_attrs,
            cache_list=cache_list)

        if use_onednn:
            op_attrs["use_mkldnn"] = True

        if no_grad_set is None:
            no_grad_set = set()
        else:
            if (self.op_type not in no_grad_set_white_list.NEED_TO_FIX_OP_LIST
                ) and (
                    self.op_type not in no_grad_set_white_list.NOT_CHECK_OP_LIST
                ) and (not self.is_bfloat16_op()):
                raise AssertionError("no_grad_set must be None, op_type is " +
                                     self.op_type + " Op.")

        for input_to_check in inputs_to_check:
            set_input(self.scope, self.op, self.inputs, place)
            tensor_to_check = self.scope.find_var(input_to_check).get_tensor()
            tensor_size = six.moves.reduce(lambda a, b: a * b,
                                           tensor_to_check.shape(), 1)
            if tensor_size < 100:
                self.__class__.input_shape_is_large = False

        if not type(output_names) is list:
            output_names = [output_names]

        analytic_grads = self._get_gradient(inputs_to_check, place,
                                            output_names, no_grad_set)
        return analytic_grads