# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import typing
import unittest

import numpy as np
import paddle

import autograd
import autograd.numpy as anp
import autograd.scipy as ascipy
import config
import utils
from paddle.incubate.autograd import primx


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun', 'xs', 'dtype'),
    (
        (
            'uniform_random',
            lambda: paddle.uniform(
                [1, 2, 3], dtype='float32', min=0, max=1.0, seed=1
            ),
            (),
            'int32',
        ),
        (
            'sigmoid',
            paddle.nn.functional.sigmoid,
            (np.random.rand(5),),
            'float32',
        ),
    ),
)
class TestForwardApi(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)

    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    def test_grad(self):
        # expected() runs the original ops directly; actual() lowers them to
        # prim ops and back via orig2prim/prim2orig before comparing outputs.
        def expected():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs = utils.gen_static_inputs_and_feed(
                    self.xs, stop_gradient=False
                )
                out = self.fun(*static_xs)
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=out)
            paddle.incubate.autograd.enable_prim()
            return out

        def actual():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs = utils.gen_static_inputs_and_feed(
                    self.xs, stop_gradient=False
                )
                out = self.fun(*static_xs)
                primx.orig2prim(mp.block(0))
                primx.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=out)
            paddle.incubate.autograd.disable_prim()
            return out

        expected = expected()
        actual = actual()
        self.assertEqual(type(actual), type(expected))
        for i, j in zip(actual, expected):
            np.testing.assert_allclose(i, j, rtol=1e-6)


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
    (
        (
            'dropout',
            paddle.nn.functional.dropout,
            (np.random.rand(5000, 5000),),
            None,
            'float32',
        ),
    ),
)
class TestDropoutGrad(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
        cls._rtol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("rtol")
        )
        cls._atol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("atol")
        )

    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    def test_grad(self):
        def expected():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                _, ys_grad = paddle.incubate.autograd.vjp(
                    self.fun, static_xs, static_v
                )
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.enable_prim()
            return out

        def actual():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.grad(ys, static_xs, static_v)
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        expected = expected()
        actual = actual()
        self.assertEqual(type(actual), type(expected))
        # Dropout is stochastic, so only compare aggregate sums loosely.
        for i, j in zip(actual, expected):
            np.testing.assert_allclose(np.sum(i), np.sum(j), rtol=1e-1)


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
    (
        (
            'matmul',
            paddle.matmul,
            (np.random.rand(2, 3), np.random.rand(3, 2)),
            None,
            'float32',
        ),
    ),
)
class TestWithoutProgramGuard(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
        cls._rtol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("rtol")
        )
        cls._atol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("atol")
        )

    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    def test_forward_grad_without_program_guard(self):
        def with_program_guard():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.forward_grad(
                    ys, static_xs, static_v
                )
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        def without_program_guard():
            paddle.incubate.autograd.enable_prim()
            feed, static_xs, static_v = utils.gen_static_data_and_feed(
                self.xs, self.v, stop_gradient=False
            )
            ys = (
                self.fun(*static_xs)
                if isinstance(static_xs, typing.Sequence)
                else self.fun(static_xs)
            )
            ys_grad = paddle.incubate.autograd.forward_grad(
                ys, static_xs, static_v
            )
            sp = paddle.fluid.framework.default_startup_program()
            mp = paddle.fluid.framework.default_main_program()
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        expected = with_program_guard()
        actual = without_program_guard()
        self.assertEqual(type(actual), type(expected))
        np.testing.assert_allclose(
            np.concatenate(actual),
            np.concatenate(expected),
            rtol=self._rtol,
            atol=self._atol,
        )

    def test_grad_without_program_guard(self):
        def with_program_guard():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                xs_grad = paddle.incubate.autograd.grad(ys, static_xs, static_v)
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=xs_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        def without_program_guard():
            paddle.incubate.autograd.enable_prim()
            feed, static_xs, static_v = utils.gen_static_data_and_feed(
                self.xs, self.v, stop_gradient=False
            )
            ys = (
                self.fun(*static_xs)
                if isinstance(static_xs, typing.Sequence)
                else self.fun(static_xs)
            )
            xs_grad = paddle.incubate.autograd.grad(ys, static_xs, static_v)
            sp = paddle.fluid.framework.default_startup_program()
            mp = paddle.fluid.framework.default_main_program()
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=xs_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        expected = with_program_guard()
        actual = without_program_guard()
        for i, j in zip(actual, expected):
            self.assertEqual(type(i), type(j))
            np.testing.assert_allclose(
                np.concatenate(i),
                np.concatenate(j),
                rtol=self._rtol,
                atol=self._atol,
            )


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
    (
        (
            'matmul',
            paddle.matmul,
            (np.random.rand(2, 3), np.random.rand(3, 2)),
            None,
            'float32',
        ),
        (
            'multiply',
            paddle.multiply,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float64',
        ),
        (
            'add',
            paddle.add,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float32',
        ),
        (
            'input_not_sequence',
            paddle.tanh,
            (np.random.rand(5, 5),),
            None,
            'float64',
        ),
        (
            'input_gradients_not_none',
            paddle.matmul,
            (np.random.rand(3, 3), np.random.rand(3, 3)),
            (np.random.rand(3, 3), np.random.rand(3, 3)),
            'float64',
        ),
        ('log', paddle.log, (np.random.rand(3, 4),), None, 'float32'),
        (
            'abs',
            paddle.abs,
            (np.random.uniform(-10, 10, (10, 10)),),
            None,
            'float32',
        ),
        ('rsqrt', paddle.rsqrt, (np.random.rand(100, 200),), None, 'float32'),
        (
            'sigmoid',
            paddle.nn.functional.sigmoid,
            (np.random.rand(5),),
            None,
            'float32',
        ),
    ),
)
# paddle.where, paddle.pow and paddle.maximum have no double-grad definition,
# so their forward grad cannot be computed with the double-grad trick.
class TestForwardGrad(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
        cls._rtol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("rtol")
        )
        cls._atol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("atol")
        )

    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    def test_forward_grad(self):
        def expected():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                _, ys_grad = paddle.incubate.autograd.jvp(
                    self.fun, static_xs, static_v
                )
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.enable_prim()
            return out

        def actual():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.forward_grad(
                    ys, static_xs, static_v
                )
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        actual = actual()
        expected = expected()
        self.assertEqual(type(actual), type(expected))
        np.testing.assert_allclose(
            np.concatenate(actual),
            np.concatenate(expected),
            rtol=self._rtol,
            atol=self._atol,
        )

    def test_prim_disabled(self):
        paddle.incubate.autograd.disable_prim()
        sp = paddle.static.Program()
        mp = paddle.static.Program()
        with self.assertRaises(RuntimeError):
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.forward_grad(
                    ys, static_xs, static_v
                )
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            exe.run(mp, feed=feed, fetch_list=ys_grad)
        paddle.incubate.autograd.enable_prim()

    def test_illegal_param(self):
        paddle.incubate.autograd.enable_prim()
        with self.assertRaises(TypeError):
            paddle.incubate.autograd.forward_grad(
                1, paddle.static.data('inputs', shape=[1])
            )

        with self.assertRaises(TypeError):
            paddle.incubate.autograd.forward_grad(
                paddle.static.data('targets', shape=[1]), 1
            )
        paddle.incubate.autograd.disable_prim()


where_wrap = lambda x, y: paddle.where(paddle.eye(3, 4) == 1, x, y)


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun', 'xs', 'v', 'dtype'),
    (
        (
            'matmul',
            paddle.matmul,
            (np.random.rand(2, 3), np.random.rand(3, 2)),
            None,
            'float32',
        ),
        (
            'multiply',
            paddle.multiply,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float64',
        ),
        (
            'div',
            paddle.divide,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float64',
        ),
        (
            'add',
            paddle.add,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float32',
        ),
        (
            'input_not_sequence',
            paddle.tanh,
            (np.random.rand(5, 5),),
            None,
            'float64',
        ),
        (
            'input_gradients_not_none',
            paddle.matmul,
            (np.random.rand(3, 3), np.random.rand(3, 3)),
            (np.random.rand(3, 3),),
            'float64',
        ),
        ('sin', paddle.sin, (np.random.rand(100, 200),), None, 'float32'),
        ('rsqrt', paddle.rsqrt, (np.random.rand(100, 200),), None, 'float32'),
        ('cos', paddle.cos, (np.random.rand(200, 90),), None, 'float32'),
        ('exp', paddle.exp, (np.random.rand(299, 320),), None, 'float32'),
        # For the where op, the gradient of the condition computed by
        # paddle.static.gradients is None. paddle.incubate.autograd.grad
        # replaces None with zeros, while the transpose rule simply returns
        # None because cond_dot is unused; this is a known difference.
        (
            'select',
            where_wrap,
            (np.random.rand(3, 4), np.random.rand(3, 4)),
            None,
            'float32',
        ),
        # pow_p and pow differ when computing z_dot for 0**0.
        (
            'pow',
            paddle.pow,
            (np.array([1, 2, 3]), np.array([0, 2, 7])),
            None,
            'float32',
        ),
        # To keep max_p consistent with paddle.maximum, make sure that
        # x.grad = 0 and y.grad = 1 when x == y.
        (
            'max',
            paddle.maximum,
            (
                np.array([1, 2, 3]),
                np.array([2, 2, 2]),
            ),
            None,
            'float32',
        ),
        ('erf', paddle.erf, (np.random.rand(300, 288),), None, 'float32'),
        (
            'gelu',
            paddle.nn.functional.gelu,
            (np.random.rand(200, 189),),
            None,
            'float32',
        ),
        (
            'gelu_approximate',
            lambda x: paddle.nn.functional.gelu(x, True),
            (np.random.rand(200, 189),),
            None,
            'float32',
        ),
        ('sum', paddle.sum, (np.random.rand(200, 345),), None, 'float32'),
        (
            'sigmoid',
            paddle.nn.functional.sigmoid,
            (np.random.rand(5),),
            None,
            'float32',
        ),
        (
            'sum_with_axis',
            lambda x: paddle.sum(x, axis=1),
            (np.random.rand(200, 345),),
            None,
            'float32',
        ),
        (
            'sum_with_keepdim',
            lambda x: paddle.sum(x, keepdim=True),
            (np.random.rand(200, 345),),
            None,
            'float32',
        ),
        ('mean', paddle.mean, (np.random.rand(200, 345),), None, 'float32'),
        (
            'mean_with_axis',
            lambda x: paddle.mean(x, axis=1),
            (np.random.rand(200, 345),),
            None,
            'float32',
        ),
        (
            'mean_with_keepdim',
            lambda x: paddle.mean(x, keepdim=True),
            (np.random.rand(200, 345),),
            None,
            'float32',
        ),
        (
            'mean_with_axis_keepdim',
            lambda x: paddle.mean(x, axis=0, keepdim=True),
            (np.random.rand(200, 345),),
            None,
            'float32',
        ),
        (
            'abs',
            paddle.abs,
            (np.random.uniform(-10, 10, (200, 345)),),
            None,
            'float32',
        ),
        (
            'cast_float',
            lambda x: paddle.cast(x, paddle.float64),
            (np.random.rand(10, 20),),
            None,
            'float32',
        ),
        (
            'cast_int',
            lambda x: paddle.cast(x, paddle.int32),
            (np.random.rand(10, 20),),
            None,
            'float32',
        ),
        ('square', paddle.square, (np.random.rand(100),), None, 'float32'),
        (
            'pow_scalar',
            lambda x: paddle.pow(x, 2),
            (np.random.rand(20, 30),),
            None,
            'float32',
        ),
        ('var', paddle.var, (np.random.rand(200, 324),), None, 'float32'),
        (
            'var_with_axis',
            lambda x: paddle.var(x, axis=1),
            (np.random.rand(10, 20, 30),),
            None,
            'float32',
        ),
        (
            'var_without_unbiased',
            lambda x: paddle.var(x, axis=1, unbiased=False),
            (np.random.rand(10, 20, 30),),
            None,
            'float32',
        ),
        (
            'var_with_keepdim',
            lambda x: paddle.var(x, axis=1, keepdim=True),
            (np.random.rand(10, 20, 30),),
            None,
            'float32',
        ),
        (
            'bn',
            lambda x, w, b: paddle.nn.functional.batch_norm(
                x, paddle.ones((10,)), paddle.ones((10,)), w, b
            ),
            (np.random.rand(10, 10), np.random.rand(10), np.random.rand(10)),
            None,
            'float32',
        ),
        (
            'bn_train',
            lambda x, w, b: paddle.nn.functional.batch_norm(
                x, paddle.ones((10,)), paddle.ones((10,)), w, b, training=True
            ),
            (np.random.rand(10, 10), np.random.rand(10), np.random.rand(10)),
            None,
            'float32',
        ),
        (
            'bn_nhwc',
            lambda x, w, b: paddle.nn.functional.batch_norm(
                x,
                paddle.ones((10,)) + 1,
                paddle.ones((10,)),
                w,
                b,
                training=True,
                data_format='NHWC',
            ),
            (np.random.rand(10, 10), np.random.rand(10), np.random.rand(10)),
            None,
            'float32',
        ),
        (
            'bn_global_stat',
            lambda x, w, b: paddle.nn.functional.batch_norm(
                x,
                paddle.ones((10,)) + 3.2,
                paddle.ones((10,)) + 6.7,
                w,
                b,
                training=True,
                data_format='NHWC',
                use_global_stats=True,
            ),
            (np.random.rand(10, 10), np.random.rand(10), np.random.rand(10)),
            None,
            'float32',
        ),
    ),
)
class TestGrad(unittest.TestCase):
    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
        cls._rtol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("rtol")
        )
        cls._atol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("atol")
        )

    def test_grad(self):
        def expected():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                _, ys_grad = paddle.incubate.autograd.vjp(
                    self.fun, static_xs, static_v
                )
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.enable_prim()
            return out

        def actual():
            paddle.incubate.autograd.enable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.grad(ys, static_xs, static_v)
                paddle.incubate.autograd.prim2orig(mp.block(0))
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.disable_prim()
            return out

        actual = actual()
        expected = expected()
        self.assertEqual(type(actual), type(expected))
        for i, j in zip(actual, expected):
            np.testing.assert_allclose(i, j, rtol=self._rtol, atol=self._atol)

    def test_illegal_param(self):
        paddle.incubate.autograd.enable_prim()
        with self.assertRaises(TypeError):
            paddle.incubate.autograd.grad(
                1, paddle.static.data('inputs', shape=[1])
            )

        with self.assertRaises(TypeError):
            paddle.incubate.autograd.grad(
                paddle.static.data('targets', shape=[1]), 1
            )
        paddle.incubate.autograd.disable_prim()

    def test_disable_prim(self):
        def expected():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.incubate.autograd.grad(ys, static_xs, static_v)
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.enable_prim()
            return out

        def actual():
            paddle.incubate.autograd.disable_prim()
            sp = paddle.static.Program()
            mp = paddle.static.Program()
            with paddle.static.program_guard(mp, sp):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun(static_xs)
                )
                ys_grad = paddle.static.gradients(ys, static_xs, static_v)
            exe = paddle.static.Executor()
            exe.run(sp)
            out = exe.run(mp, feed=feed, fetch_list=ys_grad)
            paddle.incubate.autograd.enable_prim()
            return out

        actual = actual()
        expected = expected()
        self.assertEqual(type(actual), type(expected))
        for i, j in zip(actual, expected):
            np.testing.assert_allclose(i, j, rtol=self._rtol, atol=self._atol)


def multiply_pd(x):
    x2 = paddle.multiply(x, x)
    x3 = paddle.multiply(x2, x2)
    x4 = paddle.multiply(x3, x)
    return x4


# Reference implementations built on autograd/numpy, used to check
# higher-order gradients computed by paddle.incubate.autograd.grad.
multiply_ag = lambda xs: xs[0] * xs[0] * xs[0] * xs[0] * xs[0]
sin_ag = lambda xs: anp.sin(xs[0])
cos_ag = lambda xs: anp.cos(xs[0])
exp_ag = lambda xs: anp.exp(xs[0])
pow_ag = lambda xs: xs[0] ** xs[1]
log_ag = lambda xs: anp.log(xs[0])
erf_ag = lambda xs: ascipy.special.erf(xs[0])
sigmoid_ag = lambda xs: 1.0 / (1 + anp.exp(-xs[0]))


def gelu_ag(x, approximate=False):
    if approximate:
        sqrt_2_over_pi = np.sqrt(2 / np.pi).astype(x.dtype)
        cdf = 0.5 * (1.0 + anp.tanh(sqrt_2_over_pi * (x + 0.044715 * (x**3))))
        return x * cdf
    else:
        return x * (ascipy.special.erf(x / np.sqrt(2)) + 1) / 2


@utils.place(config.DEVICES)
@utils.parameterize(
    (utils.TEST_CASE_NAME, 'fun_pd', 'fun_ag', 'xs', 'v', 'dtype'),
    (
        (
            'multiply',
            multiply_pd,
            multiply_ag,
            (np.random.rand(3, 5),),
            None,
            'float32',
        ),
        ('sin', paddle.sin, sin_ag, (np.random.rand(2, 3),), None, 'float32'),
        ('cos', paddle.cos, cos_ag, (np.random.rand(3, 4),), None, 'float32'),
        ('exp', paddle.exp, exp_ag, (np.random.rand(2, 3),), None, 'float32'),
        (
            'pow',
            paddle.pow,
            pow_ag,
            (np.random.rand(2, 3), np.random.rand(2, 3)),
            None,
            'float32',
        ),
        ('log', paddle.log, log_ag, (np.random.rand(3, 8),), None, 'float32'),
        (
            'erf',
            paddle.erf,
            erf_ag,
            (np.random.rand(100, 200),),
            None,
            'float32',
        ),
        (
            'gelu',
            paddle.nn.functional.gelu,
            lambda xs: gelu_ag(xs[0]),
            (np.random.rand(10, 20, 30),),
            None,
            'float32',
        ),
        (
            'gelu_approximate',
            lambda x: paddle.nn.functional.gelu(x, approximate=True),
            lambda xs: gelu_ag(xs[0], approximate=True),
            (np.random.rand(10, 20, 30),),
            None,
            'float32',
        ),
        (
            'sigmoid',
            paddle.nn.functional.sigmoid,
            sigmoid_ag,
            (np.random.rand(10, 20),),
            None,
            'float32',
        ),
    ),
)
class TestGradWithHigherOrder(unittest.TestCase):
    def setUp(self):
        paddle.enable_static()
        paddle.incubate.autograd.enable_prim()

    def tearDown(self):
        paddle.incubate.autograd.disable_prim()
        paddle.disable_static()

    @classmethod
    def setUpClass(cls):
        cls.xs = tuple(x.astype(cls.dtype) for x in cls.xs)
        cls._rtol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("rtol")
        )
        cls._atol = (
            config.TOLERANCE.get(str(cls.dtype))
            .get("first_order_grad")
            .get("atol")
        )

    def test_grad(self):
        def expected():
            egrad = autograd.elementwise_grad
            grad_3 = egrad(egrad(egrad(self.fun_ag)))(self.xs)
            grad_4 = egrad(egrad(egrad(egrad(self.fun_ag))))(self.xs)
            grad_5 = egrad(egrad(egrad(egrad(egrad(self.fun_ag)))))(self.xs)
            # egrad returns a tuple, so join the results into a flat list.
            return list(grad_3 + grad_4 + grad_5)

        def actual():
            paddle_grad = paddle.incubate.autograd.grad
            paddle.incubate.autograd.enable_prim()
            main = paddle.static.Program()
            startup = paddle.static.Program()
            with paddle.static.program_guard(main, startup):
                feed, static_xs, static_v = utils.gen_static_data_and_feed(
                    self.xs, self.v, stop_gradient=False
                )
                ys = (
                    self.fun_pd(*static_xs)
                    if isinstance(static_xs, typing.Sequence)
                    else self.fun_pd(static_xs)
                )
                grad1 = paddle_grad(ys, static_xs, static_v)
                grad2 = paddle_grad(grad1, static_xs, static_v)
                grad3 = paddle_grad(grad2, static_xs, static_v)
                grad4 = paddle_grad(grad3, static_xs, static_v)
                grad5 = paddle_grad(grad4, static_xs, static_v)
                paddle.incubate.autograd.prim2orig()

            fetch_list = [grad3, grad4, grad5]

            place = paddle.CPUPlace()
            if paddle.device.is_compiled_with_cuda():
                place = paddle.CUDAPlace(0)
            exe = paddle.static.Executor(place)
            exe.run(startup)
            outs = exe.run(main, feed=feed, fetch_list=fetch_list)
            paddle.incubate.autograd.disable_prim()
            return outs

        actual = actual()
        expected = expected()
        self.assertEqual(type(actual), type(expected))
        for i, j in zip(actual, expected):
            np.testing.assert_allclose(i, j, rtol=self._rtol, atol=self._atol)


if __name__ == '__main__':
    unittest.main()