# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import paddle
from paddle.distributed.auto_parallel.completion import Completer
from paddle.distributed.auto_parallel.dist_context import (
    DistributedContext,
    get_default_distributed_context,
)
from paddle.distributed.auto_parallel.partitioner import Partitioner
from paddle.distributed.auto_parallel.utils import set_var_dist_attr
from paddle.distributed.fleet import auto
from paddle.fluid.layer_helper import LayerHelper
from paddle.incubate.autograd import enable_prim

paddle.enable_static()
enable_prim()

nranks = 2
rank = 0


class TestPrimDistOp(unittest.TestCase):
    def setUp(self):
        self.main_program = paddle.static.Program()
        self.startup_program = paddle.static.Program()
        self.layer_help = LayerHelper('TestPrimDistOp')

        with paddle.static.program_guard(
            self.main_program, self.startup_program
        ):
            self.init_prog()

    def init_prog(self):
        # Build a small program of primitive ops (add_p, reduce_sum_p)
        # whose outputs must be synchronized across the data-parallel group.
        self.w = self.layer_help.create_parameter(
            dtype="float32", shape=[20], attr=None
        )
        self.w_grad = paddle.static.data(
            name='w_grad', shape=[20], dtype='float32'
        )
        self.tmp1 = paddle.static.data(
            name='tmp1', shape=[20], dtype='float32'
        )
        self.tmp2 = paddle.static.data(
            name='tmp2', shape=[20], dtype='float32'
        )
        self.batch_reduced = paddle.static.data(
            name='batch_reduced', shape=[], dtype='float32'
        )
        self.attrs = {}

        default_dist_context = get_default_distributed_context()
        _global_process_mesh = auto.ProcessMesh(list(range(nranks)))
        # Annotate both inputs as replicated over the global process mesh.
        # Note: the original test annotated tmp1 twice; the second call is
        # assumed to be meant for tmp2, which is otherwise left unannotated.
        set_var_dist_attr(
            default_dist_context,
            self.tmp1,
            [-1],
            _global_process_mesh,
            mark_annotated=True,
        )
        set_var_dist_attr(
            default_dist_context,
            self.tmp2,
            [-1],
            _global_process_mesh,
            mark_annotated=True,
        )

        self.layer_help.append_op(
            type="add_p",
            inputs={'X': self.tmp1, 'Y': self.w},
            outputs={'Z': self.w_grad},
            attrs=self.attrs,
        )
        self.layer_help.append_op(
            type="reduce_sum_p",
            inputs={'X': self.tmp2},
            outputs={'Y': self.batch_reduced},
            attrs={"axis": [0]},
        )

    def test_loss_and_grad_allreduce(self):
        dist_context = DistributedContext(
            self.main_program, self.startup_program
        )
        completer = Completer(dist_context)
        completer.complete_prim_annotation(self.main_program)
        dist_context.block_state.parse_forward_blocks(self.main_program)
        dist_context.block_state.parse_backward_blocks(self.main_program)
        # Register w_grad as the gradient of w so the partitioner knows
        # which tensors need gradient synchronization.
        dist_context.grads_params = {}
        dist_context.grads_params[self.w_grad.name] = self.w.name
        dist_context.synced_gradient = set()
        dist_context.data_parallel_group = list(range(nranks))
        partitioner = Partitioner(dist_context, rank)
        dist_main_prog, dist_startup_prog, _ = partitioner.partition(
            self.main_program, self.startup_program, [(self.w, self.w_grad)]
        )

        # The partitioner is expected to insert a c_allreduce_sum op after
        # each primitive op, synchronizing the gradient and the reduced
        # value across the data-parallel ranks.
        ops = dist_main_prog.global_block().ops
        self.assertTrue(ops[1].type == "c_allreduce_sum")
        self.assertTrue(ops[3].type == "c_allreduce_sum")


if __name__ == "__main__":
    unittest.main()