# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.nn as nn
from paddle.distributed.passes.pass_utils import split_program
from paddle.vision.models import resnet18 as resnet


class TestSplitProgram(unittest.TestCase):
    def setUp(self):
        paddle.enable_static()
        if paddle.is_compiled_with_cuda():
            # Make cuDNN deterministic so the split and unsplit runs
            # produce bitwise-comparable results.
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})

    def get_model(self, batch_size):
        # Build a static-graph ResNet-18 classifier trained with SGD.
        main = paddle.static.Program()
        startup = paddle.static.Program()

        with paddle.static.program_guard(main, startup):
            image = paddle.static.data(
                shape=[batch_size, 3, 224, 224], dtype='float32', name='image')
            label = paddle.static.data(
                shape=[batch_size, 1], dtype='int64', name='label')

            model = resnet(pretrained=False)
            loss_fn = nn.loss.CrossEntropyLoss()
            pred_out = model(image)
            loss = loss_fn(pred_out, label)

            optimizer = paddle.optimizer.SGD(learning_rate=1e-3)
            optimizer.minimize(loss)
        return main, startup, image, label

    def find_startup_vars(self, main_prog, startup_prog):
        # Collect the persistable variables initialized by the startup
        # program; their final values are what the test compares.
        self.assertEqual(startup_prog.num_blocks, 1)
        startup_vars = []
        for op in startup_prog.global_block().ops:
            for var_name in op.output_arg_names:
                var = main_prog.global_block().var(var_name)
                if var.persistable:
                    startup_vars.append(var_name)
        return startup_vars

    def test_split_program(self):
        # The persistable variables after training must be identical
        # whether the program runs whole or as split sub-programs.
        for p in self.get_places():
            vars_expected = self.check_split_program(p, use_split=False)
            vars_actual = self.check_split_program(p, use_split=True)
            self.assertEqual(len(vars_actual), len(vars_expected))
            for actual, expected in zip(vars_actual, vars_expected):
                self.assertEqual(actual.shape, expected.shape)
                np.testing.assert_array_equal(
                    actual,
                    expected,
                    err_msg='{}\n{}\n'.format(actual, expected))

    def get_places(self):
        places = [paddle.CPUPlace()]
        if paddle.is_compiled_with_cuda():
            places.append(paddle.CUDAPlace(0))
        return places

    def get_var_values(self, scope, var_names):
        values = []
        for var_name in var_names:
            values.append(np.array(scope.find_var(var_name).get_tensor()))
        return values

    def check_split_program(self, place, use_split=True, seed=100,
                            batch_num=5):
        batch_size = 2

        np.random.seed(seed)
        paddle.seed(seed)

        main_prog, startup_prog, image, label = self.get_model(batch_size)
        startup_vars = self.find_startup_vars(main_prog, startup_prog)
        exe = paddle.static.Executor(place)

        image_np = np.random.random(size=image.shape).astype('float32')
        label_np = np.random.randint(
            low=0, high=1000, dtype='int64', size=label.shape)

        scope = paddle.static.Scope()
        if not use_split:
            # Baseline: run the whole program end to end.
            with paddle.static.scope_guard(scope):
                exe.run(startup_prog)
                for _ in range(batch_num):
                    exe.run(main_prog,
                            feed={
                                image.name: image_np,
                                label.name: label_np
                            })
            return self.get_var_values(scope, startup_vars)

        # Split the main program at roughly 1/3 and 3/4 of its op list,
        # then verify the pieces exactly partition the original ops.
        op_num = len(main_prog.global_block().ops)
        split_op_indices = [int(op_num / 3.0), int(op_num * 3 / 4.0)]
        programs, input_vars, output_vars = split_program(
            main_prog, split_op_indices)

        op_nums = [0] + split_op_indices + [op_num]
        op_nums = [op_nums[i + 1] - op_nums[i] for i in range(len(op_nums) - 1)]

        num_split = len(split_op_indices) + 1
        self.assertEqual(len(programs), num_split)
        self.assertEqual(len(input_vars), num_split)
        self.assertEqual(len(output_vars), num_split)
        self.assertEqual(len(programs), len(op_nums))
        for p, n in zip(programs, op_nums):
            self.assertEqual(len(p.global_block().ops), n)

        with paddle.static.scope_guard(scope):
            exe.run(startup_prog)
            for _ in range(batch_num):
                # Thread each sub-program's outputs into the next one's
                # feed, starting from the original input data.
                tmp_vars = {image.name: image_np, label.name: label_np}
                for i, program in enumerate(programs):
                    feed_dict = {}
                    for in_name in input_vars[i]:
                        if in_name in startup_vars:
                            continue
                        self.assertIn(in_name, tmp_vars)
                        if tmp_vars[in_name] is not None:
                            feed_dict[in_name] = tmp_vars[in_name]

                    output_var_values = exe.run(program,
                                                feed=feed_dict,
                                                fetch_list=output_vars[i],
                                                return_numpy=False)
                    for out_name, out_value in zip(output_vars[i],
                                                   output_var_values):
                        if not out_value._is_initialized():
                            # The fetched tensor holds no data; keep its
                            # shape with an uninitialized placeholder.
                            tmp_vars[out_name] = np.ndarray(
                                out_value._get_dims()).astype('float32')
                        else:
                            tmp_vars[out_name] = np.array(out_value)

        return self.get_var_values(scope, startup_vars)


if __name__ == "__main__":
    unittest.main()