# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import math
import os
import sys
import unittest

import numpy as np

# TODO: remove sys.path.append
sys.path.append("../legacy_test")
import nets

import paddle
from paddle import fluid


def convolution_net(
    data, label, input_dim, class_dim=2, emb_dim=32, hid_dim=32
):
    emb = fluid.layers.embedding(
        input=data, size=[input_dim, emb_dim], is_sparse=True
    )
    conv_3 = nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=3,
        act="tanh",
        pool_type="sqrt",
    )
    conv_4 = nets.sequence_conv_pool(
        input=emb,
        num_filters=hid_dim,
        filter_size=4,
        act="tanh",
        pool_type="sqrt",
    )
    prediction = paddle.static.nn.fc(
        x=[conv_3, conv_4], size=class_dim, activation="softmax"
    )
    cost = paddle.nn.functional.cross_entropy(
        input=prediction, label=label, reduction='none', use_softmax=False
    )
    avg_cost = paddle.mean(cost)
    accuracy = paddle.static.accuracy(input=prediction, label=label)
    return avg_cost, accuracy, prediction


def train(
    word_dict,
    net_method,
    use_cuda,
    parallel=False,
    save_dirname=None,
    is_local=True,
):
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

    data = paddle.static.data(
        name="words", shape=[-1, 1], dtype="int64", lod_level=1
    )
    label = paddle.static.data(name="label", shape=[-1, 1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim
        )
    else:
        raise NotImplementedError()

    adagrad = paddle.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000
        ),
        batch_size=BATCH_SIZE,
    )
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(
                    main_program,
                    feed=feeder.feed(data),
                    fetch_list=[cost, acc_out],
                )
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname, ["words"], prediction, exe
                        )
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError(f"Cost is too large for {net_method.__name__}")

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_PSERVER_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_PSERVER_IPS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("PADDLE_TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
        t = paddle.distributed.transpiler.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(
                current_endpoint, pserver_prog
            )
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())


def infer(word_dict, use_cuda, save_dirname=None):
    if save_dirname is None:
        return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        # Use fluid.io.load_inference_model to obtain the inference program desc,
        # the feed_target_names (the names of variables that will be fed
        # data using feed operators), and the fetch_targets (variables that
        # we want to obtain data from using fetch operators).
        [
            inference_program,
            feed_target_names,
            fetch_targets,
        ] = fluid.io.load_inference_model(save_dirname, exe)

        word_dict_len = len(word_dict)

        # Setup input by creating LoDTensor to represent sequence of words.
        # Here each word is the basic element of the LoDTensor and the shape of
        # each word (base_shape) should be [1] since it is simply an index to
        # look up for the corresponding word vector.
        # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]],
        # which has only one level of detail. Then the created LoDTensor will have only
        # one higher level structure (sequence of words, or sentence) than the basic
        # element (word). Hence the LoDTensor will hold data for three sentences of
        # length 3, 4 and 2, respectively.
        # Note that recursive_sequence_lengths should be a list of lists.
        recursive_seq_lens = [[3, 4, 2]]
        base_shape = [1]
        # The range of random integers is [low, high]
        tensor_words = fluid.create_random_int_lodtensor(
            recursive_seq_lens, base_shape, place, low=0, high=word_dict_len - 1
        )

        # Construct feed as a dictionary of {feed_target_name: feed_target_data}
        # and results will contain a list of data corresponding to fetch_targets.
        assert feed_target_names[0] == "words"
        results = exe.run(
            inference_program,
            feed={feed_target_names[0]: tensor_words},
            fetch_list=fetch_targets,
            return_numpy=False,
        )
        print(results[0].recursive_sequence_lengths())
        np_data = np.array(results[0])
        print("Inference Shape: ", np_data.shape)
        print("Inference results: ", np_data)


def main(word_dict, net_method, use_cuda, parallel=False, save_dirname=None):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    train(
        word_dict,
        net_method,
        use_cuda,
        parallel=parallel,
        save_dirname=save_dirname,
    )
    infer(word_dict, use_cuda, save_dirname)


class TestUnderstandSentiment(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.word_dict = paddle.dataset.imdb.word_dict()

    @contextlib.contextmanager
    def new_program_scope(self):
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                yield

    def test_conv_cpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=False,
                save_dirname="understand_sentiment_conv.inference.model",
            )

    def test_conv_cpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=False,
                parallel=True,
            )

    def test_conv_gpu(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=True,
                save_dirname="understand_sentiment_conv.inference.model",
            )

    def test_conv_gpu_parallel(self):
        with self.new_program_scope():
            main(
                self.word_dict,
                net_method=convolution_net,
                use_cuda=True,
                parallel=True,
            )


if __name__ == '__main__':
    unittest.main()