#!/usr/bin/env python
# -*- encoding:utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
AutoDL definition
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import argparse
import subprocess
import collections

import numpy as np
import paddle.fluid as fluid

from reinforce_policy_gradient import ReinforcePolicyGradient
from policy_model import PolicyModel
from autodl_agent import AutoDLAgent
import utils


class AutoDL(object):
    """
    AutoDL class
    """

    def __init__(self):
        """
        init
        """
        self.parse_args = self._init_parser()
        self.bl_decay = self.parse_args.bl_decay
        self.log_dir = self.parse_args.log_dir
        self.early_stop = self.parse_args.early_stop
        self.data_path = self.parse_args.data_path
        self.num_models = self.parse_args.num_models
        self.batch_size = self.parse_args.batch_size
        self.chunk_size = self.parse_args.chunk_size
        self._init_dir_path()

        self.model = PolicyModel(self.parse_args)
        algo_hyperparas = {'lr': self.parse_args.learning_rate}
        self.algorithm = ReinforcePolicyGradient(self.model,
                                                 hyperparas=algo_hyperparas)
        self.autodl_agent = AutoDLAgent(self.algorithm, self.parse_args)
        self.total_reward = 0

    def _init_dir_path(self):
        """
        init dir path
        """
        utils.prepare(self.log_dir)
        utils.prepare(self.log_dir, "actions")
        utils.prepare(self.log_dir, "rewards")
        utils.prepare(self.log_dir, "checkpoints")

    def _init_parser(self):
        """
        init parser
        """
        parser = argparse.ArgumentParser(description='AutoDL Parser',
                                         prog='AutoDL')
        parser.add_argument('-v', '--version', action='version',
                            version='%(prog)s 0.1')
        parser.add_argument('--num_nodes', dest="num_nodes", nargs="?",
                            type=int, const=10, default=10,
                            help="number of nodes")
        parser.add_argument('--num_tokens', dest="num_tokens", nargs="?",
                            type=int, const=10, default=10,
                            help="number of tokens")
        parser.add_argument('--learning_rate', dest="learning_rate",
                            nargs="?", type=float, default=1e-3,
                            help="learning rate")
        parser.add_argument('--batch_size', dest="batch_size", nargs="?",
                            type=int, const=10, default=10,
                            help="batch size")
        parser.add_argument('--num_models', dest="num_models", nargs="?",
                            type=int, const=32000, default=32000,
                            help="maximum number of models sampled")
        parser.add_argument('--early_stop', dest="early_stop", nargs="?",
                            type=int, const=20, default=20,
                            help="early stop")
        parser.add_argument('--log_dir', dest="log_dir", nargs="?",
                            type=str, const="./log", default="./log",
                            help="directory of log")
        parser.add_argument('--input_size', dest="input_size", nargs="?",
                            type=int, const=10, default=10,
                            help="input size")
        parser.add_argument('--hidden_size', dest="hidden_size", nargs="?",
                            type=int, const=64, default=64,
                            help="hidden size")
        parser.add_argument('--num_layers', dest="num_layers", nargs="?",
                            type=int, const=2, default=2,
                            help="num layers")
        parser.add_argument('--bl_decay', dest="bl_decay", nargs="?",
                            type=float, const=0.9, default=0.9,
                            help="baseline decay")
        # inception train config
        parser.add_argument('--data_path', dest="data_path", nargs="?",
                            type=str, default="./cifar/pickle-cifar-10",
                            help="path of data files")
        parser.add_argument('--chunk_size', dest="chunk_size", nargs="?",
                            type=int, const=100, default=100,
                            help="chunk size")
        parse_args = parser.parse_args()
        return parse_args

    def supervisor(self, mid):
        """
        execute cnn training
        sample cmd: python -u inception_train/train.py --mid=9 \
                --early_stop=20 --data_path=./cifar/pickle-cifar-10
        """
        tokens, adjvec = utils.load_action(mid, self.log_dir)
        cmd = ("CUDA_VISIBLE_DEVICES=1 python -u inception_train/train.py "
               "--mid=%d --early_stop=%d --logdir=%s --data_path=%s "
               "--chunk_size=%d") % (mid, self.early_stop, self.log_dir,
                                     self.data_path, self.chunk_size)
        print("cmd:{}".format(cmd))
        # retry until the training subprocess finishes successfully
        while True:
            try:
                subprocess.check_call(cmd, shell=True)
                break
            except subprocess.CalledProcessError as e:
                print("[%s] training model #%d exits with exit code %d"
                      % (utils.stime(), mid, e.returncode), file=sys.stderr)
        return

    def simple_run(self):
        """
        simple run
        """
        print("Simple run target is 20")
        mid = 0
        shadow = 0
        is_first = True
        while mid <= self.num_models:
            actions_to, actions_ad = self.autodl_agent.sample()
            # toy reward: the number of sampled tokens equal to 1
            rewards = np.count_nonzero(actions_to == 1, axis=1).astype("int32")

            # moving-average baseline (shadow) of the rewards
            current_mean_reward = np.mean(rewards)
            if is_first:
                shadow = current_mean_reward
                is_first = False
            else:
                shadow = shadow * self.bl_decay \
                    + current_mean_reward * (1 - self.bl_decay)

            self.autodl_agent.learn(
                (np.array(actions_to).astype("int32"),
                 np.array(actions_ad).astype("int32")),
                rewards - shadow)
            if mid % 10 == 0:
                print('mid=%d, average rewards=%.3f' % (mid, np.mean(rewards)))
            mid += 1

    def run(self):
        """
        run
        """
        rewards = []
        mid = 0
        while mid <= self.num_models:
            actions_to, actions_ad = self.autodl_agent.sample()
            for action in zip(actions_to, actions_ad):
                # persist the sampled action, train the corresponding CNN,
                # and read back the accuracy it achieved as the reward
                utils.dump_action(mid, action, self.log_dir)
                self.supervisor(mid)
                current_reward = utils.load_reward(mid, self.log_dir)
                if not np.isnan(current_reward):
                    rewards.append(current_reward.item())
                mid += 1

            if len(rewards) % self.batch_size == 0:
                print("[%s] step = %d, average accuracy = %.3f"
                      % (utils.stime(), self.autodl_agent.global_step,
                         np.mean(rewards)))
                # update the moving-average baseline, then reinforce with
                # the centered (cubed) rewards
                rewards_array = np.array(rewards).astype("float32")
                if self.total_reward == 0:
                    self.total_reward = rewards_array.mean()
                else:
                    self.total_reward = self.total_reward * self.bl_decay \
                        + (1 - self.bl_decay) * rewards_array.mean()
                rewards_array = rewards_array - self.total_reward
                self.autodl_agent.learn(
                    [actions_to.astype("int32"), actions_ad.astype("int32")],
                    rewards_array ** 3)
                rewards = []
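

# A minimal entry-point sketch, not part of the original module: it assumes
# AutoDL is launched directly with the command-line flags defined in
# _init_parser(). Calling simple_run() instead of run() exercises the agent
# without training any CNNs.
if __name__ == "__main__":
    autodl = AutoDL()
    autodl.run()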