# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections

import numpy as np
import py_paddle.swig_paddle as api
import paddle.trainer.PyDataProvider2 as dp2

__all__ = ['DataConverter']


class IDataConverter(object):
    def __init__(self, input_type, pos):
        """
        :param input_type: data type
        :type input_type: dp2.InputType
        :param pos: which input, start from 0
        :type pos: int
        """
        self.input_type = input_type
        assert isinstance(self.input_type, dp2.InputType)
        self.pos = pos

    def convert(self, data, argument):
        """
        Convert data to Paddle's format.

        :param data: input data
        :param argument: paddle format
        """
        pass


class DenseConvert(IDataConverter):
    def __init__(self, input_type, pos):
        IDataConverter.__init__(self, input_type, pos)

    def convert(self, data, argument):
        """
        :param data: input data
        :type data: list | numpy array
        :param argument: the type which paddle is acceptable
        :type argument: Paddle's Arguments
        """
        assert isinstance(argument, api.Arguments)
        data = np.asarray(data)
        if data.dtype != np.float32:
            data = data.astype(np.float32)
        m = api.Matrix.createDenseFromNumpy(data, True, False)
        argument.setSlotValue(self.pos, m)


class SparseBinaryConvert(IDataConverter):
    def __init__(self, input_type, pos):
        IDataConverter.__init__(self, input_type, pos)
        self.__rows__ = [0]
        self.__cols__ = []
        self.__height__ = 0
        self.__nnz__ = 0
        self.__value__ = []

    def fill_csr(self, data):
        # Each sample is a list of column indices; build CSR row offsets and
        # the concatenated column index array.
        self.__height__ = len(data)
        for x in data:
            self.__rows__.append(self.__rows__[-1] + len(x))
            self.__cols__.extend(x)

    def convert(self, data, argument):
        assert isinstance(argument, api.Arguments)
        self.fill_csr(data)
        m = api.Matrix.createSparse(self.__height__, self.input_type.dim,
                                    len(self.__cols__),
                                    len(self.__value__) == 0)
        assert isinstance(m, api.Matrix)
        m.sparseCopyFrom(self.__rows__, self.__cols__, self.__value__)
        argument.setSlotValue(self.pos, m)


class SparseFloatConvert(SparseBinaryConvert):
    def __init__(self, input_type, pos):
        SparseBinaryConvert.__init__(self, input_type, pos)

    def fill_csr(self, data):
        # Each sample is a list of (column index, value) pairs.
        self.__height__ = len(data)
        for x in data:
            self.__rows__.append(self.__rows__[-1] + len(x))
            self.__cols__.extend(col for col, _ in x)
            self.__value__.extend(val for _, val in x)


class IndexConvert(IDataConverter):
    def __init__(self, input_type, pos):
        IDataConverter.__init__(self, input_type, pos)
        self.__ids__ = []

    def convert(self, data, argument):
        assert isinstance(argument, api.Arguments)
        self.__ids__ = np.asarray(data).flatten().tolist()
        ids = api.IVector.create(self.__ids__)
        argument.setSlotIds(self.pos, ids)


class SequenceConvert(IDataConverter):
    def __init__(self, input_type, pos, inner_convert, setter):
        """
        :param input_type: the type of input data
        :type input_type: dp2.InputType
        :param pos: the position of this input
        :type pos: int
        :param inner_convert: the converter used for the elements of each
                              sequence
        :type inner_convert: DenseConvert|SparseBinaryConvert|
                             SparseFloatConvert|IndexConvert
        :param setter: callable that sets the sequence start positions on the
                       argument
        :type setter: callable(argument, pos, seq)
        """
        IDataConverter.__init__(self, input_type, pos)
        self.__seq__ = [0]
        self.__inner_convert__ = inner_convert
        self.__setter__ = setter
    def fill_seq(self, data):
        # Accumulate per-sample sequence lengths into start positions.
        for each in data:
            self.__seq__.append(self.__seq__[-1] + self.get_size(each))

    def convert(self, data, argument):
        self.fill_seq(data)
        seq = api.IVector.create(self.__seq__, False)
        self.__setter__(argument, self.pos, seq)

        # Flatten one sequence level so the inner converter sees the
        # concatenated timesteps that the start positions index into.
        dat = []
        for each in data:
            dat.extend(each)
        self.__inner_convert__.convert(dat, argument)

    def get_size(self, data):
        if isinstance(self.__inner_convert__, SequenceConvert):
            return sum(self.__inner_convert__.get_size(item) for item in data)
        else:
            return len(data)


class DataConverter(object):
    def __init__(self, input):
        """
        Usage:

        .. code-block:: python

            inputs = [('image', dense_vector), ('label', integer_value)]
            cvt = DataConverter(inputs)
            arg = cvt(minibatch_data, {'image': 0, 'label': 1})

        :param input: list of (input_name, input_type)
        :type input: list
        """
        self.input_names = []
        self.input_types = []
        for each in input:
            self.input_names.append(each[0])
            self.input_types.append(each[1])
            assert isinstance(each[1], dp2.InputType)

    def convert(self, data, input_dict=None, argument=None):
        """
        Convert minibatch data to Paddle's argument. The data is a numpy array
        or a list.

        :param data: input samples, for example, [column0, column1, ...] or
                     (column0, column1, ...). Each column is one minibatch
                     feature. Note that even if there is only one column
                     feature, data should still be a list or a tuple:
                     [column0] or (column0,).
        :type data: list|tuple
        :param input_dict: a dictionary to specify the correspondence between
                           data layers and the input data. If None, the feature
                           order in argument and data is the same.
        :type input_dict: dict, like {string: integer, ...}|None
        :param argument: converted data will be saved in this argument. If
                         None, a new Paddle's Arguments is created first.
        :type argument: swig_paddle.Arguments|None
        """
        if argument is None:
            argument = api.Arguments.createArguments(0)
        assert isinstance(argument, api.Arguments)
        argument.resize(len(self.input_types))

        converts = [
            DataConverter.create_converter(i, each_type)
            for i, each_type in enumerate(self.input_types)
        ]

        for i, cvt in enumerate(converts):
            if input_dict is not None:
                dat = data[input_dict[self.input_names[i]]]
            else:
                dat = data[i]
            cvt.convert(dat, argument)

        return argument

    def __call__(self, dat, input_dict=None, argument=None):
        return self.convert(dat, input_dict, argument)

    @staticmethod
    def create_converter(pos, each):
        assert isinstance(each, dp2.InputType)
        retv = None
        if each.type == dp2.DataType.Dense:
            retv = DenseConvert(each, pos)
        elif each.type == dp2.DataType.Index:
            retv = IndexConvert(each, pos)
        elif each.type == dp2.DataType.SparseNonValue:
            retv = SparseBinaryConvert(each, pos)
        elif each.type == dp2.DataType.SparseValue:
            retv = SparseFloatConvert(each, pos)
        assert retv is not None

        if each.seq_type == dp2.SequenceType.SUB_SEQUENCE:
            retv = SequenceConvert(
                each, pos, retv,
                lambda arg, pos, seq:
                    arg.setSlotSubSequenceStartPositions(pos, seq))
        if each.seq_type in [
                dp2.SequenceType.SUB_SEQUENCE, dp2.SequenceType.SEQUENCE
        ]:
            retv = SequenceConvert(
                each, pos, retv,
                lambda arg, pos, seq:
                    arg.setSlotSequenceStartPositions(pos, seq))
        return retv
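

# Usage sketch: converting a numpy minibatch with DataConverter. The input
# names ('image', 'label'), the dimensions, and the batch size below are
# illustrative assumptions, and the Paddle runtime is assumed to be
# initialized beforehand via api.initPaddle(...).
#
#   import numpy as np
#   import paddle.trainer.PyDataProvider2 as dp2
#
#   inputs = [('image', dp2.dense_vector(784)),
#             ('label', dp2.integer_value(10))]
#   cvt = DataConverter(inputs)
#
#   minibatch = [np.random.rand(32, 784).astype(np.float32),
#                np.random.randint(0, 10, size=(32, 1))]
#   arg = cvt.convert(minibatch, {'image': 0, 'label': 1})
#   # 'arg' is a swig_paddle.Arguments instance: slot 0 holds a dense matrix,
#   # slot 1 holds an integer id vector.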