#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

import sys
from .reader_utils import DataReader
try:
    import cPickle as pickle
    from cStringIO import StringIO
except ImportError:
    import pickle
    from io import BytesIO
import numpy as np
import random

# Interpreter version info; compared against (3, 0) in this module to decide
# whether pickle.load needs the encoding='bytes' argument.
python_ver = sys.version_info


class FeatureReader(DataReader):
    """
    Data reader for youtube-8M dataset, which was stored as features extracted by prior networks
    This is for the three models: lstm, attention cluster, nextvlad

    dataset cfg: MODEL.num_classes
                 <MODE>.batch_size
                 <MODE>.filelist   (text file, one pickle file path per line)
                 MODEL.eigen_file  (optional, stored on the reader only)
                 MODEL.seg_num     (optional, used when name is 'ATTENTIONCLUSTER')
    """

    def __init__(self, name, mode, cfg):
        """
        name: model name; 'NEXTVLAD' and 'ATTENTIONCLUSTER' are special-cased
              in create_reader
        mode: 'train' enables shuffling, 'infer' yields the video field
              instead of the one-hot label; mode.upper() selects the cfg
              section holding batch_size and filelist
        cfg:  config object exposing cfg.MODEL and cfg[mode.upper()]
        """
        self.name = name
        self.mode = mode
        self.num_classes = cfg.MODEL.num_classes

        # set batch size and file list
        mode_cfg = cfg[mode.upper()]
        self.batch_size = mode_cfg['batch_size']
        self.filelist = mode_cfg['filelist']
        self.eigen_file = cfg.MODEL.get('eigen_file', None)
        self.seg_num = cfg.MODEL.get('seg_num', None)

    @staticmethod
    def _load_records(filepath):
        """
        Load one pickle file and return its content.

        The file is opened in a with-block so the handle is closed as soon as
        loading finishes instead of being left to the garbage collector.

        NOTE: pickle can execute arbitrary code while loading; only files from
        a trusted source should be listed in the filelist.
        """
        with open(filepath, 'rb') as pickle_file:
            if python_ver < (3, 0):
                return pickle.load(pickle_file)
            # encoding='bytes' keeps the keys as bytes, matching the b'...'
            # lookups done in create_reader
            return pickle.load(pickle_file, encoding='bytes')

    def create_reader(self):
        """
        Build and return a generator function yielding batches.

        Each batch is a list of batch_size tuples:
            (rgb, audio, one_hot_label) when mode is not 'infer'
            (rgb, audio, video)         when mode is 'infer'

        In 'train' mode the file list is shuffled once here, and the records
        of every file are shuffled each time the returned reader is iterated.
        """
        # read the whole list inside a with-block so the handle is closed
        with open(self.filelist) as list_file:
            stripped = [line.strip() for line in list_file]
        fl = [path for path in stripped if path != '']
        if self.mode == 'train':
            random.shuffle(fl)

        def reader():
            batch_out = []
            for filepath in fl:
                data = self._load_records(filepath)
                indexes = list(range(len(data)))
                if self.mode == 'train':
                    random.shuffle(indexes)
                for i in indexes:
                    record = data[i]
                    nframes = record[b'nframes']
                    rgb = record[b'feature'].astype(float)
                    audio = record[b'audio'].astype(float)
                    if self.mode != 'infer':
                        label = record[b'label']
                        one_hot_label = make_one_hot(label, self.num_classes)
                    video = record[b'video']

                    # keep only the first nframes rows of each feature
                    rgb = rgb[0:nframes, :]
                    audio = audio[0:nframes, :]

                    # NOTE(review): NEXTVLAD features are passed on without
                    # dequantization here - presumably handled downstream,
                    # confirm against the model code
                    if self.name != 'NEXTVLAD':
                        rgb = dequantize(
                            rgb,
                            max_quantized_value=2.,
                            min_quantized_value=-2.)
                        audio = dequantize(
                            audio,
                            max_quantized_value=2,
                            min_quantized_value=-2)

                    if self.name == 'ATTENTIONCLUSTER':
                        # sample seg_num frame indices, shared by rgb and audio
                        sample_inds = generate_random_idx(rgb.shape[0],
                                                          self.seg_num)
                        rgb = rgb[sample_inds]
                        audio = audio[sample_inds]
                    if self.mode != 'infer':
                        batch_out.append((rgb, audio, one_hot_label))
                    else:
                        batch_out.append((rgb, audio, video))
                    if len(batch_out) == self.batch_size:
                        yield batch_out
                        batch_out = []
            # a trailing batch with fewer than batch_size samples is not
            # yielded (behaviour kept from the original implementation)

        return reader


def dequantize(feat_vector, max_quantized_value=2., min_quantized_value=-2.):
    """
    Rescale a quantized feature into the float range given by the bounds

    feat_vector: value(s) to rescale; must support * and + with floats
    max_quantized_value: upper bound, must be greater than the lower bound
    min_quantized_value: lower bound
    returns: feat_vector * (range / 255) + (range / 512 + min_quantized_value)
    """
    assert min_quantized_value < max_quantized_value

    span = max_quantized_value - min_quantized_value
    # one step per byte level, shifted by half a step of a 256-level grid
    step = span / 255.0
    offset = (span / 512.0) + min_quantized_value

    rescaled = feat_vector * step + offset
    return rescaled


def make_one_hot(label, dim=3862):
    """
    Build a float vector of size dim with 1 at every index listed in label

    label: iterable of class indices (each converted with int())
    dim: length of the returned vector
    """
    encoded = np.zeros(dim, dtype=float)
    for class_id in label:
        position = int(class_id)
        encoded[position] = 1
    return encoded


def generate_random_idx(feature_len, seg_num):
    """
    Pick one random index from each of seg_num equal-width segments

    The range [0, feature_len) is split into seg_num segments of width
    feature_len / seg_num; a position is drawn in each segment with
    np.random.random() and truncated to int, capped at feature_len - 1.

    returns: list of seg_num ints
    """
    segment_width = float(feature_len) / seg_num
    last_valid = feature_len - 1
    return [
        min(last_valid, int((seg + np.random.random()) * segment_width))
        for seg in range(seg_num)
    ]