#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import unittest

import paddle.fluid as fluid
import paddle.fluid.initializer as initializer
from paddle.fluid import Program, program_guard

from op_test import OpTest


def nce(input, weight, bias, sample_weight, labels, num_classes,
        num_sample_class):
    """NumPy reference for the forward pass of the NCE operator.

    For every row of ``input`` the true classes listed in ``labels`` are
    scored first, followed by the fixed negative classes
    ``0 .. num_sample_class - 1`` (no random sampling happens here).

    Args:
        input: 2-D array; ``input.shape[0]`` is the batch size and each row
            is dotted with a row of ``weight``.
        weight: array indexed by class id; ``weight[c]`` is dotted with an
            input row.
        bias: array indexed by class id, or ``None`` to skip the bias term.
        sample_weight: array indexed by row giving a per-row multiplier for
            the cost, or ``None`` to use a weight of 1 for every row.
        labels: 2-D array of true class ids; ``labels.shape[1]`` is the
            number of true classes per row.
        num_classes: total number of classes, used for the noise term.
        num_sample_class: number of negative classes scored per row.

    Returns:
        A 3-tuple ``(cost, sample_out, sample_labels)``:
        ``cost`` has shape ``(batch_size, 1)``; ``sample_out`` holds the
        sigmoid of each score and ``sample_labels`` the matching class ids,
        both reshaped to
        ``(batch_size, num_sample_class + num_true_class)``.
    """
    batch_size = input.shape[0]
    num_true_class = labels.shape[1]
    samples_per_row = num_sample_class + num_true_class

    # One (row, class id, is_true, row weight) record per scored class:
    # the true labels of a row come first, then its negative classes.
    samples = []
    for row in range(batch_size):
        row_weight = sample_weight[row] if sample_weight is not None else 1
        samples.extend((row, cls, True, row_weight) for cls in labels[row])
        samples.extend(
            (row, cls, False, row_weight) for cls in range(num_sample_class))
    sample_labels = [record[1] for record in samples]

    # forward bias and weight: score = bias[class] + <input[row], weight[class]>
    scores = np.zeros(len(samples)).astype(np.float32)
    for idx, (row, cls, _, _) in enumerate(samples):
        if bias is not None:
            scores[idx] = bias[cls]
        scores[idx] += np.dot(input[row], weight[cls])

    # forward activation (sigmoid)
    probs = 1.0 / (1.0 + np.exp(-scores))

    # forward cost, accumulated per row and scaled by the row weight
    cost_per_row = np.zeros(batch_size).astype(np.float32)
    noise = 1.0 / num_classes * num_sample_class
    for prob, (row, _, is_true, row_weight) in zip(probs, samples):
        if is_true:
            cost = -np.log(prob / (prob + noise))
        else:
            cost = -np.log(noise / (prob + noise))
        cost_per_row[row] += cost * row_weight

    grid = (batch_size, samples_per_row)
    return (cost_per_row[:, np.newaxis], np.array(probs).reshape(grid),
            np.array(sample_labels).reshape(grid))


class TestNCE(OpTest):
    """Operator test for ``nce`` on dense inputs.

    Random inputs are generated in ``set_data``/``generate_data`` and the
    expected outputs are produced by the module-level NumPy reference
    function ``nce``. The actual comparison is done by the ``OpTest`` base
    class (not visible in this file).
    """

    def generate_data(self, dim, batch_size, num_classes, num_true_class,
                      num_neg_samples, is_sparse):
        """Populate ``self.inputs`` and ``self.attrs`` with random test data.

        Args:
            dim: feature width of the input and of each weight row.
            batch_size: number of input rows.
            num_classes: total number of classes.
            num_true_class: number of true labels per row.
            num_neg_samples: number of negative classes per row.
            is_sparse: forwarded unchanged as the ``is_sparse`` attribute.
        """
        # Arrays are drawn in a fixed order: input, weight, bias,
        # sample weight, labels.
        features = np.random.randn(batch_size, dim).astype(np.float32)
        class_weight = np.random.randn(num_classes, dim).astype(np.float32)
        class_bias = np.random.randn(num_classes).astype(np.float32)
        row_weight = np.random.randn(batch_size).astype(np.float32)
        label_shape = (batch_size, num_true_class)
        true_labels = np.random.randint(0, num_classes,
                                        label_shape).astype("int64")

        # The negatives are pinned to classes 0 .. num_neg_samples - 1, which
        # is the same set the NumPy reference ``nce`` scores as negatives.
        negative_classes = list(range(num_neg_samples))
        self.attrs = {
            'num_total_classes': num_classes,
            'num_neg_samples': num_neg_samples,
            'custom_neg_classes': negative_classes,
            'seed': 0,
            'sampler': 0,
            'is_sparse': is_sparse
        }
        self.inputs = {
            'Input': features,
            'Label': true_labels,
            'Weight': class_weight,
            'Bias': class_bias,
            'SampleWeight': row_weight
        }

    def set_data(self):
        """Choose the problem size; subclasses override this hook."""
        self.generate_data(5, 25, 100, 1, 2, False)

    def compute(self):
        """Fill ``self.outputs`` with the NumPy reference results."""
        feeds, attrs = self.inputs, self.attrs
        cost, sample_logits, sample_labels = nce(
            feeds['Input'], feeds['Weight'], feeds['Bias'],
            feeds['SampleWeight'], feeds['Label'],
            attrs['num_total_classes'], attrs['num_neg_samples'])
        self.outputs = {
            'Cost': cost,
            'SampleLogits': sample_logits,
            'SampleLabels': sample_labels
        }

    def setUp(self):
        """Select the op type, then build the inputs and expected outputs."""
        self.op_type = 'nce'
        self.set_data()
        self.compute()

    def test_check_output(self):
        """Delegate the forward check to ``OpTest.check_output``."""
        self.check_output()

    def test_check_grad(self):
        """Delegate the gradient check of ``Cost`` to ``OpTest.check_grad``.

        Gradients are checked w.r.t. Input, Weight and Bias with a maximum
        relative error of 0.02.
        """
        grad_inputs = ["Input", "Weight", "Bias"]
        self.check_grad(grad_inputs, "Cost", max_relative_error=0.02)


class TestNCECase1Tensor(TestNCE):
    """``TestNCE`` with a different problem size.

    Uses wider features, two true labels per row and five negative classes;
    everything else is inherited from ``TestNCE``.
    """

    def set_data(self):
        """Generate the inputs for this case (dense, ``is_sparse=False``)."""
        self.generate_data(
            dim=10,
            batch_size=20,
            num_classes=100,
            num_true_class=2,
            num_neg_samples=5,
            is_sparse=False)


class TestNCECase1SelectedRows(unittest.TestCase):
    """Compare ``fluid.layers.nce`` run with ``is_sparse`` off and on.

    The same network is built twice, each time in a fresh scope and fresh
    programs, and executed once on identical data; the two mean losses are
    then asserted to be equal.
    """

    def setUp(self):
        """Set the learning rate and batch size used by the test."""
        self.base_lr = 0.0001
        self.batch_size = 8

    @staticmethod
    def get_place():
        """Return the place the programs run on (CPU)."""
        return fluid.core.CPUPlace()

    @staticmethod
    def get_train_data(batch_size):
        """Return ``batch_size`` random ``[input, labels]`` pairs.

        Each input is a float32 array of shape ``(batch_size, 10)`` and each
        labels array holds integers in ``[0, 20)`` with shape
        ``(batch_size, 1)``.
        """
        # Within each pair the input is drawn before the labels.
        return [[
            np.random.randn(batch_size, 10).astype(np.float32),
            np.random.randint(0, 20, (batch_size, 1))
        ] for _ in range(batch_size)]

    def get_optimizer(self):
        """Return an SGD optimizer using ``self.base_lr``."""
        return fluid.optimizer.SGD(learning_rate=self.base_lr)

    def train_network(self, num_total_classes, num_neg_samples, sampler,
                      custom_dist, is_sparse):
        """Build the NCE network in the current default program.

        Args:
            num_total_classes: forwarded to ``fluid.layers.nce``; also the
                first dimension of the created parameters.
            num_neg_samples: forwarded to ``fluid.layers.nce``.
            sampler: forwarded to ``fluid.layers.nce``.
            custom_dist: forwarded to ``fluid.layers.nce``.
            is_sparse: forwarded to ``fluid.layers.nce``.

        Returns:
            ``[avg_cost, [input_var, label_var]]``: the mean-cost variable
            and the two data-layer variables to feed.
        """
        feature_var = fluid.layers.data(
            name="input", shape=[10], dtype="float32")
        label_var = fluid.layers.data(name="label", shape=[1], dtype="int64")

        # Create the weight and bias parameters up front under the names the
        # nce layer is given below through param_attr / bias_attr.
        global_block = fluid.default_main_program().global_block()
        param_specs = (('nce_w', [num_total_classes, 10]),
                       ('nce_b', [num_total_classes, 1]))
        for param_name, param_shape in param_specs:
            global_block.create_parameter(
                shape=param_shape,
                dtype='float32',
                name=param_name,
                initializer=initializer.ConstantInitializer())

        cost = fluid.layers.nce(input=feature_var,
                                label=label_var,
                                num_total_classes=num_total_classes,
                                sampler=sampler,
                                custom_dist=custom_dist,
                                sample_weight=None,
                                param_attr='nce_w',
                                bias_attr='nce_b',
                                seed=1,
                                num_neg_samples=num_neg_samples,
                                is_sparse=is_sparse)
        avg_cost = fluid.layers.mean(cost)

        # optimizer
        self.get_optimizer().minimize(avg_cost)

        return [avg_cost, [feature_var, label_var]]

    def _run_one_step(self, exe, place, data, custom_dist, is_sparse):
        """Build the network in a fresh scope/programs, run once, return the mean loss."""
        scope = fluid.core.Scope()
        startup = fluid.framework.Program()
        main = fluid.framework.Program()
        with fluid.scope_guard(scope), fluid.program_guard(main, startup):
            cost, feeds = self.train_network(20, 5, "custom_dist",
                                             custom_dist, is_sparse)
            feeder = fluid.DataFeeder(feed_list=feeds, place=place)
            exe.run(startup)
            loss_val = exe.run(main,
                               feed=feeder.feed(data),
                               fetch_list=[cost.name])
            return np.mean(loss_val)

    def test_input_is_selected_rows(self):
        """The dense and sparse variants must produce the same mean loss."""
        place = self.get_place()
        exe = fluid.Executor(place)

        data = self.get_train_data(self.batch_size)
        nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')

        # Dense run first, then sparse, on the same data and distribution.
        dense_loss, sparse_loss = [
            self._run_one_step(exe, place, data, nid_freq_arr.tolist(),
                               sparse_flag) for sparse_flag in (False, True)
        ]

        self.assertEqual(dense_loss, sparse_loss)


class TestNCE_OpError(unittest.TestCase):
    """Check that ``fluid.layers.nce`` raises ``TypeError`` on bad arguments."""

    def test_errors(self):
        """Each of the four invalid (input, label) pairs must raise TypeError."""

        def make_lod_tensor():
            # A LoDTensor built directly from NumPy data, used where the
            # layer is expected to reject a non-Variable argument.
            values = np.array([0.0, 3.0, 2.0, 4.0])
            return fluid.create_lod_tensor(values, [[1, 1, 2]],
                                           fluid.CPUPlace())

        def expect_type_error(nce_input, nce_label):
            self.assertRaises(TypeError, fluid.layers.nce, nce_input,
                              nce_label, 5)

        with program_guard(Program(), Program()):
            # the input(input) of nce layer must be Variable.
            input1 = make_lod_tensor()
            label1 = fluid.layers.data(
                name='label1', shape=[-1, 4], dtype="int64")
            expect_type_error(input1, label1)

            # the input(label) of nce layer must be Variable.
            input2 = fluid.layers.data(
                name='input2', shape=[-1, 4], dtype="float32")
            label2 = make_lod_tensor()
            expect_type_error(input2, label2)

            # the data type of input(input) must be float32 or float64.
            input3 = fluid.layers.data(
                name='input3', shape=[-1, 4], dtype="float16")
            label3 = fluid.layers.data(
                name='label3', shape=[-1, 1], dtype="int64")
            expect_type_error(input3, label3)

            # the data type of input(label) must be int64.
            input4 = fluid.layers.data(
                name='input4', shape=[-1, 4], dtype="float32")
            label4 = fluid.layers.data(
                name='label4', shape=[-1, 1], dtype="int32")
            expect_type_error(input4, label4)


# Run all test cases defined in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()