# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid.layers as layers
from paddle.fluid.contrib.decoder.beam_search_decoder import *


def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim, is_generating, beam_size, max_length):
    def encoder():
        # Encoder of the RNN translation model: embed the source words,
        # project them, run a dynamic LSTM, and take the last step of the
        # LSTM output as the sentence context.
        src_word = layers.data(
            name="src_word", shape=[1], dtype='int64', lod_level=1)
        src_embedding = layers.embedding(
            input=src_word,
            size=[source_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)

        fc1 = layers.fc(input=src_embedding, size=encoder_size * 4, act='tanh')
        lstm_hidden0, lstm_0 = layers.dynamic_lstm(
            input=fc1, size=encoder_size * 4)
        encoder_out = layers.sequence_last_step(input=lstm_hidden0)
        return encoder_out

    def decoder_state_cell(context):
        # Decoder state cell: declares the hidden state variable 'h' and the
        # updater that advances it by one step given the current input word.
        h = InitState(init=context, need_reorder=True)
        state_cell = StateCell(
            inputs={'x': None}, states={'h': h}, out_state='h')

        @state_cell.state_updater
        def updater(state_cell):
            current_word = state_cell.get_input('x')
            prev_h = state_cell.get_state('h')
            # Make sure the LoD of h is inherited from prev_h.
            h = layers.fc(
                input=[prev_h, current_word], size=decoder_size, act='tanh')
            state_cell.set_state('h', h)

        return state_cell

    def decoder_train(state_cell):
        # Training decoder of the RNN translation model: a teacher-forced
        # RNN over the embedded target sequence.
        trg_word = layers.data(
            name="target_word", shape=[1], dtype='int64', lod_level=1)
        trg_embedding = layers.embedding(
            input=trg_word,
            size=[target_dict_dim, embedding_dim],
            dtype='float32',
            is_sparse=True)

        # A training decoder
        decoder = TrainingDecoder(state_cell)

        # Define the computation the decoder performs in each RNN step
        with decoder.block():
            current_word = decoder.step_input(trg_embedding)
            decoder.state_cell.compute_state(inputs={'x': current_word})
            current_score = layers.fc(
                input=decoder.state_cell.get_state('h'),
                size=target_dict_dim,
                act='softmax')
            decoder.state_cell.update_states()
            decoder.output(current_score)

        return decoder()

    def decoder_infer(state_cell):
        # Inference decoder: beam search over the target vocabulary.
        init_ids = layers.data(
            name="init_ids", shape=[1], dtype="int64", lod_level=2)
        init_scores = layers.data(
            name="init_scores", shape=[1], dtype="float32", lod_level=2)

        # A beam search decoder for inference
        decoder = BeamSearchDecoder(
            state_cell=state_cell,
            init_ids=init_ids,
            init_scores=init_scores,
            target_dict_dim=target_dict_dim,
            word_dim=embedding_dim,
            input_var_dict={},
            topk_size=50,
            sparse_emb=True,
            max_len=max_length,
            beam_size=beam_size,
            end_id=1,
            name=None)
        decoder.decode()
        translation_ids, translation_scores = decoder()
        return translation_ids, translation_scores

    context = encoder()
    state_cell = decoder_state_cell(context)

    if not is_generating:
        label = layers.data(
            name="target_next_word", shape=[1], dtype='int64', lod_level=1)
        rnn_out = decoder_train(state_cell)
        cost = layers.cross_entropy(input=rnn_out, label=label)
        avg_cost = layers.mean(x=cost)
        feeding_list = ['src_word', 'target_word', 'target_next_word']
        return avg_cost, feeding_list
    else:
        translation_ids, translation_scores = decoder_infer(state_cell)
        feeding_list = ['src_word']
        return translation_ids, translation_scores, feeding_list
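

if __name__ == '__main__':
    # A minimal smoke-test sketch (not part of the original benchmark): it
    # only builds the training program and runs the startup program, so no
    # input data is needed. The hyperparameter values below are illustrative
    # assumptions, not settings taken from this repo. Assumes the legacy
    # paddle.fluid 1.x API that the rest of this file is written against.
    import paddle.fluid as fluid

    avg_cost, feeding_list = seq_to_seq_net(
        embedding_dim=512,
        encoder_size=512,
        decoder_size=512,
        source_dict_dim=30000,
        target_dict_dim=30000,
        is_generating=False,
        beam_size=3,
        max_length=50)
    fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_cost)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    print('Built seq2seq training program; feed order:', feeding_list)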