#edit-mode: -*- python -*-
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(luotao02): This config is only used for unit tests. It is out of date now and will be updated later.

# Note: when making changes to this file, please make sure that
# sample_trainer_config_qb_rnn.conf is changed accordingly, so that the unit test
# that compares these two nets (test_CompareTwoNets) still passes.

# Global defaults, set before any layer is declared.
default_initial_std(0.1)
default_device(0)

# Size of every slot's "data" layer (default for ltr_network's word_dim).
word_dim = 1451594
# Decay rates: l1 is passed to the embedding projection, l2 to the fc inputs.
l1 = 0
l2 = 0

model_type("recurrent_nn")

# Read the "sparse_update" config argument as a bool; False is passed as its
# default value.
sparse_update = get_config_arg("sparse_update", bool, False)

# Training data source. `files` is a plain string (the path of a list file).
TrainData(
    ProtoData(
        type="proto_sequence",
        files='trainer/tests/train.list',
    )
)

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly',
)


# Default layer sizes used by ltr_network.
wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

# Name prefixes of the input slots; ltr_network declares one
# data/embedding/rnn stack per slot.
slot_names = ["qb", "qw", "tb", "tw"]

def SimpleRecurrentLayer(name,
                         size,
                         active_type,
                         bias,
                         input_layer_name,
                         parameter_name,
                         seq_reversed=False):
    """Declare a simple recurrent layer as a recurrent layer group.

    The group is named ``name + "_layer_group"``, takes ``input_layer_name``
    as its only in-link and exposes ``name`` as its only out-link. Inside the
    group a single "mixed" layer called ``name`` is declared; it combines an
    identity projection of the input with a full-matrix projection of the
    memory that is bound to the layer's own name.

    Args:
        name: Name of the mixed layer (and of the memory that refers to it).
        size: Size of both the memory and the mixed layer.
        active_type: Activation type forwarded to the mixed layer.
        bias: Bias setting forwarded to the mixed layer.
        input_layer_name: Name of the layer feeding the group.
        parameter_name: Parameter name of the full-matrix projection applied
            to the memory.
        seq_reversed: Forwarded unchanged to RecurrentLayerGroupBegin.
    """
    group_name = name + "_layer_group"
    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed,
    )

    # The memory carries the same name as the layer declared below --
    # presumably it yields that layer's output from the previous step
    # (TODO confirm against the config parser).
    recurrent_state = Memory(name=name, size=size)
    step_inputs = [
        IdentityProjection(input_layer_name),
        FullMatrixProjection(recurrent_state, parameter_name=parameter_name),
    ]
    Layer(
        name=name,
        type="mixed",
        size=size,
        active_type=active_type,
        bias=bias,
        inputs=step_inputs,
    )

    RecurrentLayerGroupEnd(group_name)


def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one branch of the ranking network.

    For every slot in ``slot_names`` this declares, in order: a "data" layer,
    an embedding ("mixed" layer with a table projection), a
    SimpleRecurrentLayer and a "seqlastins" layer on top of the recurrent
    layer. The per-slot "seqlastins" layers then feed the fc layer
    ``"layer2_" + network_name``, followed by ``"layer3_" + network_name``
    and the size-1 fc layer ``"output_" + network_name``.

    All layer names contain ``network_name``; the parameter names are fixed
    strings that do not, so branches built by separate calls use the same
    parameter names (presumably to share weights -- confirm against the
    config parser).

    Args:
        network_name: Suffix that makes the layer names of this branch unique.
        word_dim: Size of each slot's data layer.
        wordvec_dim: Size of each embedding layer.
        layer2_dim: Size of the "layer2_" fc layer.
        layer3_dim: Size of the "layer3_" fc layer.
        hidden_dim: Size of each recurrent layer.
        slot_names: Sequence of slot name prefixes; it is iterated more than
            once and is not modified.
        l1: Passed as ``decay_rate_l1`` to the embedding table projection.
        l2: Passed as ``decay_rate`` to the inputs of layer2 and layer3.
    """
    # All inputs are registered before any layer is declared; iterating the
    # slots directly avoids the Python-2-only xrange index loop.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        data_name = slot_name + network_name
        embedding_name = slot_name + "_embedding_" + network_name
        rnn_name = slot_name + "_rnn1_" + network_name

        Layer(
            name=data_name,
            type="data",
            size=word_dim,
            device=-1,
        )
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(data_name,
                                   parameter_name="embedding.w0",
                                   decay_rate_l1=l1,
                                   sparse_remote_update=True,
                                   sparse_update=sparse_update,
                                   ),
        )
        SimpleRecurrentLayer(
            name=rnn_name,
            size=hidden_dim,
            active_type="tanh",
            bias=Bias(initial_std=0,
                      parameter_name="rnn1.bias"),
            input_layer_name=embedding_name,
            parameter_name="rnn1.w0",
        )
        Layer(
            name=slot_name + "_rnnlast_" + network_name,
            type="seqlastins",
            inputs=[rnn_name],
        )

    # One input (with its own weight parameter) per slot.
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=[Input(slot_name + "_rnnlast_" + network_name,
                      parameter_name="_layer2_" + slot_name + ".w",
                      decay_rate=l2,
                      initial_smart=True) for slot_name in slot_names],
    )
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input("layer2_" + network_name,
                  parameter_name="_layer3.w",
                  decay_rate=l2,
                  initial_smart=True),
        ],
    )
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[
            Input("layer3_" + network_name,
                  parameter_name="_layerO.w"),
        ],
    )


# Declare the two branches; their "output_left" / "output_right" layers are
# the first two inputs of the cost layer below.
ltr_network("left")
ltr_network("right")

# Label input, the third input of the cost layer.
Inputs("label")
Layer(name="label", type="data", size=1)

# Outputs are registered before the "cost" layer itself is declared.
Outputs("cost", "qb_rnnlast_left")
Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"],
)