#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(luotao02): This config is only used for unit tests. It is out of date
# now, and will be updated later.

# NOTE: when changing this file, please make sure that
# sample_trainer_config_qb_rnn.conf is changed accordingly, so that the unit
# test comparing these two nets (test_CompareTwoNets) can pass.

default_initial_std(0.1)
default_device(0)

# Module-level defaults consumed by ltr_network() below.
word_dim = 1451594
l1 = 0
l2 = 0

model_type("recurrent_nn")

# Command-line overridable switch, forwarded to the embedding TableProjection.
sparse_update = get_config_arg("sparse_update", bool, False)

TrainData(
    ProtoData(
        type="proto_sequence",
        files='trainer/tests/train.list',
    )
)

Settings(
    algorithm='sgd',
    batch_size=100,
    learning_rate=0.0001,
    learning_rate_decay_a=4e-08,
    learning_rate_decay_b=0.0,
    learning_rate_schedule='poly',
)

wordvec_dim = 128
layer2_dim = 96
layer3_dim = 96
hidden_dim = 128

slot_names = ["qb", "qw", "tb", "tw"]


def SimpleRecurrentLayer(name,
                         size,
                         active_type,
                         bias,
                         input_layer_name,
                         parameter_name,
                         seq_reversed = False):
    """Declare a recurrent layer group wrapping a single "mixed" layer.

    The group is called ``name + "_layer_group"``; it takes
    ``input_layer_name`` as its only in-link and exposes ``name`` as its
    only out-link. Inside the group, the layer ``name`` sums an identity
    projection of the input with a full-matrix projection (weights named
    ``parameter_name``) of the Memory declared for that same layer name.

    ``size``, ``active_type`` and ``bias`` are forwarded to the mixed layer;
    ``seq_reversed`` is forwarded to RecurrentLayerGroupBegin.
    """
    group_name = name + "_layer_group"
    RecurrentLayerGroupBegin(
        group_name,
        in_links=[input_layer_name],
        out_links=[name],
        seq_reversed=seq_reversed,
    )
    # Memory refers to the layer declared right below (same name and size).
    recurrent_state = Memory(name=name, size=size)
    input_projection = IdentityProjection(input_layer_name)
    state_projection = FullMatrixProjection(
        recurrent_state,
        parameter_name=parameter_name,
    )
    Layer(
        name=name,
        type="mixed",
        size=size,
        active_type=active_type,
        bias=bias,
        inputs=[input_projection, state_projection],
    )
    RecurrentLayerGroupEnd(group_name)


def ltr_network(network_name,
                word_dim=word_dim,
                wordvec_dim=wordvec_dim,
                layer2_dim=layer2_dim,
                layer3_dim=layer3_dim,
                hidden_dim=hidden_dim,
                slot_names=slot_names,
                l1=l1,
                l2=l2):
    """Declare one scoring tower whose layer names end in ``network_name``.

    For every slot in ``slot_names`` this registers an input and declares a
    data layer, an embedding layer (TableProjection), a SimpleRecurrentLayer
    and a "seqlastins" layer on top of it. The per-slot "seqlastins" layers
    feed the fc layer ``layer2_<network_name>``, followed by
    ``layer3_<network_name>`` and the size-1 ``output_<network_name>``.

    None of the parameter names include ``network_name``, so every call of
    this function refers to the same parameter names (presumably this is how
    the "left" and "right" towers share weights -- confirm against the
    config parser).
    """
    # All inputs are registered first, before any layer is declared.
    for slot_name in slot_names:
        Inputs(slot_name + network_name)

    for slot_name in slot_names:
        data_name = slot_name + network_name
        embedding_name = slot_name + "_embedding_" + network_name
        rnn_name = slot_name + "_rnn1_" + network_name
        last_name = slot_name + "_rnnlast_" + network_name

        Layer(
            name=data_name,
            type="data",
            size=word_dim,
            device=-1,
        )
        Layer(
            name=embedding_name,
            type="mixed",
            size=wordvec_dim,
            bias=False,
            device=-1,
            inputs=TableProjection(
                data_name,
                parameter_name="embedding.w0",
                decay_rate_l1=l1,
                sparse_remote_update=True,
                sparse_update=sparse_update,
            ),
        )
        SimpleRecurrentLayer(
            name=rnn_name,
            size=hidden_dim,
            active_type="tanh",
            bias=Bias(initial_std=0, parameter_name="rnn1.bias"),
            input_layer_name=embedding_name,
            parameter_name="rnn1.w0",
        )
        Layer(
            name=last_name,
            type="seqlastins",
            inputs=[rnn_name],
        )

    # layer2 takes one input per slot, each with its own weight parameter.
    layer2_inputs = [
        Input(
            slot_name + "_rnnlast_" + network_name,
            parameter_name="_layer2_" + slot_name + ".w",
            decay_rate=l2,
            initial_smart=True,
        )
        for slot_name in slot_names
    ]
    Layer(
        name="layer2_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer2_dim,
        bias=Bias(parameter_name="layer2.bias"),
        inputs=layer2_inputs,
    )
    Layer(
        name="layer3_" + network_name,
        type="fc",
        active_type="tanh",
        size=layer3_dim,
        bias=Bias(parameter_name="layer3.bias"),
        inputs=[
            Input(
                "layer2_" + network_name,
                parameter_name="_layer3.w",
                decay_rate=l2,
                initial_smart=True,
            ),
        ],
    )
    Layer(
        name="output_" + network_name,
        type="fc",
        size=1,
        bias=False,
        inputs=[
            Input("layer3_" + network_name, parameter_name="_layerO.w"),
        ],
    )


# Two towers built by the same function, compared by a rank cost.
ltr_network("left")
ltr_network("right")

Inputs("label")
Layer(
    name="label",
    type="data",
    size=1,
)

Outputs("cost", "qb_rnnlast_left")

Layer(
    name="cost",
    type="rank-cost",
    inputs=["output_left", "output_right", "label"],
)