#!/usr/bin/env python
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Trainer config for a nested-sequence LSTM classifier.
#
# Pipeline built below:
#   word ids -> embedding -> recurrent_group over sub-sequences (LSTM step)
#   -> last_seq -> expand_layer -> average pooling -> softmax mixed_layer
#   -> classification cost against the "label" data layer.

from paddle.trainer_config_helpers import *

######################## data source ################################
dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'

# Read the dictionary file exactly once and close the handle promptly; the
# cached lines feed both the word -> index map and the vocabulary size below.
with open(dict_path, "r") as dict_fin:
    dict_lines = dict_fin.readlines()

# Map each stripped line to its zero-based line number. If a stripped line
# occurs more than once, the later line number wins.
dict_file = dict()
for line_count, line in enumerate(dict_lines):
    dict_file[line.strip()] = line_count

define_py_data_sources(train_list='gserver/tests/Sequence/train.list.nest',
                       test_list=None,
                       module='sequenceGen',
                       obj='process2',
                       args={"dict_file": dict_file})

settings(batch_size=2)

######################## network configure ################################
# Input size is the number of lines in the dictionary file (not the number
# of unique keys in dict_file).
dict_dim = len(dict_lines)
word_dim = 128
hidden_dim = 256
label_dim = 3

data = data_layer(name="word", size=dict_dim)

emb_group = embedding_layer(input=data, size=word_dim)


# (lstm_input + lstm) is equal to lstmemory
def lstm_group(lstm_group_input):
    """Step function passed to recurrent_group for the nested LSTM.

    Projects ``lstm_group_input`` through a full-matrix projection into a
    mixed layer of size ``hidden_dim * 4`` and feeds that layer to
    ``lstmemory_group``.

    :param lstm_group_input: layer supplied by recurrent_group as the step
        input.
    :return: the output of ``lstmemory_group`` (size ``hidden_dim``).
    """
    with mixed_layer(size=hidden_dim * 4) as group_input:
        group_input += full_matrix_projection(input=lstm_group_input)

    lstm_output = lstmemory_group(
        input=group_input,
        name="lstm_group",
        size=hidden_dim,
        act=TanhActivation(),
        gate_act=SigmoidActivation(),
        state_act=TanhActivation(),
        lstm_layer_attr=ExtraLayerAttribute(error_clipping_threshold=50))
    return lstm_output


lstm_nest_group = recurrent_group(input=SubsequenceInput(emb_group),
                                  step=lstm_group,
                                  name="lstm_nest_group")

# hasSubseq ->(seqlastins) seq
lstm_last = last_seq(input=lstm_nest_group,
                     agg_level=AggregateLevel.EACH_SEQUENCE)

# seq ->(expand) hasSubseq
lstm_expand = expand_layer(input=lstm_last,
                           expand_as=emb_group,
                           expand_level=ExpandLevel.FROM_SEQUENCE)

# hasSubseq ->(average) seq
lstm_average = pooling_layer(input=lstm_expand,
                             pooling_type=AvgPooling(),
                             agg_level=AggregateLevel.EACH_SEQUENCE)

# Softmax classifier over label_dim classes on top of the pooled features.
with mixed_layer(size=label_dim,
                 act=SoftmaxActivation(),
                 bias_attr=True) as output:
    output += full_matrix_projection(input=lstm_average)

outputs(classification_cost(input=output,
                            label=data_layer(name="label", size=1)))