# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

import math

# Training and test data are produced by process() in dataprovider.py.
define_py_data_sources2(
    train_list="data/train.list",
    test_list="data/test.list",
    module="dataprovider",
    obj="process")

# Optimizer settings: momentum SGD with L2 regularization (scaled by the
# batch size), parameter averaging, and learning-rate decay.
batch_size = 16
settings(
    learning_method=MomentumOptimizer(),
    batch_size=batch_size,
    regularization=L2Regularization(batch_size * 1e-5),
    average_window=0.5,
    learning_rate=2e-3,
    learning_rate_decay_a=5e-7,
    learning_rate_decay_b=0.5, )

# Network hyperparameters. Set with_rnn=False to drop the two recurrent
# layers and keep only the feed-forward hidden layers.
word_dim = 128
hidden_dim = 128
with_rnn = True

initial_std = 1 / math.sqrt(hidden_dim)
param_attr = ParamAttr(initial_std=initial_std)
# device=-1 pins a layer to the CPU; default_device(0) places the rest on
# device 0.
cpu_layer_attr = ExtraLayerAttribute(device=-1)

default_device(0)

num_label_types = 23

# Input layers: extra features, word ids, POS-tag ids, and the chunk labels
# (the labels stay on the CPU together with the CRF layers).
features = data_layer(name="features", size=76328)
word = data_layer(name="word", size=6778)
pos = data_layer(name="pos", size=44)
chunk = data_layer(
    name="chunk", size=num_label_types, layer_attr=cpu_layer_attr)

# Word embeddings, initialized to zero.
emb = embedding_layer(
    input=word, size=word_dim, param_attr=ParamAttr(initial_std=0))

# First hidden layer: combines the word embedding and the POS tag.
hidden1 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[
        full_matrix_projection(emb), table_projection(
            pos, param_attr=param_attr)
    ])

# Forward recurrent layer over the first hidden layer.
if with_rnn:
    rnn1 = recurrent_layer(
        act=ReluActivation(),
        bias_attr=True,
        input=hidden1,
        param_attr=ParamAttr(initial_std=0), )

# Second hidden layer: fed by hidden1, plus rnn1 when the RNN is enabled.
hidden2 = mixed_layer(
    size=hidden_dim,
    act=STanhActivation(),
    bias_attr=True,
    input=[full_matrix_projection(hidden1)] +
    ([full_matrix_projection(
        rnn1, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )

# Backward (reverse=True) recurrent layer over the second hidden layer.
if with_rnn:
    rnn2 = recurrent_layer(
        reverse=True,
        act=ReluActivation(),
        bias_attr=True,
        input=hidden2,
        param_attr=ParamAttr(initial_std=0), )

# Per-token label scores feeding the CRF.
crf_input = mixed_layer(
    size=num_label_types,
    bias_attr=False,
    input=[full_matrix_projection(hidden2), ] +
    ([full_matrix_projection(
        rnn2, param_attr=ParamAttr(initial_std=0))] if with_rnn else []), )

# Linear-chain CRF cost; the transition parameters "crfw" are shared with
# the decoding layer below. Both CRF layers run on the CPU.
crf = crf_layer(
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(
        name="crfw", initial_std=0),
    layer_attr=cpu_layer_attr, )

# Viterbi decoding with the shared "crfw" parameters; since the label is
# given, the layer outputs per-token error indicators.
crf_decoding = crf_decoding_layer(
    size=num_label_types,
    input=crf_input,
    label=chunk,
    param_attr=ParamAttr(name="crfw"),
    layer_attr=cpu_layer_attr, )

# Total number of token-level errors.
sum_evaluator(
    name="error",
    input=crf_decoding, )

# Chunk-level F1 under the IOB tagging scheme with 11 chunk types.
chunk_evaluator(
    name="chunk_f1",
    input=[crf_decoding, chunk],
    chunk_scheme="IOB",
    num_chunk_types=11, )

# Data layers in the order emitted by the data provider, and the cost to
# optimize.
inputs(word, pos, chunk, features)
outputs(crf)
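
# A typical launch with the classic PaddlePaddle v1 "paddle train" CLI might
# look like the following. This is only a sketch: the config path, pass count,
# and output directory are assumptions, not part of this config.
#
#   paddle train --config=path/to/this_config.py --use_gpu=true \
#       --num_passes=10 --save_dir=output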