From 0d6ee488c3f1833e7f17d17fe1bec31fd488cfcc Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 29 Nov 2017 16:06:43 +0800 Subject: [PATCH] Refine NER --- sequence_tagging_for_ner/network_conf.py | 23 +++++++++++++---------- sequence_tagging_for_ner/train.py | 8 ++++---- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py index cc101f9b..6a0a810c 100644 --- a/sequence_tagging_for_ner/network_conf.py +++ b/sequence_tagging_for_ner/network_conf.py @@ -8,7 +8,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): mark_dict_len = 2 word_dim = 50 mark_dim = 5 - hidden_dim = 128 + hidden_dim = 300 word = paddle.layer.data( name="word", @@ -23,9 +23,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): name="mark", type=paddle.data_type.integer_value_sequence(mark_dict_len)) mark_embedding = paddle.layer.embedding( - input=mark, - size=mark_dim, - param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim))) + input=mark, size=mark_dim, param_attr=paddle.attr.Param(initial_std=0.)) word_caps_vector = paddle.layer.concat( input=[word_embedding, mark_embedding]) @@ -33,7 +31,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): mix_hidden_lr = 1e-3 rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1) hidden_para_attr = paddle.attr.Param( - initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr) + initial_std=1. / math.sqrt(hidden_dim) / 3, learning_rate=mix_hidden_lr) # the first forward and backward rnn layer share the # input-to-hidden mappings. @@ -41,9 +39,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): name="__hidden00__", size=hidden_dim, act=paddle.activation.Tanh(), - bias_attr=paddle.attr.Param(initial_std=1.), + bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3), input=word_caps_vector, - param_attr=hidden_para_attr) + param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / + 3)) fea = [] for direction in ["fwd", "bwd"]: @@ -68,7 +67,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): rnn_fea = paddle.layer.fc( size=hidden_dim, - bias_attr=paddle.attr.Param(initial_std=1.), + bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3), act=paddle.activation.STanh(), input=fea, param_attr=[hidden_para_attr, rnn_para_attr] * 2) @@ -85,7 +84,8 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): bias_attr=False, input=rnn_fea, act=paddle.activation.Linear(), - param_attr=rnn_para_attr) + param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / + 3)) if is_train: target = paddle.layer.data( @@ -96,7 +96,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True): size=label_dict_len, input=emission, label=target, - param_attr=paddle.attr.Param(name="crfw", initial_std=1e-3)) + param_attr=paddle.attr.Param( + name="crfw", + initial_std=1. / math.sqrt(hidden_dim) / 3, + learning_rate=mix_hidden_lr)) crf_dec = paddle.layer.crf_decoding( size=label_dict_len, diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py index 1f9351f5..0b27d88a 100644 --- a/sequence_tagging_for_ner/train.py +++ b/sequence_tagging_for_ner/train.py @@ -16,8 +16,8 @@ def main(train_data_file, target_file, emb_file, model_save_dir, - num_passes=10, - batch_size=32): + num_passes=100, + batch_size=64): if not os.path.exists(model_save_dir): os.mkdir(model_save_dir) @@ -75,10 +75,10 @@ def main(train_data_file, def event_handler(event): if isinstance(event, paddle.event.EndIteration): - if event.batch_id % 1 == 0: + if event.batch_id % 5 == 0: logger.info("Pass %d, Batch %d, Cost %f, %s" % ( event.pass_id, event.batch_id, event.cost, event.metrics)) - if event.batch_id % 1 == 0: + if event.batch_id % 50 == 0: result = trainer.test(reader=test_reader, feeding=feeding) logger.info("\nTest with Pass %d, Batch %d, %s" % (event.pass_id, event.batch_id, result.metrics)) -- GitLab