diff --git a/conv_seq2seq/beamsearch.py b/conv_seq2seq/beamsearch.py
index 22318c293996f953c544ebae0a1f43e352718cb5..72fb59d47445b2ad07c07c73093edf35a8f260a6 100644
--- a/conv_seq2seq/beamsearch.py
+++ b/conv_seq2seq/beamsearch.py
@@ -11,7 +11,6 @@ import reader
 class BeamSearch(object):
     """
     Generate sequence by beam search
-    NOTE: this class only implements generating one sentence at a time.
     """
 
     def __init__(self,
diff --git a/generate_sequence_by_rnn_lm/network_conf.py b/generate_sequence_by_rnn_lm/network_conf.py
index f1aceb0b7d70c6a2aec601cf935d2f34500d20fc..f2e59a94298435f280a561a1e079defa2ea84f62 100644
--- a/generate_sequence_by_rnn_lm/network_conf.py
+++ b/generate_sequence_by_rnn_lm/network_conf.py
@@ -57,4 +57,4 @@ def rnn_lm(vocab_dim,
     else:
         cost = paddle.layer.classification_cost(input=output, label=target)
 
-    return cost, output
+    return cost
diff --git a/generate_sequence_by_rnn_lm/train.py b/generate_sequence_by_rnn_lm/train.py
index d8bffd487a6b8c0b74d28a97b16c689255dd472b..198835829812426f49682b3fdb43f6c35c55a319 100644
--- a/generate_sequence_by_rnn_lm/train.py
+++ b/generate_sequence_by_rnn_lm/train.py
@@ -43,9 +43,14 @@ def train(topology,
 
     # create parameters
     parameters = paddle.parameters.create(topology)
+    # create sum evaluator
+    sum_eval = paddle.evaluator.sum(topology)
     # create trainer
     trainer = paddle.trainer.SGD(
-        cost=topology, parameters=parameters, update_equation=adam_optimizer)
+        cost=topology,
+        parameters=parameters,
+        update_equation=adam_optimizer,
+        extra_layers=sum_eval)
 
     # define the event_handler callback
     def event_handler(event):
diff --git a/nested_sequence/text_classification/network_conf.py b/nested_sequence/text_classification/network_conf.py
index b4c4066909ac167338554640029b2cbd609e49f7..cc7b3983dbcdeae6d84aaba9038359ea45a7dcf9 100644
--- a/nested_sequence/text_classification/network_conf.py
+++ b/nested_sequence/text_classification/network_conf.py
@@ -15,11 +15,13 @@ def cnn_cov_group(group_input, hidden_size):
     conv4 = paddle.networks.sequence_conv_pool(
         input=group_input, context_len=4, hidden_size=hidden_size)
 
+    fc_param_attr = paddle.attr.ParamAttr(name='_cov_value_weight')
+    fc_bias_attr = paddle.attr.ParamAttr(name='_cov_value_bias')
     linear_proj = paddle.layer.fc(
         input=[conv3, conv4],
         size=hidden_size,
-        param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'),
-        bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'),
+        param_attr=[fc_param_attr, fc_param_attr],
+        bias_attr=fc_bias_attr,
         act=paddle.activation.Linear())
 
     return linear_proj
diff --git a/sequence_tagging_for_ner/network_conf.py b/sequence_tagging_for_ner/network_conf.py
index cc101f9bc60474ef614fc4ecf0a98fec185e9566..6a0a810c9221c1b6ae83c388d2d560be261946c1 100644
--- a/sequence_tagging_for_ner/network_conf.py
+++ b/sequence_tagging_for_ner/network_conf.py
@@ -8,7 +8,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     mark_dict_len = 2
     word_dim = 50
     mark_dim = 5
-    hidden_dim = 128
+    hidden_dim = 300
 
     word = paddle.layer.data(
         name="word",
@@ -23,9 +23,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
         name="mark",
         type=paddle.data_type.integer_value_sequence(mark_dict_len))
     mark_embedding = paddle.layer.embedding(
-        input=mark,
-        size=mark_dim,
-        param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim)))
+        input=mark, size=mark_dim, param_attr=paddle.attr.Param(initial_std=0.))
 
     word_caps_vector = paddle.layer.concat(
         input=[word_embedding, mark_embedding])
@@ -33,7 +31,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     mix_hidden_lr = 1e-3
     rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1)
     hidden_para_attr = paddle.attr.Param(
-        initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr)
+        initial_std=1. / math.sqrt(hidden_dim) / 3, learning_rate=mix_hidden_lr)
 
     # the first forward and backward rnn layer share the
     # input-to-hidden mappings.
@@ -41,9 +39,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
         name="__hidden00__",
         size=hidden_dim,
         act=paddle.activation.Tanh(),
-        bias_attr=paddle.attr.Param(initial_std=1.),
+        bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3),
         input=word_caps_vector,
-        param_attr=hidden_para_attr)
+        param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
+                                     3))
 
     fea = []
     for direction in ["fwd", "bwd"]:
@@ -68,7 +67,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
 
     rnn_fea = paddle.layer.fc(
         size=hidden_dim,
-        bias_attr=paddle.attr.Param(initial_std=1.),
+        bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3),
         act=paddle.activation.STanh(),
         input=fea,
         param_attr=[hidden_para_attr, rnn_para_attr] * 2)
@@ -85,7 +84,8 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
         bias_attr=False,
         input=rnn_fea,
         act=paddle.activation.Linear(),
-        param_attr=rnn_para_attr)
+        param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
+                                     3))
 
     if is_train:
         target = paddle.layer.data(
@@ -96,7 +96,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
             size=label_dict_len,
             input=emission,
             label=target,
-            param_attr=paddle.attr.Param(name="crfw", initial_std=1e-3))
+            param_attr=paddle.attr.Param(
+                name="crfw",
+                initial_std=1. / math.sqrt(hidden_dim) / 3,
+                learning_rate=mix_hidden_lr))
 
         crf_dec = paddle.layer.crf_decoding(
             size=label_dict_len,
diff --git a/sequence_tagging_for_ner/train.py b/sequence_tagging_for_ner/train.py
index 1f9351f5628059ea64d09f3f4c72b6d5877a207a..0b27d88a69826c2910796e4e631f7755cee99359 100644
--- a/sequence_tagging_for_ner/train.py
+++ b/sequence_tagging_for_ner/train.py
@@ -16,8 +16,8 @@ def main(train_data_file,
          target_file,
          emb_file,
          model_save_dir,
-         num_passes=10,
-         batch_size=32):
+         num_passes=100,
+         batch_size=64):
     if not os.path.exists(model_save_dir):
         os.mkdir(model_save_dir)
 
@@ -75,10 +75,10 @@ def main(train_data_file,
 
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 1 == 0:
+            if event.batch_id % 5 == 0:
                 logger.info("Pass %d, Batch %d, Cost %f, %s" % (
                     event.pass_id, event.batch_id, event.cost, event.metrics))
-            if event.batch_id % 1 == 0:
+            if event.batch_id % 50 == 0:
                 result = trainer.test(reader=test_reader, feeding=feeding)
                 logger.info("\nTest with Pass %d, Batch %d, %s" %
                             (event.pass_id, event.batch_id, result.metrics))