提交 901b6ffb 编写于 作者: W wangmeng28

Merge remote-tracking branch 'upstream/develop' into restructure_ltr

......@@ -11,7 +11,6 @@ import reader
class BeamSearch(object):
"""
Generate sequences by beam search.
NOTE: this class only supports generating one sentence at a time.
"""
def __init__(self,
......
......@@ -57,4 +57,4 @@ def rnn_lm(vocab_dim,
else:
cost = paddle.layer.classification_cost(input=output, label=target)
return cost, output
return cost
......@@ -43,9 +43,14 @@ def train(topology,
# create parameters
parameters = paddle.parameters.create(topology)
# create sum evaluator
sum_eval = paddle.evaluator.sum(topology)
# create trainer
trainer = paddle.trainer.SGD(
cost=topology, parameters=parameters, update_equation=adam_optimizer)
cost=topology,
parameters=parameters,
update_equation=adam_optimizer,
extra_layers=sum_eval)
# define the event_handler callback
def event_handler(event):
......
......@@ -15,11 +15,13 @@ def cnn_cov_group(group_input, hidden_size):
conv4 = paddle.networks.sequence_conv_pool(
input=group_input, context_len=4, hidden_size=hidden_size)
fc_param_attr = paddle.attr.ParamAttr(name='_cov_value_weight')
fc_bias_attr = paddle.attr.ParamAttr(name='_cov_value_bias')
linear_proj = paddle.layer.fc(
input=[conv3, conv4],
size=hidden_size,
param_attr=paddle.attr.ParamAttr(name='_cov_value_weight'),
bias_attr=paddle.attr.ParamAttr(name='_cov_value_bias'),
param_attr=[fc_param_attr, fc_param_attr],
bias_attr=fc_bias_attr,
act=paddle.activation.Linear())
return linear_proj
......
......@@ -8,7 +8,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
mark_dict_len = 2
word_dim = 50
mark_dim = 5
hidden_dim = 128
hidden_dim = 300
word = paddle.layer.data(
name="word",
......@@ -23,9 +23,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
name="mark",
type=paddle.data_type.integer_value_sequence(mark_dict_len))
mark_embedding = paddle.layer.embedding(
input=mark,
size=mark_dim,
param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim)))
input=mark, size=mark_dim, param_attr=paddle.attr.Param(initial_std=0.))
word_caps_vector = paddle.layer.concat(
input=[word_embedding, mark_embedding])
......@@ -33,7 +31,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
mix_hidden_lr = 1e-3
rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1)
hidden_para_attr = paddle.attr.Param(
initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr)
initial_std=1. / math.sqrt(hidden_dim) / 3, learning_rate=mix_hidden_lr)
# The first forward and backward RNN layers share the
# input-to-hidden mappings.
......@@ -41,9 +39,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
name="__hidden00__",
size=hidden_dim,
act=paddle.activation.Tanh(),
bias_attr=paddle.attr.Param(initial_std=1.),
bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3),
input=word_caps_vector,
param_attr=hidden_para_attr)
param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
3))
fea = []
for direction in ["fwd", "bwd"]:
......@@ -68,7 +67,7 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
rnn_fea = paddle.layer.fc(
size=hidden_dim,
bias_attr=paddle.attr.Param(initial_std=1.),
bias_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) / 3),
act=paddle.activation.STanh(),
input=fea,
param_attr=[hidden_para_attr, rnn_para_attr] * 2)
......@@ -85,7 +84,8 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
bias_attr=False,
input=rnn_fea,
act=paddle.activation.Linear(),
param_attr=rnn_para_attr)
param_attr=paddle.attr.Param(initial_std=1. / math.sqrt(hidden_dim) /
3))
if is_train:
target = paddle.layer.data(
......@@ -96,7 +96,10 @@ def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
size=label_dict_len,
input=emission,
label=target,
param_attr=paddle.attr.Param(name="crfw", initial_std=1e-3))
param_attr=paddle.attr.Param(
name="crfw",
initial_std=1. / math.sqrt(hidden_dim) / 3,
learning_rate=mix_hidden_lr))
crf_dec = paddle.layer.crf_decoding(
size=label_dict_len,
......
......@@ -16,8 +16,8 @@ def main(train_data_file,
target_file,
emb_file,
model_save_dir,
num_passes=10,
batch_size=32):
num_passes=100,
batch_size=64):
if not os.path.exists(model_save_dir):
os.mkdir(model_save_dir)
......@@ -75,10 +75,10 @@ def main(train_data_file,
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 1 == 0:
if event.batch_id % 5 == 0:
logger.info("Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics))
if event.batch_id % 1 == 0:
if event.batch_id % 50 == 0:
result = trainer.test(reader=test_reader, feeding=feeding)
logger.info("\nTest with Pass %d, Batch %d, %s" %
(event.pass_id, event.batch_id, result.metrics))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册