Commit ba0ff690 authored by caoying03

fix incorrect parameter sharing between bidirectional RNNs.

Parent 132a26af
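In place of the removed per-direction stacked_rnn helper, the new ner_net builds both stacks inline: a single input-to-hidden projection (__hidden00__) feeds both directions, every deeper layer gets a unique per-direction name, and the reverse flag alternates with depth. One plausible reading of the commit title is that the old helper rebuilt an independent, unshared input projection for each direction, whereas the comment in the new code states that the first layer's input-to-hidden mapping is meant to be shared. A standalone sketch of the layer names and scan directions the new loop produces (plain Python, assuming the new default stack_num=2; no Paddle required):

    stack_num = 2  # new default in ner_net

    for direction in ["fwd", "bwd"]:
        for i in range(stack_num):
            if i:
                # depth > 0: a per-direction mixing layer is created
                print("__hidden%02d_%s__" % (i, direction))
            # every depth gets its own recurrent layer; the scan direction
            # alternates with depth and is mirrored between the two stacks
            reverse = bool(i % 2) if direction == "fwd" else not i % 2
            print("__rnn%02d_%s__ (reverse=%s)" % (i, direction, reverse))

This prints __rnn00_fwd__ (reverse=False) through __rnn01_bwd__ (reverse=False): depth 0 of the backward stack scans in reverse while reusing __hidden00__, so the two directions differ in scan order but no longer duplicate the first projection.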
@@ -4,31 +4,7 @@
 import paddle.v2 as paddle
 import paddle.v2.evaluator as evaluator
 
-
-def stacked_rnn(input_layer,
-                hidden_size,
-                hidden_para_attr,
-                rnn_para_attr,
-                stack_num=3,
-                reverse=False):
-    for i in range(stack_num):
-        hidden = paddle.layer.fc(
-            size=hidden_size,
-            act=paddle.activation.Tanh(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            input=[input_layer] if not i else [hidden, rnn],
-            param_attr=[rnn_para_attr]
-            if not i else [hidden_para_attr, rnn_para_attr])
-        rnn = paddle.layer.recurrent(
-            input=hidden,
-            act=paddle.activation.Relu(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            reverse=reverse,
-            param_attr=rnn_para_attr)
-    return hidden, rnn
-
-
-def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
+def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     mark_dict_len = 2
     word_dim = 50
     mark_dim = 5
@@ -51,37 +27,55 @@ def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
         size=mark_dim,
         param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim)))
 
-    emb_layers = [word_embedding, mark_embedding]
-
-    word_caps_vector = paddle.layer.concat(input=emb_layers)
+    word_caps_vector = paddle.layer.concat(
+        input=[word_embedding, mark_embedding])
 
     mix_hidden_lr = 1e-3
     rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1)
     hidden_para_attr = paddle.attr.Param(
         initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr)
 
-    forward_hidden, rnn_forward = stacked_rnn(word_caps_vector, hidden_dim,
-                                              hidden_para_attr, rnn_para_attr)
-    backward_hidden, rnn_backward = stacked_rnn(
-        word_caps_vector,
-        hidden_dim,
-        hidden_para_attr,
-        rnn_para_attr,
-        reverse=True)
-    fea = paddle.layer.fc(
-        size=hidden_dim,
-        bias_attr=paddle.attr.Param(initial_std=1.),
-        act=paddle.activation.STanh(),
-        input=[forward_hidden, rnn_forward, backward_hidden, rnn_backward],
-        param_attr=[
-            hidden_para_attr, rnn_para_attr, hidden_para_attr, rnn_para_attr
-        ])
+    # the first rnn layer shares the input-to-hidden mappings.
+    hidden = paddle.layer.fc(
+        name="__hidden00__",
+        size=hidden_dim,
+        act=paddle.activation.Tanh(),
+        bias_attr=paddle.attr.Param(initial_std=1.),
+        input=word_caps_vector,
+        param_attr=hidden_para_attr)
+
+    fea = []
+    for direction in ["fwd", "bwd"]:
+        for i in range(stack_num):
+            if i:
+                hidden = paddle.layer.fc(
+                    name="__hidden%02d_%s__" % (i, direction),
+                    size=hidden_dim,
+                    act=paddle.activation.STanh(),
+                    bias_attr=paddle.attr.Param(initial_std=1.),
+                    input=[hidden, rnn],
+                    param_attr=[hidden_para_attr, rnn_para_attr])
+            rnn = paddle.layer.recurrent(
+                name="__rnn%02d_%s__" % (i, direction),
+                input=hidden,
+                act=paddle.activation.Relu(),
+                bias_attr=paddle.attr.Param(initial_std=1.),
+                reverse=i % 2 if direction == "fwd" else not i % 2,
+                param_attr=rnn_para_attr)
+        fea += [hidden, rnn]
+
+    rnn_fea = paddle.layer.fc(
+        size=hidden_dim,
+        bias_attr=paddle.attr.Param(initial_std=1.),
+        act=paddle.activation.STanh(),
+        input=fea,
+        param_attr=[hidden_para_attr, rnn_para_attr] * 2)
 
     emission = paddle.layer.fc(
         size=label_dict_len,
         bias_attr=False,
-        input=fea,
+        input=rnn_fea,
         param_attr=rnn_para_attr)
 
     if is_train:
...
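After the loop, fea holds the top-most hidden and rnn layer of each direction, in the order fwd-hidden, fwd-rnn, bwd-hidden, bwd-rnn, and rnn_fea projects all four with positionally paired parameter attributes: [hidden_para_attr, rnn_para_attr] * 2 expands to one attribute per input. A toy sketch of that pairing (string stand-ins for the real attribute objects, so it runs without Paddle):

    hidden_para_attr, rnn_para_attr = "hidden_attr", "rnn_attr"
    # order in which the direction loop appends its top layers
    fea = ["hidden01_fwd", "rnn01_fwd", "hidden01_bwd", "rnn01_bwd"]
    attrs = [hidden_para_attr, rnn_para_attr] * 2

    # paddle.layer.fc pairs the i-th input with the i-th param_attr entry
    for layer, attr in zip(fea, attrs):
        print("%s -> %s" % (layer, attr))

The pairing matters because the two attribute kinds carry different settings: rnn_para_attr uses initial_std=0.0 with learning_rate=0.1, while hidden_para_attr uses initial_std=1/sqrt(hidden_dim) with the smaller mix_hidden_lr.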
@@ -5,6 +5,8 @@
 import reader
 from utils import *
 from network_conf import *
+from paddle.v2.layer import parse_network
+
 
 def main(train_data_file,
          test_data_file,
...
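The only change shown in the training script is the new parse_network import; its call site is not part of this hunk. A plausible purpose (an assumption, not shown in the commit) is dumping the parsed network configuration to verify which parameter each layer binds, which is the direct way to check a parameter-sharing fix. A minimal sketch against a stand-in network:

    import paddle.v2 as paddle
    from paddle.v2.layer import parse_network

    paddle.init(use_gpu=False, trainer_count=1)

    # Tiny stand-in network; in the real script one would pass the
    # output layer(s) built by ner_net instead.
    x = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(8))
    fc = paddle.layer.fc(name="__hidden00__", size=8, input=x)

    # parse_network returns the protobuf model config for everything
    # reachable from the given outputs; printing it lists layer and
    # parameter names, so unintended sharing shows up as two layers
    # referencing one parameter.
    print(parse_network(fc))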