Commit ba0ff690 authored by caoying03

fix incorrect parameter sharing between bidirectional rnns.

Parent 132a26af
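This commit rewrites the NER network in network_conf.py. Previously a stacked_rnn helper was called twice, once per direction, with the same hidden_para_attr and rnn_para_attr, which tied parameters between the forward and backward stacks in unintended ways. The new version drops the helper, builds both stacks inline with explicit per-direction layer names (__hidden01_fwd__, __rnn00_bwd__, and so on), and shares only the first input-to-hidden projection (__hidden00__) between the two directions.

For background on the mechanism involved: in paddle.v2, parameters are shared by name, so two layers whose param_attr carries the same parameter name are backed by a single weight matrix. Below is a minimal sketch of that behavior; the layer sizes and the parameter name "shared_w" are illustrative, not taken from this commit.

import paddle.v2 as paddle

paddle.init(use_gpu=False, trainer_count=1)

x = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(16))

# Both projections point their weight at the name "shared_w", so paddle.v2
# creates one 16x32 parameter and reuses it in both layers.
shared_w = paddle.attr.Param(name="shared_w", initial_std=0.1)
proj_a = paddle.layer.fc(input=x, size=32, param_attr=shared_w)
proj_b = paddle.layer.fc(input=x, size=32, param_attr=shared_w)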
@@ -4,31 +4,7 @@ import paddle.v2 as paddle
 import paddle.v2.evaluator as evaluator
 
 
-def stacked_rnn(input_layer,
-                hidden_size,
-                hidden_para_attr,
-                rnn_para_attr,
-                stack_num=3,
-                reverse=False):
-    for i in range(stack_num):
-        hidden = paddle.layer.fc(
-            size=hidden_size,
-            act=paddle.activation.Tanh(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            input=[input_layer] if not i else [hidden, rnn],
-            param_attr=[rnn_para_attr]
-            if not i else [hidden_para_attr, rnn_para_attr])
-        rnn = paddle.layer.recurrent(
-            input=hidden,
-            act=paddle.activation.Relu(),
-            bias_attr=paddle.attr.Param(initial_std=1.),
-            reverse=reverse,
-            param_attr=rnn_para_attr)
-    return hidden, rnn
-
-
-def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
+def ner_net(word_dict_len, label_dict_len, stack_num=2, is_train=True):
     mark_dict_len = 2
     word_dim = 50
     mark_dim = 5
@@ -51,37 +27,55 @@ def ner_net(word_dict_len, label_dict_len, stack_num=3, is_train=True):
         size=mark_dim,
         param_attr=paddle.attr.Param(initial_std=math.sqrt(1. / word_dim)))
 
-    emb_layers = [word_embedding, mark_embedding]
-    word_caps_vector = paddle.layer.concat(input=emb_layers)
+    word_caps_vector = paddle.layer.concat(
+        input=[word_embedding, mark_embedding])
 
     mix_hidden_lr = 1e-3
     rnn_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=0.1)
     hidden_para_attr = paddle.attr.Param(
         initial_std=1 / math.sqrt(hidden_dim), learning_rate=mix_hidden_lr)
 
-    forward_hidden, rnn_forward = stacked_rnn(word_caps_vector, hidden_dim,
-                                              hidden_para_attr, rnn_para_attr)
-    backward_hidden, rnn_backward = stacked_rnn(
-        word_caps_vector,
-        hidden_dim,
-        hidden_para_attr,
-        rnn_para_attr,
-        reverse=True)
+    # the first rnn layer shares the input-to-hidden mappings.
+    hidden = paddle.layer.fc(
+        name="__hidden00__",
+        size=hidden_dim,
+        act=paddle.activation.Tanh(),
+        bias_attr=paddle.attr.Param(initial_std=1.),
+        input=word_caps_vector,
+        param_attr=hidden_para_attr)
 
-    fea = paddle.layer.fc(
+    fea = []
+    for direction in ["fwd", "bwd"]:
+        for i in range(stack_num):
+            if i:
+                hidden = paddle.layer.fc(
+                    name="__hidden%02d_%s__" % (i, direction),
+                    size=hidden_dim,
+                    act=paddle.activation.STanh(),
+                    bias_attr=paddle.attr.Param(initial_std=1.),
+                    input=[hidden, rnn],
+                    param_attr=[hidden_para_attr, rnn_para_attr])
+            rnn = paddle.layer.recurrent(
+                name="__rnn%02d_%s__" % (i, direction),
+                input=hidden,
+                act=paddle.activation.Relu(),
+                bias_attr=paddle.attr.Param(initial_std=1.),
+                reverse=i % 2 if direction == "fwd" else not i % 2,
+                param_attr=rnn_para_attr)
+        fea += [hidden, rnn]
+
+    rnn_fea = paddle.layer.fc(
         size=hidden_dim,
         bias_attr=paddle.attr.Param(initial_std=1.),
         act=paddle.activation.STanh(),
-        input=[forward_hidden, rnn_forward, backward_hidden, rnn_backward],
-        param_attr=[
-            hidden_para_attr, rnn_para_attr, hidden_para_attr, rnn_para_attr
-        ])
+        input=fea,
+        param_attr=[hidden_para_attr, rnn_para_attr] * 2)
 
     emission = paddle.layer.fc(
         size=label_dict_len,
         bias_attr=False,
-        input=fea,
+        input=rnn_fea,
         param_attr=rnn_para_attr)
 
     if is_train:
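A note on the new loop: within each direction's stack the recurrent layers alternate orientation, and the two stacks are mirror images of each other, so at every depth the "fwd" and "bwd" stacks run in opposite directions. The last hidden and rnn layers of each direction are collected into fea, giving rnn_fea four inputs, which is why its param_attr list is [hidden_para_attr, rnn_para_attr] * 2. The reverse flags can be checked with a few lines of plain Python that mirror the expression in the diff:

# Prints the reverse flag used by each recurrent layer, per direction.
stack_num = 2
for direction in ["fwd", "bwd"]:
    for i in range(stack_num):
        reverse = i % 2 if direction == "fwd" else not i % 2
        print(direction, i, bool(reverse))
# fwd 0 False
# fwd 1 True
# bwd 0 True
# bwd 1 False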
@@ -5,6 +5,8 @@ import reader
 from utils import *
 from network_conf import *
+from paddle.v2.layer import parse_network
 
 
 def main(train_data_file,
          test_data_file,
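The training script now also imports parse_network, which in paddle.v2 turns one or more output layers into the protobuf model configuration. Printing that configuration is a convenient way to confirm that only the intended parameters are shared after this change. A small sketch of how it is typically used; the toy layers here are illustrative, not from train.py:

import paddle.v2 as paddle
from paddle.v2.layer import parse_network

paddle.init(use_gpu=False, trainer_count=1)

# Toy network standing in for the real output layer built by train.py.
x = paddle.layer.data(name="x", type=paddle.data_type.dense_vector(8))
y = paddle.layer.fc(input=x, size=4)

# parse_network returns the model config protobuf; printing it lists every
# layer together with the parameter names it references.
print(parse_network(y))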