循环的老问题,基本上做各个框架的静态图都会遇到,希望大佬给个方案
Created by: Zhangbeibei1991
我是做事件抽取的(跟咱们百度事件抽取有点关系),想用静态图做联合抽取,就是同事抽取事件和触发词,基本步骤是:第一步:先抽取触发词,第二步:然后用触发词和句子中的每个词concat一下然后再用fc做触发词和要素的角色分类,最后一步是将前两部的损失函数加一块做联合训练,咱么百度给的baseline是pipeline做的,然后我想用联合抽取做,现在的问题是在执行第二步的时候:需要将识别出的触发词再与所在句子中的每个词进行拼接,而识别的触发词都包在(batch_size,seq_len,embedding)里面这就要求我要先将batch_size拆开用while_loop对每个句子做循环,然后再在嵌套对句子中的每个词做while_loop循环,这是比较直观的思路,现在我遇到的问题时,因为我输入的data的占位符的shape的第0维是默认-1的,所以在循环的时候我也写了个-1, 但是在编译过程中就过不去,总是给我包如下错误: `
Error Message Summary:
个人目测应该是-1搞的, 于是我就尝试着将循环的上界(也就是未知的batch_size)改成了2试试,虽然编译模型能通过,但是最终给我的编码输出显示的(4,seq_len,seq_len,embedding), 我不太清楚为啥会变成了4,当我进一步换成了我的batch_size大小, 也就是32时,CPU内存直接爆了,报错如下:
----------------------
Error Message Summary:
Error: Input tensorarray size should > 0.
[Hint: Expected n > 0, but received n:0 <= 0:0.] at (D:\1.7.1\paddle\paddle\fluid\operators\tensor_array_to_tensor_op.cc:99)
[operator < tensor_array_to_tensor > error]
Error: Fail to allocate CPU memory: size = 2600001536 . [Hint: p should not be null.] at (D:\1.7.1\paddle\paddle\fluid\memory\detail\system_allocator.cc:61) [operator < reshape2 > error]`
其实则batch并不大,我循环前输的编码是(32,125,650), 在完整句子中每个词都和其所在句子拼接后的输出期望编码是(32,125,125,1300),按照往常这事没什么问题的(至少torch这么大的是没什么问题的, 更何况paddle的静态图), 所以我想把我的完整代码push出来(要素预测仅到组织预测要素的编码那里),让大佬帮忙解决这个循环问题,如果能解决,我会把我做的联合抽取的demo(可能性能低,因为我没用bert啥的)公开,毕竟现在事件联合抽取的论文有的是,但是公开的用静态图做的联合抽取放出来的模型几乎太少了,咱们的项目库里面也好像是没有, 基础代码如下, 希望大佬鞭尸,解决我多年的疑惑(学tf的时候也碰到这个问题,没解决掉):
` import paddle from models.MLEEevaluator import evaluator_trigger import paddle.fluid as fluid from GCN_trigger.conf import conf as cf from models.MLEEutilization import MLEEutilization import numpy as np from paddle.fluid.layers import * class MLEEjme: def init(self,max_len=100, trigger_class_num=73,entity_embedding_matrix=None, word_embedding_matrix=None,use_cuda=False): self.class_num = trigger_class_num self.max_len = max_len self.entity_embedding_matrix = entity_embedding_matrix self.embedding_matrix = word_embedding_matrix self.sen_input, self.entity_type_input = None, None self.sen_embedding, self.entity_embedding = None, None self.output = None self.use_cuda = use_cuda
def make_input(self):
self.sentence = fluid.layers.data(shape=[self.max_len,1], dtype='int64', name='sentence_input')
self.entity_type = fluid.layers.data(shape=[self.max_len,1], dtype='int64', name='entity_type_input')
self.seq_len = fluid.layers.data(shape=[],dtype='int64',name='seq_len')
self.trigger_labels = fluid.layers.data(shape=[self.max_len,1],dtype='int64',name='trigger_labels')
self.tri_graph_along = fluid.layers.data(shape=[cf.max_len,cf.max_len],name='along')
self.tri_graph_rev = fluid.layers.data(shape=[cf.max_len,cf.max_len],name='rev')
self.tri_graph_loop = fluid.layers.data(shape=[cf.max_len,cf.max_len],name='loop')
def embedded(self):
word_embedding_param = fluid.ParamAttr(name='word_embed_param',
initializer=fluid.initializer.NumpyArrayInitializer(self.embedding_matrix),
trainable=False)
self.word_sentence_embeddings = fluid.layers.embedding(
input=self.sentence,
size=[self.embedding_matrix.shape[0],self.embedding_matrix.shape[1]],
padding_idx=-self.embedding_matrix.shape[0],
param_attr=word_embedding_param,
)
entity_embedding_param = fluid.ParamAttr(name='entity_embed_param',
initializer=fluid.initializer.NumpyArrayInitializer(self.entity_embedding_matrix),
trainable=True)
self.entity_embeddings = fluid.layers.embedding(
input=self.entity_type,
size=[self.entity_embedding_matrix.shape[0],self.entity_embedding_matrix.shape[1]],
padding_idx=-self.entity_embedding_matrix.shape[0],
param_attr=entity_embedding_param
)
def Bi_RNN(self):
inputs = fluid.layers.concat([self.word_sentence_embeddings, self.entity_embeddings],axis=-1)
inputs = fluid.layers.batch_norm(input=inputs)
inputs = fluid.layers.dropout(inputs,dropout_prob=0.5)
# 定义cell并自己搞初始化
cell_fw = fluid.layers.GRUCell(hidden_size=cf.gru_hidden_dim,
gate_activation=fluid.layers.hard_sigmoid,
activation=fluid.layers.relu)
cell_bw = fluid.layers.GRUCell(hidden_size=cf.gru_hidden_dim,
gate_activation=fluid.layers.hard_sigmoid,
activation=fluid.layers.relu)
encode_fw, _ = fluid.layers.rnn(cell=cell_fw,inputs=inputs,sequence_length=self.seq_len)
encode_bw, _ = fluid.layers.rnn(cell=cell_bw,inputs=inputs,sequence_length=self.seq_len,is_reverse=True)
self.encode = fluid.layers.concat([encode_fw,encode_bw],axis=-1)
self.encode = fluid.layers.concat([self.encode,inputs],axis=-1)
self.encode = fluid.layers.dropout(self.encode,dropout_prob=0.5)
# self.encode = inputs
# print(self.encode.shape)
def trigger_prediction(self):
self.trigger_logits = fluid.layers.fc(input=self.encode,
size=self.class_num,name='trigger_logits',
num_flatten_dims=2)
# print(self.trigger_logits.shape)
def argument_prediction(self):
batch_tensor_arry = fluid.layers.create_array(dtype='float32')
t = fill_constant(shape=[1], dtype='int64', value=0) # 循环计数器
batch_bundary = fill_constant(shape=[1], dtype='int64', value=32) # 循环次数-->这个地方真的填数据的时候会自动推导
def loop_cond(t, *args):
return fluid.layers.less_than(t,batch_bundary)
def _seq_loop_body(t, batch_tensors, batch_tensor_arry): # 对batch中的每个句子进行操作
std_index = fluid.layers.reshape(t,shape=(-1,1))
# std_index=[[1]]: 方便用gather_nd从形如(batch_size,seq_len,embedding)中取出句子张量(1,seq_len,embedding)
seq_tensors = fluid.layers.gather_nd(batch_tensors,std_index) # (seq,embedding) --> 序列句子
seq_tensor_array = fluid.layers.create_array(dtype='float32')
wt = fill_constant(shape=[1], dtype='int64', value=0) # 句子内部进行词循环的计数器
sent_bundary = fill_constant(shape=[1], dtype='int64', value=self.encode.shape[1]) # 句子序列的循环上界
def _word_cond(wt, *args):
return fluid.layers.less_than(wt,sent_bundary)
def _word_loop_body(wt,seq_tensors,seq_tensor_array): # 对序列中的每个词进行循环--> seq_tensor: (1,seq_len,embedding)
# seq_tensors = fluid.layers.squeeze(seq_tensors,axes=[0])
one = fluid.layers.fill_constant(shape=[1,1],dtype='int64',value=0)
std_wt = fluid.layers.reshape(x=wt,shape=[-1,1])
std_wt = fluid.layers.concat([one,std_wt],axis=-1) # --> ([[0,wt]]) 从形如(1,seq_len,embedding的),第一位
target_tensor = fluid.layers.gather_nd(seq_tensors,std_wt) # (1,embedding,)---> 词
# expand_tensors = fluid.layers.unsqueeze(input=target_tensor,axes=[0]) # (1,embeddintg)
expand_tensors = fluid.layers.expand(target_tensor,[self.encode.shape[1],1]) # (seq,embedding)
word_concat_seq = concat([expand_tensors,fluid.layers.squeeze(seq_tensors,axes=[0])],axis=-1) # (seq,embedding * 2)
fluid.layers.array_write(word_concat_seq,wt,seq_tensor_array)
# 这里还可能进行某些操作,简单起见先不搞
wt = fluid.layers.increment(x=wt,value=1,in_place=True)
return [wt,seq_tensors,seq_tensor_array]
wt,seq_tensors,seq_tensor_array = while_loop(_word_cond,_word_loop_body,[wt,seq_tensors,seq_tensor_array]) # desire: seq_tensor_array : (seq,seq,embedding * 2)
seq_tensor_array,_ = tensor_array_to_tensor(seq_tensor_array,axis=0,use_stack=True) # # desire: seq_tensor_array : (seq,seq,embedding * 2)
seq_tensor_array = reshape(seq_tensor_array,[self.encode.shape[1],self.encode.shape[1],self.encode.shape[-1] * 2])
fluid.layers.array_write(seq_tensor_array,t,batch_tensor_arry)
t = fluid.layers.increment(x=t,value=1,in_place=True)
return [t, batch_tensors, batch_tensor_arry]
t, seq_word, batch_tensor_arry = fluid.layers.while_loop(loop_cond, _seq_loop_body, [t, self.encode, batch_tensor_arry])
batch_tensor_arry, _ = tensor_array_to_tensor(batch_tensor_arry, axis=0, use_stack=True)
self.batch_tensor_arry = reshape(batch_tensor_arry, [self.encode.shape[0],self.encode.shape[1], self.encode.shape[1], self.encode.shape[-1]])
def get_acc(self):
self.acc = fluid.layers.accuracy(input=fluid.layers.reshape(self.trigger_logits,shape=(-1,self.class_num)),
label=fluid.layers.reshape(self.trigger_labels,shape=(-1,1)))
def loss_fun(self):
probs = fluid.layers.softmax(self.trigger_logits,axis=-1)
loss = fluid.layers.cross_entropy(input=probs,label=self.trigger_labels)
trigger_mask = fluid.layers.sequence_mask(x=self.seq_len,
maxlen=self.max_len,
dtype='float32')
self.avg_loss = fluid.layers.reduce_sum(loss) / fluid.layers.reduce_sum(trigger_mask)
def load_optimizer(self):
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=15.0)
)
self.optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001,regularization=fluid.regularizer.L2Decay(1e-6))
self.optimizer.minimize(self.avg_loss)
def build_model(self):
self.make_input()
self.embedded()
self.Bi_RNN()
self.trigger_prediction()
self.argument_prediction()
self.get_acc()
self.loss_fun()
def build_config(self):
place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace()
self.train_exe = fluid.Executor(place)
self.test_exe = fluid.Executor(place)
self.build_model()
self.test_program = fluid.default_main_program().clone(for_test=True)
feed_order = ['sentence_input', 'entity_type_input', 'trigger_labels', 'seq_len']
feed_var_list = [
self.test_program.global_block().var(var_name) for var_name in feed_order
]
self.feeder_test = fluid.DataFeeder(feed_list=feed_var_list, place=place)
self.load_optimizer()
self.train_program = fluid.default_main_program()
feed_var_list = [
self.train_program.global_block().var(var_name) for var_name in feed_order
]
self.feeder_train = fluid.DataFeeder(feed_list=feed_var_list, place=place)
self.train_exe.run(fluid.default_startup_program())
class MLEErun_model: def init(self,params): super(MLEErun_model, self).init() self.params = params self.ML = MLEEutilization() self.max_len = self.params['max_len'] self.trigger_class_num = self.params['trigger_class_num'] self.argument_class_num = self.params['argument_class_num'] self.name = "model_trigger_entity" self.Model = MLEEjme(max_len=self.max_len, trigger_class_num=self.trigger_class_num, entity_embedding_matrix=self.ML.embed_matrix['entity_type_matrix'], word_embedding_matrix=self.ML.embed_matrix['embedding_matrix']) self.Model.build_config()
self.trigger_i2s = self.ML.convert_dict['trigger_type_dict_i2s']
self.entity_i2s = self.ML.convert_dict['entity_type_dict_i2s']
self.train_golden_events = self.ML.golden_events['train_golden_event']
self.test_golden_events = self.ML.golden_events['test_golden_event']
self.evaluator_trigger = evaluator_trigger(tri_id2label=self.trigger_i2s,
entity_label_id2label=self.entity_i2s,
role_id2label=self.ML.role_i2s)
self.train_seq = self.ML.train_dict['train_seq']
self.test_seq = self.ML.test_dict['test_seq']
self.train_reader = paddle.batch(
paddle.reader.shuffle(
reader=self.genrerator_train(),
buf_size=51200
),
batch_size=32
)
self.test_reader = paddle.batch(
reader=self.genrerator_test(),
batch_size=32
)
def genrerator_train(self):
def reader():
for sent, entity, label, seq_len in zip(self.ML.train_dict['train_input'],self.ML.train_dict['train_entity_inputs'],self.ML.train_dict['train_labels'],self.train_seq):
sent = np.array(sent,dtype='int64')
entity = np.array(entity,dtype='int64')
sent_shape = sent.shape + (1,)
entity_shape = sent.shape + (1,)
sent =sent.reshape(sent_shape)
entity =entity.reshape(entity_shape)
label = np.argmax(np.array(label,dtype='int64'),axis=-1)
label_shape = label.shape + (1,)
label = label.reshape(label_shape)
# seq_len = np.array([seq_len])
yield sent,entity,label,seq_len
return reader
def genrerator_test(self):
def reader():
for sent, entity, label, seq_len in zip(self.ML.test_dict['test_input'],
self.ML.test_dict['test_entity_inputs'],
self.ML.test_dict['test_labels'],
self.test_seq):
sent = np.array(sent,dtype='int64')
entity = np.array(entity,dtype='int64')
sent_shape = sent.shape + (1,)
entity_shape = sent.shape + (1,)
sent =sent.reshape(sent_shape)
entity =entity.reshape(entity_shape)
label = np.argmax(np.array(label,dtype='int64'),axis=-1)
label_shape = label.shape + (1,)
label = label.reshape(label_shape)
# seq_len = np.array(seq_len)
yield sent,entity,label,seq_len
return reader
def training_epochs(self):
train_exe,train_program,feeder = self.Model.train_exe,self.Model.train_program,self.Model.feeder_train
flist = []
for epoch in range(0,self.params['max_epochs'],1,dtype='int64'):
epoch_loss = []
epoch_acc = []
print('epoch: {}'.format(epoch + 1).center(50,'='))
for data in self.train_reader():
loss, acc, encode = train_exe.run(program=train_program,
feed=feeder.feed(data),
fetch_list=[self.Model.avg_loss,self.Model.acc,self.Model.batch_tensor_arry])
# print('loss: {}, acc: {}'.format(loss,acc))
epoch_loss.append(loss[0])
epoch_acc.append(acc[0])
print('Epoch (train): {0}, loss: {1:>6.4}, acc: {2:>6.4%}'.format(epoch + 1, np.mean(epoch_loss), np.mean(epoch_acc)))
f = self.train_test(flist=flist)
# print()
def train_test(self,flist):
test_exe, test_program,feeder = self.Model.test_exe, self.Model.test_program,self.Model.feeder_test
test_logots = []
test_loss = []
for data in self.test_reader():
loss, logits = test_exe.run(program=test_program,
feed=feeder.feed(data),
fetch_list=[self.Model.avg_loss,self.Model.trigger_logits])
test_logots.append(logits)
test_loss.append(loss)
logits = list(np.concatenate(test_logots,axis=0).tolist())
avg_loss = np.mean(test_loss)
pred_trigger_labels, pred_trigger_labels_seq = self.recover_seq(doc_labels=logits,
seq_len=self.test_seq)
real_trigger_labels, real_trigger_labels_seq = self.recover_seq(doc_labels=self.ML.test_dict['test_labels'],
seq_len=self.test_seq)
f = self.evaluator_trigger.compute_trigger_metrics(tri_pred_label=pred_trigger_labels,
tri_true_label=real_trigger_labels,
loss=avg_loss,
flist=flist
)
# print(avg_loss)
return f
def recover_seq(self, doc_labels, seq_len, type='trigger'):
if type == 'trigger':
dict_i2s = self.trigger_i2s
doc_labels = list(np.argmax(doc_labels, axis=-1).tolist())
elif type == 'entity':
dict_i2s = self.entity_i2s
elif type == 'argument':
dict_i2s = self.ML.role_i2s
doc_labels = list(np.argmax(doc_labels, axis=-1).tolist())
doc_labels_seq = []
for i,sent_labels in enumerate(doc_labels):
sent_labels = sent_labels[:seq_len[i]]
doc_labels_seq.append(sent_labels)
if type == 'trigger':
actual_labels = self.evaluator_trigger.trigger_index2reallabel(tri_true_label=doc_labels_seq,tri_id2label=dict_i2s)
elif type == 'entity':
actual_labels = self.evaluator_trigger.trigger_index2reallabel(tri_true_label=doc_labels_seq,tri_id2label=dict_i2s,type='entity')
elif type == 'argument':
actual_labels = self.evaluator_trigger.trigger_index2reallabel(tri_true_label=doc_labels_seq,tri_id2label=dict_i2s, type='argument')
return actual_labels,doc_labels_seq
if name == 'main': params = {'max_epochs': 50, 'max_len': 125, 'trigger_class_num': 73, 'argument_class_num': 9, 'use_development_set': False, 'use_cuda':False} aa = MLEErun_model(params=params) aa.training_epochs() `