fetch 梯度的问题
Created by: Akeepers
我的目标是在 Paddle 静态图中计算当前预测结果 y 对输入 x 的梯度并返回。我在模型代码里加入了计算梯度的部分(使用了 fluid.gradients API,它返回的是一个 list),然后尝试通过 fetch_list 把这个结果取回来,但是失败了——fetch 返回的结果中没有计算得到的梯度。
问题:
- 是不是因为gradients 的结果是list,所以fetch_list拿不回来
- 在静态图模式下,有没有其他计算梯度的方式?
代码(有删减,应该不影响理解问题)
模型
def get_gradient(sentence_input, infers, selected_idx):
    """Compute gradients of the selected inference outputs w.r.t. the input.

    Args:
        sentence_input: Variable the gradient is taken with respect to.
            NOTE(review): if this is an int64 ids tensor (e.g. ``src_ids``),
            ``fluid.gradients`` cannot propagate a gradient to it -- gradients
            only flow to float tensors such as the embedding lookup output.
            Passing raw ids here is the usual reason the fetched gradient
            comes back empty; verify the caller passes a differentiable
            (float) tensor.
        infers: inference logits; a single label column is selected along
            dim 2 before differentiation.
        selected_idx: int64 index tensor choosing which column of ``infers``
            to differentiate.

    Returns:
        list of gradient Variables (``fluid.gradients`` always returns a
        list, one entry per target in ``sentence_input``).
    """
    selected_infer = fluid.layers.index_select(infers,
                                               index=selected_idx,
                                               dim=2)
    # Debug aid: dump the selected logits at runtime.
    fluid.layers.Print(selected_infer)
    return fluid.gradients(selected_infer, sentence_input)
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 np_event_emb,
                 is_test=False):
    """Event extraction model: ERNIE encoder + trigger/argument nets.

    Args:
        args: configuration namespace (max_seq_len, num_trigger_labels,
            num_argument_labels, use_fp16, loss weights, only_argument, ...).
        pyreader_name: unused in the visible code; kept for interface
            compatibility with callers.
        ernie_config: configuration object passed to ErnieModel.
        np_event_emb: unused in the visible code; kept for interface
            compatibility with callers.
        is_test: False builds the training graph (extra loss-weight inputs,
            non-iterable DataLoader); True builds the inference graph
            (iterable DataLoader, exposes input gradients).

    Returns:
        (pyreader, graph_vars): the DataLoader and a dict of fetchable
        graph Variables.
    """
    # ---- input data layers -------------------------------------------------
    src_ids = fluid.layers.data(name='src_ids',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='sent_ids',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    trigger_sent_ids = fluid.layers.data(name='trigger_sent_ids',
                                         shape=[-1, args.max_seq_len, 1],
                                         dtype='int64')
    pos_ids = fluid.layers.data(name='pos_ids',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    task_ids = fluid.layers.data(name='task_ids',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    input_mask = fluid.layers.data(name='input_mask',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    seq_lens = fluid.layers.data(name='seq_lens', shape=[-1], dtype='int64')
    trigger_label = fluid.layers.data(
        name='trigger_label',
        shape=[-1, args.max_seq_len, args.num_trigger_labels * 2, 1],
        dtype='int64')
    argument_label = fluid.layers.data(
        name='argument_label',
        shape=[-1, args.max_seq_len, args.num_argument_labels * 2, 1],
        dtype='int64')
    trigger_span = fluid.layers.data(name="trigger_span",
                                     shape=[-1, 2, 2],
                                     dtype='int64')
    trigger_span_index = fluid.layers.data(name="trigger_span_index",
                                           shape=[-1, args.max_seq_len, 1],
                                           dtype='int64')
    trigger_span_mask = fluid.layers.data(name="trigger_span_mask",
                                          shape=[-1, 1],
                                          dtype='float32')
    event_type_ids = fluid.layers.data(name='event_type_ids',
                                       shape=[-1],
                                       dtype='int64')
    task_flag = fluid.layers.data(name='task_flag', shape=[1], dtype='int64')
    entity_ids = fluid.layers.data(name='entity_ids',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='int64')
    depedency_dis = fluid.layers.data(name='depedency_dis',
                                      shape=[-1, args.max_seq_len, 1],
                                      dtype='int64')

    if not is_test:
        # Per-position label weights are only fed during training.
        trigger_label_weight = fluid.layers.data(
            name='trigger_label_weight',
            shape=[-1, args.max_seq_len, args.num_trigger_labels * 2, 1],
            dtype='float32')
        argument_label_weight = fluid.layers.data(
            name='argument_label_weight',
            shape=[-1, args.max_seq_len, args.num_argument_labels * 2, 1],
            dtype='float32')
        pyreader = fluid.io.DataLoader.from_generator(feed_list=[
            src_ids, sent_ids, trigger_sent_ids, pos_ids, task_ids, input_mask,
            seq_lens, trigger_label, argument_label, trigger_span,
            trigger_span_index, trigger_span_mask, event_type_ids, task_flag,
            entity_ids, depedency_dis, trigger_label_weight,
            argument_label_weight
        ],
                                                      capacity=70,
                                                      iterable=False)
    else:
        pyreader = fluid.io.DataLoader.from_generator(feed_list=[
            src_ids, sent_ids, trigger_sent_ids, pos_ids, task_ids, input_mask,
            seq_lens, trigger_label, argument_label, trigger_span,
            trigger_span_index, trigger_span_mask, event_type_ids, task_flag,
            entity_ids, depedency_dis
        ],
                                                      capacity=70,
                                                      iterable=True)

    # ---- ERNIE encoder -----------------------------------------------------
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train",
                                   is_test=is_test)

    def trigger_net():
        """Trigger net: per-token multi-label sigmoid classifier on enc_out.

        Returns:
            (probs, loss, grads): sigmoid probabilities, masked mean loss,
            and the gradient list from get_gradient().
        """
        logits = fluid.layers.fc(
            input=enc_out,
            size=args.num_trigger_labels * 2,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                name="task_trigger_extraction_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="task_trigger_extraction_b",
                initializer=fluid.initializer.Constant(0.)))
        batch_size = logits.shape[0]
        seq_len = logits.shape[1]
        logits_ = fluid.layers.reshape(logits,
                                       shape=[-1, args.num_trigger_labels * 2])
        trigger_label_ = fluid.layers.cast(trigger_label, dtype='float32')
        labels_ = fluid.layers.reshape(
            trigger_label_, shape=[-1, args.num_trigger_labels * 2])
        loss_ = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits_,
                                                               label=labels_)
        loss = fluid.layers.reshape(
            loss_, shape=[batch_size, seq_len, args.num_trigger_labels * 2])
        if not is_test:
            loss = loss * trigger_label_weight
        loss = fluid.layers.reduce_sum(
            loss, dim=2)  # shape [batch_size, max_seq_len]
        input_mask_ = fluid.layers.flatten(input_mask, axis=1)
        loss_masked = loss * input_mask_
        loss = fluid.layers.mean(x=loss_masked)
        # Per-label probability for inference.
        probs = fluid.layers.sigmoid(logits)
        logit_gradient = fluid.layers.reshape(
            logits_, shape=[-1, args.max_seq_len, args.num_trigger_labels * 2])
        selected_idx = fluid.layers.fill_constant(shape=[1],
                                                  value=0,
                                                  dtype='int64')
        # NOTE(review): src_ids is an int64 ids tensor; fluid.gradients
        # cannot propagate a gradient to integer inputs, which is the likely
        # reason the fetched gradient comes back empty. Differentiating
        # w.r.t. the (float) embedding output of ERNIE should be verified
        # as the fix.
        grads = get_gradient(src_ids, logit_gradient, selected_idx)
        return probs, loss, grads

    def argument_net():
        """Argument net -- analogous to trigger_net (the body was elided in
        the original post; as shown, probs/loss/grads are undefined here)."""
        return probs, loss, grads

    graph_vars = {"inputs": src_ids, "seqlen": seq_lens}
    if not is_test:
        # BUGFIX: the nets return three values (probs, loss, grads); the
        # original code unpacked only two, which raises "too many values to
        # unpack" at graph-build time. Gradients are unused during training,
        # so discard them here.
        infer_trigger, trigger_loss, _ = trigger_net()
        infer_argument, argument_loss, _ = argument_net()
        if args.only_argument:
            loss = argument_loss * float(args.argument_loss_weight)
        else:
            loss = trigger_loss * float(
                args.trigger_loss_weight) + argument_loss * float(
                    args.argument_loss_weight)
        graph_vars["loss"] = loss
        graph_vars["trigger_loss"] = trigger_loss
        graph_vars["argument_loss"] = argument_loss
        graph_vars["infer_trigger"] = infer_trigger
        graph_vars["trigger_span_mask"] = trigger_span_mask
        graph_vars["infer_argument"] = infer_argument
        graph_vars["trigger_label"] = fluid.layers.squeeze(trigger_label, [-1])
        graph_vars["argument_label"] = fluid.layers.squeeze(
            argument_label, [-1])
        graph_vars["event_type"] = event_type_ids
    else:
        # Choose task: 0 - trigger; 1 - argument.
        argument_task_id = fluid.layers.ones_like(task_flag)
        flag = (task_flag == argument_task_id)
        infers, loss, grads = fluid.layers.cond(flag, argument_net,
                                                trigger_net)
        graph_vars["infer_result"] = infers
        # NOTE(review): grads is a Python list of Variables
        # (fluid.gradients always returns a list); fetch its elements
        # individually rather than the list object itself.
        graph_vars["grads"] = grads
        print(grads)
    return pyreader, graph_vars
fetch部分代码:
for data in pyreader():
    # Trigger-extraction step: assemble the fetch targets by key, then run
    # the program and pull back inputs, predictions, lengths and gradients.
    fetch_keys = ("inputs", "infer_result", "seqlen", "grads")
    fetch_list = [graph_vars[key] for key in fetch_keys]
    inputs, preds, seqlen, grads = exe.run(feed=feed_list,
                                           program=program,
                                           fetch_list=fetch_list,
                                           return_numpy=False)
    print(grads)