When constructing adversarial examples, the following error is reported: "stop_gradient to True for its input and output variables using var.stop_gradient=True".
Created by: OleNet
To get your issue resolved quickly, please first search for similar issues before filing one: [search issue keywords] [filter with labels] [official documentation]
If you find no similar issue, please provide the following details so your question can be resolved quickly:
- Title: a concise, precise summary of the problem, e.g. "Insufficient Memory xxx"
- Version / environment info: 1) PaddlePaddle version: 1.6 2) CPU: - 3) GPU: P40 / CUDA 9.0 / cuDNN 7.3 4) System: CentOS 6.3 / Python 2.7
- Training info: 1) single machine, single card: both 2) GPU memory: 22 GB 3) Operator info
- Reproduction: if this is an error report, please give the environment and steps to reproduce
- Problem description: please describe the problem in detail, and include the error message, logs, and a reproducible code snippet
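For context: the code below appears to implement FGM-style adversarial training (fast-gradient perturbation of the word embedding): compute emb_hat = emb + epsilon * sign(d loss / d emb), run the forward pass again on the perturbed embedding, and add the resulting loss to the clean loss. A minimal, framework-independent sketch of the perturbation step (pure NumPy; all names here are illustrative, not taken from the code below):

```python
# Pure-NumPy sketch of the FGM perturbation step (illustrative only).
import numpy as np

def fgm_perturb(emb, grad, epsilon=0.001):
    """emb_hat = emb + epsilon * sign(d loss / d emb)."""
    return emb + epsilon * np.sign(grad)

emb = np.random.randn(2, 4).astype('float32')   # toy [batch, emb_size] slice
grad = np.random.randn(2, 4).astype('float32')  # toy gradient of the loss
print(fgm_perturb(emb, grad))
```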
from functools import partial

import paddle.fluid as fluid


def attack_loss(ernie, mask_ids, labels, loss, task_fc_fn):
    # TODO: does it matter whether this is fleet_main_program, ParallelProgram,
    # or TrainProgram?
    program = fluid.default_main_program()  # currently unused below
emb = ernie.emb_out
pos_emb = ernie.position_emb_out
sent_emb = ernie.sent_emb_out
task_emb = ernie.task_emb_out
param_grads = fluid.backward.append_backward(loss, parameter_list=[emb.name])
    # wrap in list() so the filter result is indexable on Python 3 as well
    gradient = list(filter(lambda p: p[0].name == emb.name, param_grads))[0][1]
#gradient = process_gradient(gradient)
    # FGM step: perturb along the sign of the gradient. (The original snippet
    # computed the sign but then added the raw gradient instead.)
    gradient_sign = fluid.layers.sign(gradient)
    epsilon = 0.001
    emb_hat = emb + epsilon * gradient_sign
ernie.forward_emb(emb_hat, pos_emb, sent_emb, task_emb, mask_ids)
graph_vars = task_fc_fn(ernie, labels)
return graph_vars['loss']
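The error in the title typically means the backward pass reached a variable for which no gradient variable was built; in the static graph, `emb_out` must have `stop_gradient = False` before the backward ops are appended. As a sanity check, here is a toy graph (not the ERNIE graph) that extracts the gradient of an intermediate variable; it assumes `fluid.gradients`, which Paddle 1.6 provides, as an alternative to filtering the result of `append_backward`:

```python
# Toy static graph: gradient of the loss w.r.t. an intermediate variable.
import paddle.fluid as fluid

ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
emb = fluid.layers.embedding(input=ids, size=[100, 8])
emb.stop_gradient = False  # without this, no emb@GRAD variable is created
logits = fluid.layers.fc(input=emb, size=2)
loss = fluid.layers.mean(logits)

emb_grad = fluid.gradients([loss], [emb])[0]
emb_hat = emb + 0.001 * fluid.layers.sign(emb_grad)
```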
def create_model(args, pyreader_name, ernie_config, is_prediction=False, task_name="", is_classify=False, is_regression=False):
    # The two branches differed only in the label dtype (int64 for
    # classification, float32 for regression), so one reader covers both.
    # Note: the original regression branch put float32 in the sixth slot,
    # which according to the read order below is seq_len, not labels; that
    # looks like a dtype mix-up, fixed here.
    assert is_classify or is_regression
    label_dtype = 'float32' if is_regression else 'int64'
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1]] * 5 + [[-1, 1]] * 3,
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32',
                'int64', label_dtype, 'int64'],
        lod_levels=[0] * 8,
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)
(src_ids, sent_ids, pos_ids, task_ids, input_mask, seq_len, labels, qids) = fluid.layers.read_file(pyreader)
ernie = ErnieModel(
src_ids=src_ids,
position_ids=pos_ids,
sentence_ids=sent_ids,
task_ids=task_ids,
input_mask=input_mask,
config=ernie_config,
case_id=None,
appear_id=None,
use_fp16=args.use_fp16)
graph_vars = task_fc(ernie, labels, args, is_prediction, is_classify, is_regression)
graph_vars["qids"] = qids
    adv = True  # toggle FGM adversarial training
    if adv:
task_fc_fn = partial(task_fc,
args=args,
is_prediction=is_prediction,
is_classify=is_classify,
is_regression=is_regression)
adv_loss = attack_loss(ernie, input_mask, labels, graph_vars['loss'], task_fc_fn)
#task_fc(ernie, args, is_prediction, emb, is_classify, is_regression)
#def emb_to_loss(emb):
# ernie, pyreader, graph_vars = ernie_forward(args, ernie_config, is_prediction, pyreader, emb, is_classify, is_regression)
# return graph_vars['loss']
## def attack(program, ernie):
#adv_loss = attack_loss(main_program, ernie._word_emb_name, graph_vars['loss'].name, emb_to_loss)
graph_vars['loss'] += adv_loss
return pyreader, graph_vars
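create_model above adds the adversarial loss to the clean loss with equal weight. If the adversarial term dominates training, a scaled sum is a common variant; `adv_alpha` below is a hypothetical hyperparameter, not something in the original code:

```python
# Hypothetical weighting of the adversarial term (adv_alpha is assumed,
# not part of the original snippet).
adv_alpha = 0.5
graph_vars['loss'] = graph_vars['loss'] + adv_alpha * adv_loss
```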
def task_fc(ernie, labels, args, is_prediction, is_classify=False, is_regression=False):
cls_feats = ernie.get_pooled_output()
cls_feats = fluid.layers.dropout(
x=cls_feats,
dropout_prob=args.cls_dropout_rate,
dropout_implementation="upscale_in_train")
logits = fluid.layers.fc(
input=cls_feats,
size=args.num_labels,
param_attr=fluid.ParamAttr(
name="cls_head_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_head_out_b", initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        # NOTE: src_ids / pos_ids / sent_ids / input_mask / pyreader are not
        # in scope inside task_fc; this branch was copied from create_model
        # and will raise NameError when is_prediction is True.
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name
    assert is_classify != is_regression, 'exactly one of is_classify / is_regression must be True'
num_seqs = fluid.layers.create_tensor(dtype='int64')
if is_classify:
if args.use_bce:
            # hard-coded for binary classification
            one_hot_label = fluid.layers.one_hot(input=labels, depth=2)
            loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                logits, one_hot_label)
            # TODO: should this be a mean rather than a sum?
            loss = fluid.layers.reduce_sum(loss)
probs = logits
else:
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=labels, return_softmax=True)
loss = fluid.layers.mean(x=ce_loss)
accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
graph_vars = {
"loss": loss,
"probs": probs,
"accuracy": accuracy,
"labels": labels,
"num_seqs": num_seqs,
"checkpoints": ernie._checkpoints
}
elif is_regression:
if args.use_sigmoid:
logits = fluid.layers.sigmoid(logits)
cost = fluid.layers.square_error_cost(input=logits, label=labels)
loss = fluid.layers.mean(x=cost)
graph_vars = {
"loss": loss,
"probs": logits,
"labels": labels,
"num_seqs": num_seqs,
"checkpoints":ernie._checkpoints
}
    else:
        raise ValueError('unsupported fine-tune mode: only classify/regression are supported')
return graph_vars
class ErnieModel(object):
def __init__(self,
src_ids,
position_ids,
sentence_ids,
task_ids,
input_mask,
config,
case_id,
appear_id,
weight_sharing=True,
use_fp16=False):
        ...  # (config attribute setup, e.g. self._voc_size / self._emb_size, elided in the original post)
self._build_model(src_ids, position_ids, sentence_ids, task_ids, input_mask)
def _build_model(self, src_ids, position_ids, sentence_ids, task_ids, input_mask):
# padding id in vocabulary must be set to 0
self.emb_out = fluid.layers.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
self.position_emb_out = fluid.layers.embedding(
input=position_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
self.sent_emb_out = fluid.layers.embedding(
sentence_ids,
size=[self._sent_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer))
self.task_emb_out = fluid.layers.embedding(
task_ids,
size=[self._task_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._task_emb_name, initializer=self._param_initializer))
        # Only the word embedding participates in the FGM perturbation, so it
        # alone keeps a gradient; the other embeddings stay frozen.
        self.emb_out.stop_gradient = False
        self.position_emb_out.stop_gradient = True
        self.sent_emb_out.stop_gradient = True
        self.task_emb_out.stop_gradient = True
self.forward_emb(self.emb_out,
self.position_emb_out,
self.sent_emb_out,
self.task_emb_out,
input_mask)
def forward_emb(self, emb_out, position_emb_out, sent_emb_out, task_emb_out, input_mask):
        sum_emb = emb_out + position_emb_out + sent_emb_out
        # NOTE: task_emb_out is deliberately left out while debugging; adding
        # it back would be: sum_emb = sum_emb + task_emb_out
        ...