Unverified commit cd94b6b7 — authored by ceci3, committed by GitHub

fix when the task is mnli (#540)

* fix

* fix when mnli
Parent commit: 186960cc
......@@ -66,21 +66,26 @@ def compute_neuron_head_importance(task_name,
for w in intermediate_weight:
neuron_importance.append(np.zeros(shape=[w.shape[1]], dtype='float32'))
for batch in data_loader:
input_ids, segment_ids, labels = batch
logits = model(input_ids, segment_ids, attention_mask=[None, head_mask])
loss = loss_fct(logits, labels)
loss.backward()
head_importance += paddle.abs(paddle.to_tensor(head_mask.gradient()))
for w1, b1, w2, current_importance in zip(
intermediate_weight, intermediate_bias, output_weight,
neuron_importance):
current_importance += np.abs(
(np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
b1.gradient()))
current_importance += np.abs(
np.sum(w2.numpy() * w2.gradient(), axis=1))
if task_name.lower() != 'mnli':
data_loader = (data_loader, )
for data in data_loader:
for batch in data:
input_ids, segment_ids, labels = batch
logits = model(
input_ids, segment_ids, attention_mask=[None, head_mask])
loss = loss_fct(logits, labels)
loss.backward()
head_importance += paddle.abs(
paddle.to_tensor(head_mask.gradient()))
for w1, b1, w2, current_importance in zip(
intermediate_weight, intermediate_bias, output_weight,
neuron_importance):
current_importance += np.abs(
(np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
b1.gradient()))
current_importance += np.abs(
np.sum(w2.numpy() * w2.gradient(), axis=1))
return head_importance, neuron_importance
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.