未验证 提交 cd94b6b7 编写于 作者: C ceci3 提交者: GitHub

Fix compute_neuron_head_importance when the task is MNLI (#540)

* fix

* fix when the task is mnli
上级 186960cc
...@@ -66,21 +66,26 @@ def compute_neuron_head_importance(task_name, ...@@ -66,21 +66,26 @@ def compute_neuron_head_importance(task_name,
for w in intermediate_weight: for w in intermediate_weight:
neuron_importance.append(np.zeros(shape=[w.shape[1]], dtype='float32')) neuron_importance.append(np.zeros(shape=[w.shape[1]], dtype='float32'))
for batch in data_loader: if task_name.lower() != 'mnli':
input_ids, segment_ids, labels = batch data_loader = (data_loader, )
logits = model(input_ids, segment_ids, attention_mask=[None, head_mask]) for data in data_loader:
loss = loss_fct(logits, labels) for batch in data:
loss.backward() input_ids, segment_ids, labels = batch
head_importance += paddle.abs(paddle.to_tensor(head_mask.gradient())) logits = model(
input_ids, segment_ids, attention_mask=[None, head_mask])
for w1, b1, w2, current_importance in zip( loss = loss_fct(logits, labels)
intermediate_weight, intermediate_bias, output_weight, loss.backward()
neuron_importance): head_importance += paddle.abs(
current_importance += np.abs( paddle.to_tensor(head_mask.gradient()))
(np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
b1.gradient())) for w1, b1, w2, current_importance in zip(
current_importance += np.abs( intermediate_weight, intermediate_bias, output_weight,
np.sum(w2.numpy() * w2.gradient(), axis=1)) neuron_importance):
current_importance += np.abs(
(np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
b1.gradient()))
current_importance += np.abs(
np.sum(w2.numpy() * w2.gradient(), axis=1))
return head_importance, neuron_importance return head_importance, neuron_importance
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册