fix when the task is mnli (#540)

* fix
* fix when mnli

fix when the task is mnli (#540)
* fix
* fix when mnli
cd94b6b7 · ceci3 · GitHub · 186960cc · cd94b6b7
隐藏空白更改
内联并排

Showing 1 changed file with 20 additions and 15 deletions

paddleslim/nas/ofa/utils/nlp_utils.py paddleslim/nas/ofa/utils/nlp_utils.py +20 -15

未找到文件。
--- a/paddleslim/nas/ofa/utils/nlp_utils.py
+++ b/paddleslim/nas/ofa/utils/nlp_utils.py
@@ -66,21 +66,26 @@ def compute_neuron_head_importance(task_name,
    for w in intermediate_weight:
        neuron_importance.append(np.zeros(shape=[w.shape[1]], dtype='float32'))
-    for batch in data_loader:
-        input_ids, segment_ids, labels = batch
-        logits = model(input_ids, segment_ids, attention_mask=[None, head_mask])
-        loss = loss_fct(logits, labels)
-        loss.backward()
-        head_importance += paddle.abs(paddle.to_tensor(head_mask.gradient()))
-        for w1, b1, w2, current_importance in zip(
-                intermediate_weight, intermediate_bias, output_weight,
-                neuron_importance):
-            current_importance += np.abs(
-                (np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
-                 b1.gradient()))
-            current_importance += np.abs(
-                np.sum(w2.numpy() * w2.gradient(), axis=1))
+    if task_name.lower() != 'mnli':
+        data_loader = (data_loader, )
+    for data in data_loader:
+        for batch in data:
+            input_ids, segment_ids, labels = batch
+            logits = model(
+                input_ids, segment_ids, attention_mask=[None, head_mask])
+            loss = loss_fct(logits, labels)
+            loss.backward()
+            head_importance += paddle.abs(
+                paddle.to_tensor(head_mask.gradient()))
+            for w1, b1, w2, current_importance in zip(
+                    intermediate_weight, intermediate_bias, output_weight,
+                    neuron_importance):
+                current_importance += np.abs(
+                    (np.sum(w1.numpy() * w1.gradient(), axis=0) + b1.numpy() *
+                     b1.gradient()))
+                current_importance += np.abs(
+                    np.sum(w2.numpy() * w2.gradient(), axis=1))
    return head_importance, neuron_importance