Avoid causing issues for models that do not set no_weight_decay.

There seems to be a difference in optimizer behavior between passing params as a flat list ([]) and as parameter groups ([{"params": ...}, {"params": ...}]).

Avoid causing issues for models that do not set no_weight_decay.
There seems to be a difference in optimizer behavior between passing params as a flat list ([]) and as parameter groups ([{"params": ...}, {"params": ...}]).
73f4d8e4 · gaotingquan · Tingquan Gao · 31ea33c8 · 73f4d8e4
显示空白变更内容
内联并排

Showing 22 additions and 19 deletions

ppcls/optimizer/optimizer.py ppcls/optimizer/optimizer.py +22 -19

未找到文件。
--- a/ppcls/optimizer/optimizer.py
+++ b/ppcls/optimizer/optimizer.py
@@ -113,6 +113,8 @@ class Momentum(object):
        # model_list is None in static graph
        parameters = None
        if model_list:
+            # TODO(gaotingquan): to avoid cause issues for unset no_weight_decay models
+            if len(self.no_weight_decay_name_list) > 0:
                params_with_decay = []
                params_without_decay = []
                for m in model_list:
@@ -129,6 +131,8 @@ class Momentum(object):
                    "params": params_without_decay,
                    "weight_decay": 0.0
                }]
+            else:
+                parameters = sum([m.parameters() for m in model_list], [])
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
@@ -279,9 +283,8 @@ class AdamW(object):

        if self.one_dim_param_no_weight_decay:
            self.no_weight_decay_param_name_list += [
-                p.name
-                for model in model_list for n, p in model.named_parameters()
-                if len(p.shape) == 1
+                p.name for model in model_list
+                for n, p in model.named_parameters() if len(p.shape) == 1
            ] if model_list else []

        opt = optim.AdamW(