diff --git a/ppcls/optimizer/optimizer.py b/ppcls/optimizer/optimizer.py
index 7726994933df4c3bd3654e689c99b4eaf3e95337..87190f562e39229c906c331c009cf7a7e294c7f5 100644
--- a/ppcls/optimizer/optimizer.py
+++ b/ppcls/optimizer/optimizer.py
@@ -113,22 +113,26 @@ class Momentum(object):
         # model_list is None in static graph
         parameters = None
         if model_list:
-            params_with_decay = []
-            params_without_decay = []
-            for m in model_list:
-                for n, p in m.named_parameters():
-                    if any(nd in n for nd in self.no_weight_decay_name_list) \
-                        or (self.one_dim_param_no_weight_decay and len(p.shape) == 1):
-                        params_without_decay.append(p)
-                    else:
-                        params_with_decay.append(p)
-            parameters = [{
-                "params": params_with_decay,
-                "weight_decay": self.weight_decay
-            }, {
-                "params": params_without_decay,
-                "weight_decay": 0.0
-            }]
+            # TODO(gaotingquan): to avoid cause issues for unset no_weight_decay models
+            if len(self.no_weight_decay_name_list) > 0:
+                params_with_decay = []
+                params_without_decay = []
+                for m in model_list:
+                    for n, p in m.named_parameters():
+                        if any(nd in n for nd in self.no_weight_decay_name_list) \
+                            or (self.one_dim_param_no_weight_decay and len(p.shape) == 1):
+                            params_without_decay.append(p)
+                        else:
+                            params_with_decay.append(p)
+                parameters = [{
+                    "params": params_with_decay,
+                    "weight_decay": self.weight_decay
+                }, {
+                    "params": params_without_decay,
+                    "weight_decay": 0.0
+                }]
+            else:
+                parameters = sum([m.parameters() for m in model_list], [])
         opt = optim.Momentum(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
@@ -279,9 +283,8 @@ class AdamW(object):
 
         if self.one_dim_param_no_weight_decay:
             self.no_weight_decay_param_name_list += [
-                p.name
-                for model in model_list for n, p in model.named_parameters()
-                if len(p.shape) == 1
+                p.name for model in model_list
+                for n, p in model.named_parameters() if len(p.shape) == 1
             ] if model_list else []
 
         opt = optim.AdamW(
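
Below is a minimal standalone sketch (not the repository's code) of the parameter-grouping behaviour this patch guards: when a no-weight-decay name list is configured, parameters are split into a decayed and an undecayed group; otherwise the optimizer receives a flat parameter list, which is the fallback the patch adds. The model, names, and hyperparameters here are illustrative assumptions, not values from the PR.

    # Sketch only: illustrates the guarded parameter grouping with a toy model.
    import paddle
    from paddle import optimizer as optim

    model = paddle.nn.Sequential(paddle.nn.Linear(8, 8), paddle.nn.LayerNorm(8))

    no_weight_decay_names = ["norm"]        # an empty list triggers the fallback path
    one_dim_param_no_weight_decay = True

    if len(no_weight_decay_names) > 0:
        with_decay, without_decay = [], []
        for n, p in model.named_parameters():
            if any(nd in n for nd in no_weight_decay_names) \
                    or (one_dim_param_no_weight_decay and len(p.shape) == 1):
                without_decay.append(p)
            else:
                with_decay.append(p)
        # Paddle optimizers accept parameter groups with per-group weight_decay.
        parameters = [
            {"params": with_decay, "weight_decay": 1e-4},
            {"params": without_decay, "weight_decay": 0.0},
        ]
    else:
        # Fallback analogous to the patch's else branch: a flat parameter list,
        # so configs that never set no_weight_decay keep the previous behaviour.
        parameters = model.parameters()

    opt = optim.Momentum(learning_rate=0.1, momentum=0.9,
                         weight_decay=1e-4, parameters=parameters)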