diff --git a/ppcls/optimizer/optimizer.py b/ppcls/optimizer/optimizer.py
index 7726994933df4c3bd3654e689c99b4eaf3e95337..87190f562e39229c906c331c009cf7a7e294c7f5 100644
--- a/ppcls/optimizer/optimizer.py
+++ b/ppcls/optimizer/optimizer.py
@@ -113,22 +113,26 @@ class Momentum(object):
         # model_list is None in static graph
         parameters = None
         if model_list:
-            params_with_decay = []
-            params_without_decay = []
-            for m in model_list:
-                for n, p in m.named_parameters():
-                    if any(nd in n for nd in self.no_weight_decay_name_list) \
-                        or (self.one_dim_param_no_weight_decay and len(p.shape) == 1):
-                        params_without_decay.append(p)
-                    else:
-                        params_with_decay.append(p)
-            parameters = [{
-                "params": params_with_decay,
-                "weight_decay": self.weight_decay
-            }, {
-                "params": params_without_decay,
-                "weight_decay": 0.0
-            }]
+            # TODO(gaotingquan): to avoid cause issues for unset no_weight_decay models
+            if len(self.no_weight_decay_name_list) > 0:
+                params_with_decay = []
+                params_without_decay = []
+                for m in model_list:
+                    for n, p in m.named_parameters():
+                        if any(nd in n for nd in self.no_weight_decay_name_list) \
+                            or (self.one_dim_param_no_weight_decay and len(p.shape) == 1):
+                            params_without_decay.append(p)
+                        else:
+                            params_with_decay.append(p)
+                parameters = [{
+                    "params": params_with_decay,
+                    "weight_decay": self.weight_decay
+                }, {
+                    "params": params_without_decay,
+                    "weight_decay": 0.0
+                }]
+            else:
+                parameters = sum([m.parameters() for m in model_list], [])
         opt = optim.Momentum(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
@@ -279,9 +283,8 @@ class AdamW(object):
 
         if self.one_dim_param_no_weight_decay:
             self.no_weight_decay_param_name_list += [
-                p.name
-                for model in model_list for n, p in model.named_parameters()
-                if len(p.shape) == 1
+                p.name for model in model_list
+                for n, p in model.named_parameters() if len(p.shape) == 1
             ] if model_list else []
 
         opt = optim.AdamW(
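
Below is a minimal standalone sketch (not the repository's code) of the parameter-grouping behaviour this patch guards: when a no-weight-decay name list is configured, parameters are split into a decayed and an undecayed group; otherwise the optimizer receives a flat parameter list, which is the fallback the patch adds. The model, names, and hyperparameters here are illustrative assumptions, not values from the PR.

    # Sketch only: illustrates the guarded parameter grouping with a toy model.
    import paddle
    from paddle import optimizer as optim

    model = paddle.nn.Sequential(paddle.nn.Linear(8, 8), paddle.nn.LayerNorm(8))

    no_weight_decay_names = ["norm"]        # an empty list triggers the fallback path
    one_dim_param_no_weight_decay = True

    if len(no_weight_decay_names) > 0:
        with_decay, without_decay = [], []
        for n, p in model.named_parameters():
            if any(nd in n for nd in no_weight_decay_names) \
                    or (one_dim_param_no_weight_decay and len(p.shape) == 1):
                without_decay.append(p)
            else:
                with_decay.append(p)
        # Paddle optimizers accept parameter groups with per-group weight_decay.
        parameters = [
            {"params": with_decay, "weight_decay": 1e-4},
            {"params": without_decay, "weight_decay": 0.0},
        ]
    else:
        # Fallback analogous to the patch's else branch: a flat parameter list,
        # so configs that never set no_weight_decay keep the previous behaviour.
        parameters = model.parameters()

    opt = optim.Momentum(learning_rate=0.1, momentum=0.9,
                         weight_decay=1e-4, parameters=parameters)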