diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 61febc4e383b5eb0e75c0005330b92fa90ddbe44..93a19de92e1654df2424019d764f1cbbe6314686 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -36,10 +36,18 @@ class Optimizer(object): """ def __init__(self, learning_rate, global_step=None, regularization=None): - assert learning_rate is not None + if not isinstance(learning_rate, float) and \ + not isinstance(learning_rate, framework.Variable): + raise TypeError("learning rate should be float or Variable") self._global_step = global_step self.regularization = regularization - self._global_learning_rate = learning_rate + self._learning_rate = learning_rate + # each program should have a independent learning rate + # program -> Variable(learning_rate) + self._learning_rate_map = dict() + if isinstance(self._learning_rate, framework.Variable): + self._learning_rate_map[framework.default_main_program( + )] = self._learning_rate # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. 
def _create_global_learning_rate(self):
    """Ensure a learning-rate Variable is registered for the current program.

    Method of ``Optimizer``. Looks up the entry of
    ``self._learning_rate_map`` keyed by
    ``framework.default_main_program()``. If that entry is already a
    ``framework.Variable`` nothing is done. Otherwise a new persistable
    global variable of shape ``[1]`` and dtype ``float32`` is created from
    the float stored in ``self._learning_rate`` and registered under that
    program.

    Raises:
        TypeError: if no Variable is registered for the current program and
            ``self._learning_rate`` is not a float, so there is no plain
            value from which a new variable could be created.
    """
    program = framework.default_main_program()
    if isinstance(self.global_learning_rate(program), framework.Variable):
        return

    if not isinstance(self._learning_rate, float):
        # The two literals are concatenated implicitly; the trailing space
        # keeps the words on either side of the comma from running together.
        raise TypeError(
            "learning rate variable is created outside optimizer, "
            "can not create new learning rate variable for new program")

    # Create the learning rate in the current main program.
    self._learning_rate_map[program] = layers.create_global_var(
        name=unique_name.generate("learning_rate"),
        shape=[1],
        value=float(self._learning_rate),
        dtype='float32',
        persistable=True)


def global_learning_rate(self, program=None):
    """Return the learning rate registered for ``program``.

    Method of ``Optimizer``.

    Args:
        program: key into ``self._learning_rate_map``. When ``None``, the
            result of ``framework.default_main_program()`` is used.

    Returns:
        The value stored for ``program`` in ``self._learning_rate_map``,
        or ``None`` when nothing has been registered for it.
    """
    if program is None:
        program = framework.default_main_program()
    return self._learning_rate_map.get(program)