# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MPC Optimizers
"""
from paddle.fluid.optimizer import Optimizer
from paddle.fluid import framework
from paddle.fluid.framework import program_guard
from paddle.fluid.framework import Variable
from paddle.fluid.clip import error_clip_callback
from paddle.fluid import unique_name
from paddle.fluid.initializer import Constant
from .backward import append_backward
from .mpc_layer_helper import MpcLayerHelper


class MPCSGDOptimizer(Optimizer):
    r"""
    MPC implementation of the stochastic gradient descent algorithm,
    based on the Optimizer class in Paddle.

    .. math::

        param\_out = param - learning\_rate * grad

    Parameters:
        learning_rate (float|Variable): The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        regularization: A Regularizer, such as
            :ref:`api_fluid_regularizer_L2DecayRegularizer`. Optional, default is None.
        name (str, optional): This parameter is used by developers to print debugging
            information. For details, please refer to :ref:`api_guide_Name`.
            Default is None.
    """

    def __init__(self, learning_rate, regularization=None, name=None):
        assert learning_rate is not None
        super(MPCSGDOptimizer, self).__init__(
            learning_rate=learning_rate,
            regularization=regularization,
            name=name)
        self.type = "mpc_sgd"

    def _append_optimize_op(self, block, param_and_grad):
        """
        Append the mpc_sgd op that updates one (param, grad) pair:
        param_out = param - learning_rate * grad.
        """
        assert isinstance(block, framework.Block)
        mpc_sgd_op = block.append_op(
            type=self.type,
            inputs={
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "LearningRate": self._create_param_lr(param_and_grad)
            },
            outputs={"ParamOut": param_and_grad[0]},
            stop_gradient=True)
        return mpc_sgd_op

    def backward(self,
                 loss,
                 startup_program=None,
                 parameter_list=None,
                 no_grad_set=None,
                 callbacks=None):
        """
        The first part of ``minimize``, do auto-diff to append backward operations
        for the current program.

        Args:
            loss (Variable): ``loss`` variable to run optimizations.
            startup_program (Program, optional): :ref:`api_fluid_Program` for
                initializing parameters in ``parameter_list``. The default value is
                None, at this time :ref:`api_fluid_default_startup_program` will be used.
            parameter_list (list, optional): List of ``Variable`` names to update
                to minimize ``loss``. The default value is None, at this time all
                parameters will be updated.
            no_grad_set (set, optional): Set of ``Variable`` objects that don't need
                to be updated. The default value is None.
            callbacks (list, optional): list of callable objects to run when appending
                backward operator for one parameter. The default value is None.

        Return:
            list: list of (param, grad) variable pairs, param is ``Parameter``,
                grad is the gradient value corresponding to the parameter.
        Examples:
            See examples in ``apply_gradients``.
        """
        no_grad_set = self._get_no_grad_set(loss, no_grad_set)
        self._dtype = loss.dtype
        if callbacks is None:
            callbacks = [error_clip_callback]
        else:
            assert isinstance(callbacks, list)
        program = loss.block.program
        assert len(loss.shape) == 2 and loss.shape[0] == 2 and loss.shape[1] == 1, \
            "The loss.shape should be (2, 1), but the current loss.shape is {}. " \
            "Maybe you should call fluid.layers.mean to process the current loss.".format(
                loss.shape)
        with program_guard(program, startup_program):
            params_grads = append_backward(loss, parameter_list, no_grad_set,
                                           callbacks)
            # Note: since we can't use all_reduce_op now,
            # dgc_op should be the last op of one grad.
            self._append_dgc_ops(params_grads)
        return params_grads

    def _create_global_learning_rate(self):
        """
        Create the global learning rate variable in the current main program
        if it has not been created yet.
        """
        lr = self._global_learning_rate()
        if isinstance(lr, framework.Variable):
            return
        else:
            if not isinstance(self._learning_rate, float):
                raise TypeError(
                    "learning rate variable is created outside optimizer, "
                    "can not create new learning rate variable for new program")

        # create learning rate in the current main program
        self._learning_rate_map[framework.default_main_program(
        )] = create_global_var(
            name=unique_name.generate("learning_rate"),
            shape=[1],
            value=float(self._learning_rate),
            dtype='double',
            persistable=True)

    def _create_param_lr(self, param_and_grad):
        """
        Create the learning rate variable for a parameter.
        """
        # create learning rate variable for every parameter
        param = param_and_grad[0]
        param_lr = param.optimize_attr['learning_rate']
        if type(param_lr) == Variable:
            return param_lr
        else:
            if param_lr == 1.0:
                return self._global_learning_rate()
            else:
                with framework.default_main_program()._lr_schedule_guard(
                        is_with_opt=True), framework.name_scope(
                            'scale_with_param_lr'):
                    return self._global_learning_rate() * param_lr

    def _global_learning_rate(self, program=None):
        """
        Get the global decayed learning rate of the given program
        (the default main program if program is None).
        """
        if program is None:
            program = framework.default_main_program()
        return self._learning_rate_map.get(program, None)


def create_global_var(shape,
                      value,
                      dtype,
                      persistable=False,
                      force_cpu=False,
                      name=None):
    """
    This function creates a new tensor variable with value in the global block
    (block 0).

    Parameters:
        shape (list of int): Shape of the variable.
        value (float): The value of the variable. The new created variable will be
            filled with it.
        dtype (str): Data type of the variable.
        persistable (bool, optional): If this variable is persistable. Default: False.
        force_cpu (bool, optional): Force this variable to be on CPU. Default: False.
        name (str, optional): For detailed information, please refer to
            :ref:`api_guide_Name`. Usually name is no need to set and None by default.

    Returns:
        Variable: The created Variable.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import paddle.fluid.layers as layers
            var = layers.create_global_var(shape=[2,3], value=1.0, dtype='float32',
                                           persistable=True, force_cpu=True,
                                           name='new_var')
    """
    helper = MpcLayerHelper("global_var", **locals())
    var = helper.create_global_variable(
        dtype=dtype,
        shape=shape,
        persistable=persistable,
        name=name,
        stop_gradient=True)
    helper.set_variable_initializer(
        var, initializer=Constant(
            value=float(value), force_cpu=force_cpu))
    return var


SGD = MPCSGDOptimizer
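
# Minimal usage sketch (illustrative only, not part of this module's API):
# it assumes an MPC training program in which the encrypted loss has already
# been reduced to shape (2, 1), e.g. by an mpc mean layer, with ``avg_loss``
# standing in for that hypothetical loss variable.
#
#     optimizer = SGD(learning_rate=0.001)
#     optimizer.minimize(avg_loss)  # appends backward and mpc_sgd ops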