diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 478fecf74e4013e0d695c68af86a0e39a4a4e845..590d76ae1708f3dc17f048f3b1bf005f834d0bed 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1291,17 +1291,17 @@ def append_backward(loss, It will be automatically invoked by the optimizer's `minimize` function. Parameters: - loss( :ref:`api_guide_Variable_en` ): The loss variable of the network. - parameter_list(list[Variable|str], optional): List of Parameters or Parameter.names + loss(Tensor): The loss Tensor of the network. + parameter_list(list[Tensor|str], optional): List of Parameters or Parameter.names that need to be updated by optimizers. If it is None, all parameters will be updated. Default: None. - no_grad_set(set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients - should be ignored. All variables with + no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients + should be ignored. All Tensors with `stop_gradient=True` from all blocks will be automatically added into this set. - If this parameter is not None, the Variables or Variable.names in this set will be added to the default set. + If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set. Default: None. callbacks(list[callable object], optional): List of callback functions. The callbacks are used for @@ -1312,70 +1312,73 @@ def append_backward(loss, new gradient operator is added into the program. The callable object must have two input - parameters: 'block' and 'context'. - The 'block' is the :ref:`api_guide_Block_en` which + parameters: ``block`` and ``context`` . + The ``block`` is the :ref:`api_guide_Block_en` which the new gradient operator will - be added to. The 'context' is a + be added to. The ``context`` is a map, whose keys are gradient - variable names and values are - corresponding original :ref:`api_guide_Variable_en` . - In addition to this, the 'context' + Tensor names and values are + corresponding original :ref:`api_guide_tensor_en` . + In addition to this, the ``context`` has another special key-value pair: - the key is string '__current_op_desc__' + the key is string ``__current_op_desc__`` and the value is the op_desc of the gradient operator who has just triggered the callable object. Default: None. Returns: - list of tuple ( :ref:`api_guide_Variable_en` , :ref:`api_guide_Variable_en` ): Pairs of parameter and its corresponding gradients. - The key is the parameter and the value is gradient variable. + list of tuple ( :ref:`api_guide_tensor_en` , :ref:`api_guide_tensor_en` ): Pairs of parameter and its corresponding gradients. + The key is the parameter and the value is gradient Tensor. Raises: - AssertionError: If `loss` is not an instance of Variable. + AssertionError: If ``loss`` is not an instance of Tensor. Examples: .. 
code-block:: python - import paddle.fluid as fluid + import paddle + import paddle.nn.functional as F - x = fluid.data(name='x', shape=[None, 13], dtype='int64') - y = fluid.data(name='y', shape=[None, 1], dtype='float32') - x_emb = fluid.embedding(x, size=[100, 256]) - y_predict = fluid.layers.fc(input=x_emb, size=1, act=None, name='my_fc') - loss = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_loss = fluid.layers.mean(loss) + paddle.enable_static() + + x = paddle.static.data(name='x', shape=[None, 13], dtype='int64') + y = paddle.static.data(name='y', shape=[None, 1], dtype='float32') + x_emb = paddle.static.nn.embedding(x, size=[100, 256]) + y_predict = paddle.static.nn.fc(input=x_emb, size=1, act=None, name='my_fc') + loss = F.square_error_cost(input=y_predict, label=y) + avg_loss = paddle.mean(loss) # Get all weights in main_program, not include bias. - all_weights = [param for param in fluid.default_main_program().block(0).all_parameters() if 'w_' in param.name] + all_weights = [param for param in paddle.static.default_main_program().block(0).all_parameters() if 'w_' in param.name] all_weights_name = [w.name for w in all_weights] # return all param_grads needed to be updated if parameter_list set default None. - p_g_list1 = fluid.backward.append_backward(loss=avg_loss) + p_g_list1 = paddle.static.append_backward(loss=avg_loss) # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD), (my_fc.b_0, my_fc.b_0@GRAD)] - # return the param_grads corresponding to parameter_list that can be list of param (Variable). - p_g_list2 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights) + # return the param_grads corresponding to parameter_list that can be list of param (Tensor). + p_g_list2 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights) # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)] # parameter_list can be list of param.name (str). - p_g_list3 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights_name) + p_g_list3 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights_name) # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)] - # no_grad_set can be set of Variables that means grad will be cut off from these Variables. - p_g_list4 = fluid.backward.append_backward(loss=avg_loss, no_grad_set=set([x_emb])) + # no_grad_set can be set of Tensors that means grad will be cut off from these Tensors. + p_g_list4 = paddle.static.append_backward(loss=avg_loss, no_grad_set=set([x_emb])) # output: [(my_fc.w_0, my_fc.w_0@GRAD), (my_fc.b_0, my_fc.b_0@GRAD)] - # no_grad_set can be set of Variable.name when the Variable is created inside layers and can't be specified explicitly. - p_g_list5 = fluid.backward.append_backward(loss=avg_loss, no_grad_set=set(['my_fc.b_0'])) + # no_grad_set can be set of Tensor.name when the Tensor is created inside layers and can't be specified explicitly. + p_g_list5 = paddle.static.append_backward(loss=avg_loss, no_grad_set=set(['my_fc.b_0'])) # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)] # return [] because all param_grads are filtered by no_grad_set. 
- p_g_list6 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights, no_grad_set=set(all_weights)) + p_g_list6 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights, no_grad_set=set(all_weights)) """ check_type(loss, 'loss', framework.Variable, - 'fluid.backward.append_backward') + 'paddle.static.append_backward') if loss.op is None: # the loss is from a cloned program. Find loss op manually. @@ -1387,7 +1390,7 @@ def append_backward(loss, if callbacks is not None: check_type(callbacks, 'callbacks', list, - 'fluid.backward.append_backward') + 'paddle.static.append_backward') program = loss.block.program root_block = program.block(0) @@ -1727,21 +1730,21 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. Args: - targets(Variable|list[Variable]): The target variables - inputs(Variable|list[Variable]): The input variables - target_gradients (Variable|list[Variable], optional): The gradient variables + targets(Tensor|list[Tensor]): The target Tensors + inputs(Tensor|list[Tensor]): The input Tensors + target_gradients (Tensor|list[Tensor], optional): The gradient Tensors of targets which has the same shape with targets, If None, ones will be created for them. - no_grad_set(set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients - should be ignored. All variables with + no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients + should be ignored. All Tensors with `stop_gradient=True` from all blocks will be automatically added into this set. - If this parameter is not None, the Variables or Variable.names in this set will be added to the default set. + If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set. Default: None. Return: - (list[Variable]): A list of gradients for inputs - If an input does not affect targets, the corresponding gradient variable + (list[Tensor]): A list of gradients for inputs + If an input does not affect targets, the corresponding gradient Tensor will be None """ targets = _as_list(targets) @@ -1865,41 +1868,42 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. Args: - targets (Variable|list[Variable]): The target variables. - inputs (Variable|list[Variable]): The input variables. - target_gradients (Variable|list[Variable], optional): The gradient variables + targets (Tensor|list[Tensor]): The target Tensors. + inputs (Tensor|list[Tensor]): The input Tensors. + target_gradients (Tensor|list[Tensor], optional): The gradient Tensor of targets which has the same shape with targets, If None, ones will be created for them. - no_grad_set (set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients - should be ignored. All variables with `stop_gradient=True` from all blocks will - be automatically added into this set. If this parameter is not None, the Variables or Variable.names + no_grad_set (set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients + should be ignored. All Tensors with ``stop_gradient=True`` from all blocks will + be automatically added into this set. If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set. Default: None. 
Return: - (list[Variable]): A list of gradients for inputs - If an input does not affect targets, the corresponding gradient variable + (list[Tensor]): A list of gradients for inputs + If an input does not affect targets, the corresponding gradient Tensor will be None. Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle + import paddle.nn.functional as F + + paddle.enable_static() - x = fluid.data(name='x', shape=[None,2,8,8], dtype='float32') + x = paddle.static.data(name='x', shape=[None, 2, 8, 8], dtype='float32') x.stop_gradient=False - y = fluid.layers.conv2d(x, 4, 1, bias_attr=False) - y = fluid.layers.relu(y) - y = fluid.layers.conv2d(y, 4, 1, bias_attr=False) - y = fluid.layers.relu(y) - z = fluid.gradients([y], x) - print(z) + y = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False) + y = F.relu(y) + z = paddle.static.gradients([y], x) + print(z) # [var x@GRAD : fluid.VarType.LOD_TENSOR.shape(-1L, 2L, 8L, 8L).astype(VarType.FP32)] """ check_type(targets, 'targets', (framework.Variable, list), - 'fluid.backward.gradients') + 'paddle.static.gradients') check_type(inputs, 'inputs', (framework.Variable, list), - 'fluid.backward.gradients') + 'paddle.static.gradients') check_type(target_gradients, 'target_gradients', ( - framework.Variable, list, type(None)), 'fluid.backward.gradients') + framework.Variable, list, type(None)), 'paddle.static.gradients') outs = calc_gradient(targets, inputs, target_gradients, no_grad_set) return _as_list(outs) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index b4cea6761dcd84e047f98929644a1e264976503d..106c9a00361566c8f8dfe3bafd11ac399382b90e 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -5396,13 +5396,13 @@ def program_guard(main_program, startup_program=None): """ :api_attr: Static Graph - Change the global main program and startup program with `"with"` statement. - Layer functions in the Python `"with"` block will append operators and - variables to the new main programs. + Change the global main program and startup program with ``with`` statement. + Layer functions in the Python ``with`` block will append operators and + Tensors to the new main programs. Args: - main_program(Program): New main program inside `"with"` statement. - startup_program(Program, optional): New startup program inside `"with"` + main_program(Program): New main program inside ``with`` statement. + startup_program(Program, optional): New startup program inside ``with`` statement. :code:`None` means not changing startup program, default_startup_program is still used. Default: None. @@ -5410,13 +5410,14 @@ def program_guard(main_program, startup_program=None): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - data = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') - hidden = fluid.layers.fc(input=data, size=10, act='relu') + paddle.enable_static() + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(main_program, startup_program): + data = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') + hidden = paddle.static.nn.fc(input=data, size=10, act='relu') Notes: The temporary :code:`Program` can be used if the user does not need to construct either of startup program or main program. 
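The updated `paddle.static.gradients` docstring example above only builds the gradient graph. As a hedged illustration of how that graph is typically executed (the Executor, the random feed data, and the fetch step below are assumptions made for this sketch, not part of the patch):

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.enable_static()

    main_prog = paddle.static.Program()
    startup_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog, startup_prog):
        # Same graph as the docstring example: conv2d + relu, then request d(y)/d(x).
        x = paddle.static.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
        x.stop_gradient = False
        y = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False)
        y = F.relu(y)
        z = paddle.static.gradients([y], x)  # list containing the x@GRAD Tensor

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_prog)  # initialize the conv2d weight
    grad_val, = exe.run(main_prog,
                        feed={'x': np.random.rand(1, 2, 8, 8).astype('float32')},
                        fetch_list=z)
    print(grad_val.shape)  # (1, 2, 8, 8), matching the input shape
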
@@ -5424,20 +5425,22 @@ def program_guard(main_program, startup_program=None): Examples: .. code-block:: python - import paddle.fluid as fluid + import paddle - main_program = fluid.Program() - # does not care about startup program. Just pass a temporary value. - with fluid.program_guard(main_program, fluid.Program()): - data = fluid.data(name='image', shape=[None, 784, 784], dtype='float32') + paddle.enable_static() + main_program = paddle.static.Program() + # does not care about startup program. Just pass a temporary value. + with paddle.static.program_guard(main_program, paddle.static.Program()): + data = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32') """ from .data_feeder import check_type - check_type(main_program, 'main_program', Program, 'fluid.program_guard') + check_type(main_program, 'main_program', Program, + 'paddle.static.program_guard') main_program = switch_main_program(main_program) if startup_program is not None: check_type(startup_program, 'startup_program', Program, - 'fluid.program_guard') + 'paddle.static.program_guard') startup_program = switch_startup_program(startup_program) try: yield
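For completeness, a minimal sketch of consuming a program built under `paddle.static.program_guard` as in the docstring above (the simplified `[None, 784]` data shape, the random feed, and the Executor call are illustrative assumptions, not part of this change):

    import numpy as np
    import paddle

    paddle.enable_static()

    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        # Layers created here are appended to main_program/startup_program,
        # not to the global default programs.
        image = paddle.static.data(name='image', shape=[None, 784], dtype='float32')
        hidden = paddle.static.nn.fc(input=image, size=10, act='relu')

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_program)  # run the parameter initializers of the fc layer
    out, = exe.run(main_program,
                   feed={'image': np.random.rand(4, 784).astype('float32')},
                   fetch_list=[hidden])
    print(out.shape)  # (4, 10)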