# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .. import core as core
from .. import framework as framework
from ..dygraph.parallel import scale_loss
import numpy as np


def monkey_patch_eagertensor():
    """
    Attach ``__str__``, ``backward`` and ``gradient`` to ``core.eager.EagerTensor``.

    The methods are defined locally and installed with ``setattr``. Nothing is
    patched when ``core`` has no ``eager`` attribute.
    """

    def __str__(self):
        """Return the string produced by ``eager_tensor_to_string`` for this Tensor."""
        # Imported at call time rather than at module load.
        # NOTE(review): presumably to avoid a circular import -- confirm.
        from paddle.tensor.to_string import eager_tensor_to_string
        return eager_tensor_to_string(self)

    @framework.dygraph_only
    def backward(self, grad_tensor=None, retain_graph=False):
        """
        Run backward of current Graph which starts from current Tensor.

        The new gradient will accumulate on previous gradient.

        You can clear gradient by ``Tensor.clear_grad()`` .

        Args:
            grad_tensor(Tensor, optional): initial gradient values of the current Tensor. If `grad_tensor` is None,
                the initial gradient values of the current Tensor would be Tensor filled with 1.0;
                if `grad_tensor` is not None, it must have the same shape as the current Tensor.
                The default value is None.

            retain_graph(bool, optional): If False, the graph used to compute grads will be freed. If you would
                like to add more ops to the built graph after calling this method( :code:`backward` ), set the parameter
                :code:`retain_graph` to True, then the grads will be retained. Thus, setting it to False is much more memory-efficient.
                Defaults to False.

        Returns:
            NoneType: None

        Raises:
            ValueError: If ``framework.in_dygraph_mode()`` is false.
            AssertionError: If `grad_tensor` is not a ``core.eager.EagerTensor`` or
                its shape differs from the shape of the current Tensor.

        Examples:
            .. code-block:: python

                import paddle
                x = paddle.to_tensor(5., stop_gradient=False)
                for i in range(5):
                    y = paddle.pow(x, 4.0)
                    y.backward()
                    print("{}: {}".format(i, x.grad))
                # 0: [500.]
                # 1: [1000.]
                # 2: [1500.]
                # 3: [2000.]
                # 4: [2500.]

                x.clear_grad()
                print("{}".format(x.grad))
                # 0.

                grad_tensor=paddle.to_tensor(2.)
                for i in range(5):
                    y = paddle.pow(x, 4.0)
                    y.backward(grad_tensor)
                    print("{}: {}".format(i, x.grad))
                # 0: [1000.]
                # 1: [2000.]
                # 2: [3000.]
                # 3: [4000.]
                # 4: [5000.]

        """
        if not framework.in_dygraph_mode():
            raise ValueError(
                "Variable.backward() is only available in DyGraph mode")

        # run_backward takes a list of initial gradients; an empty list is
        # passed when the caller supplied none.
        if grad_tensor is None:
            initial_grads = []
        else:
            # Kept as asserts (not raises) so the exception type seen by
            # existing callers stays AssertionError.
            assert isinstance(
                grad_tensor, core.eager.EagerTensor
            ), "The type of grad_tensor must be paddle.Tensor"
            assert grad_tensor.shape == self.shape, \
                "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
                grad_tensor.name, grad_tensor.shape, self.name, self.shape)
            initial_grads = [grad_tensor]

        if core.is_compiled_with_xpu() or core.is_compiled_with_npu():
            # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
            start_tensor = scale_loss(self)
        else:
            start_tensor = self

        core.eager.run_backward([start_tensor], initial_grads, retain_graph)

    @framework.dygraph_only
    def gradient(self):
        """
        .. warning::
          This API will be deprecated in the future, it is recommended to use
          :code:`x.grad` which returns the tensor value of the gradient.

        Get the Gradient of Current Tensor.

        Returns:
            ndarray: Numpy value of the gradient of current Tensor, or None
            when the gradient is missing or not initialized.

        Examples:
            .. code-block:: python

                import paddle

                x = paddle.to_tensor(5., stop_gradient=False)
                y = paddle.pow(x, 4.0)
                y.backward()
                print("grad of x: {}".format(x.gradient()))
                # [500.]

        """
        # Read the property once, and guard against it being None so the
        # documented None return holds instead of an AttributeError.
        grad = self.grad
        if grad is None or not grad._is_initialized():
            return None
        return grad.numpy()
        # TODO(wanghuancoder) support SELECTED_ROWS

    if not hasattr(core, "eager"):
        return

    patched_methods = (
        ("__str__", __str__),
        ("backward", backward),
        ("gradient", gradient),
    )
    for method_name, method in patched_methods:
        setattr(core.eager.EagerTensor, method_name, method)