From 34241dd18f7cf3a3818926d852caef0e1ead7370 Mon Sep 17 00:00:00 2001
From: Sing_chan <51314274+betterpig@users.noreply.github.com>
Date: Fri, 1 Apr 2022 17:22:18 +0800
Subject: [PATCH] change vjp to paddle.grad (#41231)

* change vjp to paddle.grad
* use grad and gradients api
* fix preprocess for x
* fix a bug, val_and_grad should return a Tensor
* detach value and grad to avoid assign error

Co-authored-by: levi131
---
 .../incubate/optimizer/functional/utils.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py
index c197f8a1ac..3000c82a71 100644
--- a/python/paddle/incubate/optimizer/functional/utils.py
+++ b/python/paddle/incubate/optimizer/functional/utils.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import paddle
-from paddle.autograd.functional import vjp, Jacobian
 from paddle.fluid.framework import Variable
 from paddle.fluid.data_feeder import check_type, check_dtype
 
@@ -86,11 +85,14 @@ def _value_and_gradient(f, x, v=None):
         value: a tensor that holds the function value.
         gradient: a tensor that holds the function gradients.
     """
+    # use detach to cut off relation between x and original graph
+    x = x.detach()
+    x.stop_gradient = False
+    value = f(x)
     if paddle.in_dynamic_mode():
-        value, gradient = vjp(f, x, v=v)
-        gradient = gradient[0]
+        # only need to compute first order derivative, and some op dont support high order derivative.
+        gradient = paddle.grad([value], [x], create_graph=False)[0]
     else:
-        JJ = Jacobian(f, x)
-        gradient = JJ[:][0]
-        value = f(x)
-    return value, gradient
+        gradient = paddle.static.gradients([value], [x])[0]
+    # use detach to make results real number without grad to avoid assign error
+    return value.detach(), gradient.detach()
-- 
GitLab
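
For readers following the change, below is a minimal standalone sketch of what the patched dynamic-graph path does. The wrapper name `value_and_gradient`, the quadratic test function, and the printed values are illustrative only and are not part of the patch; only the detach/`paddle.grad` pattern mirrors the diff above.

import paddle


def value_and_gradient(f, x):
    # Mirrors the patched helper's dynamic-graph branch, for illustration.
    x = x.detach()            # cut x off from any pre-existing graph
    x.stop_gradient = False   # make x a gradient-tracking leaf again
    value = f(x)
    # Only a first-order derivative is needed; create_graph=False avoids
    # building a higher-order graph that some ops do not support.
    gradient = paddle.grad([value], [x], create_graph=False)[0]
    # Detach both results so they can be assigned elsewhere without grad history.
    return value.detach(), gradient.detach()


# Example: f(x) = sum(x ** 2), whose gradient is 2 * x.
x = paddle.to_tensor([1.0, 2.0, 3.0])
val, grad = value_and_gradient(lambda t: (t * t).sum(), x)
print(val.numpy())   # 14.0
print(grad.numpy())  # [2. 4. 6.]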