From 34241dd18f7cf3a3818926d852caef0e1ead7370 Mon Sep 17 00:00:00 2001
From: Sing_chan <51314274+betterpig@users.noreply.github.com>
Date: Fri, 1 Apr 2022 17:22:18 +0800
Subject: [PATCH] change vjp to paddle.grad (#41231)

* change vjp to paddle.grad

* use grad and gradients api

* fix preprocess for x

* fix a bug, val_and_grad should return a Tensor

* detach value and grad to avoid assign error

Co-authored-by: levi131
---
 .../incubate/optimizer/functional/utils.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py
index c197f8a1acb..3000c82a71e 100644
--- a/python/paddle/incubate/optimizer/functional/utils.py
+++ b/python/paddle/incubate/optimizer/functional/utils.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import paddle
-from paddle.autograd.functional import vjp, Jacobian
 from paddle.fluid.framework import Variable
 from paddle.fluid.data_feeder import check_type, check_dtype
 
@@ -86,11 +85,14 @@ def _value_and_gradient(f, x, v=None):
         value: a tensor that holds the function value.
         gradient: a tensor that holds the function gradients.
     """
+    # use detach to cut off relation between x and original graph
+    x = x.detach()
+    x.stop_gradient = False
+    value = f(x)
     if paddle.in_dynamic_mode():
-        value, gradient = vjp(f, x, v=v)
-        gradient = gradient[0]
+        # only need to compute first order derivative, and some op dont support high order derivative.
+        gradient = paddle.grad([value], [x], create_graph=False)[0]
     else:
-        JJ = Jacobian(f, x)
-        gradient = JJ[:][0]
-        value = f(x)
-    return value, gradient
+        gradient = paddle.static.gradients([value], [x])[0]
+    # use detach to make results real number without grad to avoid assign error
+    return value.detach(), gradient.detach()
--
GitLab
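
Note: below is a minimal dynamic-mode sketch of what the patched
_value_and_gradient path now does. The quadratic objective f and the sample
input are illustrative assumptions for this note only, not part of the patch.

    import paddle

    def f(x):
        # hypothetical objective: 0.5 * ||x||^2
        return 0.5 * paddle.sum(x * x)

    x = paddle.to_tensor([1.0, 2.0, 3.0])

    # mirror the patched helper: cut x off from any earlier graph, then
    # re-enable gradient tracking on the detached copy
    x = x.detach()
    x.stop_gradient = False
    value = f(x)

    # only the first-order derivative is needed, so create_graph=False
    gradient = paddle.grad([value], [x], create_graph=False)[0]

    print(value.detach())     # scalar 7.0
    print(gradient.detach())  # [1.0, 2.0, 3.0], i.e. the gradient of f at x

Detaching both results returns plain tensors with no graph attached, which
avoids the assign error mentioned in the commit message when they are later
written into other tensors.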