diff --git a/python/paddle/incubate/optimizer/functional/utils.py b/python/paddle/incubate/optimizer/functional/utils.py
index c197f8a1acb5e1fe9117793a308ee35f9549cefd..3000c82a71e87887ced4827744b08c6335edd129 100644
--- a/python/paddle/incubate/optimizer/functional/utils.py
+++ b/python/paddle/incubate/optimizer/functional/utils.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import paddle
-from paddle.autograd.functional import vjp, Jacobian
 from paddle.fluid.framework import Variable
 from paddle.fluid.data_feeder import check_type, check_dtype
 
@@ -86,11 +85,14 @@ def _value_and_gradient(f, x, v=None):
         value: a tensor that holds the function value.
         gradient: a tensor that holds the function gradients.
     """
+    # use detach to cut off relation between x and original graph
+    x = x.detach()
+    x.stop_gradient = False
+    value = f(x)
     if paddle.in_dynamic_mode():
-        value, gradient = vjp(f, x, v=v)
-        gradient = gradient[0]
+        # only need to compute first order derivative, and some op dont support high order derivative.
+        gradient = paddle.grad([value], [x], create_graph=False)[0]
     else:
-        JJ = Jacobian(f, x)
-        gradient = JJ[:][0]
-        value = f(x)
-    return value, gradient
+        gradient = paddle.static.gradients([value], [x])[0]
+    # use detach to make results real number without grad to avoid assign error
+    return value.detach(), gradient.detach()
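
For reference, here is a minimal, self-contained sketch of the new dynamic-mode path, assuming Paddle's default dynamic (eager) mode and a scalar-valued objective `f`. The helper name `value_and_gradient` is a standalone illustration written for this example, not the module's `_value_and_gradient` itself:

```python
# Sketch of the logic added above, assuming dynamic (eager) mode and a
# scalar-valued f. "value_and_gradient" is a hypothetical standalone helper
# for illustration, not the module's _value_and_gradient.
import paddle

def value_and_gradient(f, x):
    # Detach x so the new graph starts at x, then re-enable gradients on it.
    x = x.detach()
    x.stop_gradient = False
    value = f(x)
    # First-order derivative only; create_graph=False avoids building the
    # graph needed for higher-order differentiation.
    gradient = paddle.grad([value], [x], create_graph=False)[0]
    # Detach so callers receive plain tensors without gradient history.
    return value.detach(), gradient.detach()

x = paddle.to_tensor([1.0, 2.0, 3.0])
val, grad = value_and_gradient(lambda t: (t * t).sum(), x)
print(val)   # 14.
print(grad)  # [2., 4., 6.]
```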