diff --git a/python/paddle/incubate/optimizer/functional/bfgs.py b/python/paddle/incubate/optimizer/functional/bfgs.py
index 9147444f5a6bb0377e535bad1145040dbecaa005..abdab457fda00aa89eec6ac299fa5ab72116e2b1 100644
--- a/python/paddle/incubate/optimizer/functional/bfgs.py
+++ b/python/paddle/incubate/optimizer/functional/bfgs.py
@@ -126,7 +126,8 @@ def minimize_bfgs(objective_func,
         check_initial_inverse_hessian_estimate(initial_inverse_hessian_estimate)
         Hk = paddle.assign(initial_inverse_hessian_estimate)
 
-    xk = initial_position
+    # use detach and assign to create new tensor rather than =, or xk will share memory and grad with initial_position
+    xk = paddle.assign(initial_position.detach())
 
     value, g1 = _value_and_gradient(objective_func, xk)
     num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64')
diff --git a/python/paddle/incubate/optimizer/functional/lbfgs.py b/python/paddle/incubate/optimizer/functional/lbfgs.py
index 1fbae18a4c65ac6e07a44d820d3923403777af05..d4bf511f85a99cda9b4a2ff856dff503981fe43b 100644
--- a/python/paddle/incubate/optimizer/functional/lbfgs.py
+++ b/python/paddle/incubate/optimizer/functional/lbfgs.py
@@ -113,7 +113,8 @@ def minimize_lbfgs(objective_func,
         check_initial_inverse_hessian_estimate(initial_inverse_hessian_estimate)
         H0 = initial_inverse_hessian_estimate
 
-    xk = initial_position
+    # use detach and assign to create new tensor rather than =, or xk will share memory and grad with initial_position
+    xk = paddle.assign(initial_position.detach())
 
     value, g1 = _value_and_gradient(objective_func, xk)
     k = paddle.full(shape=[1], fill_value=0, dtype='int64')