Unverified commit 73f97de0, authored by 姜永久, committed by GitHub

rm flag retain grad (#49835)

* rm retain grad

* fix zero_dim

* fix zero_dim for xpu

* reset zero dim for xpu

* reset xpu

* reset custom_relu

* Reset flip

* fix zero dim
Parent 60ee518a
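
The change applied throughout the diff below is mechanical: tests no longer toggle the global FLAGS_retain_grad_for_all_tensor flag through fluid.set_flags; instead they call retain_grads() on the specific tensors whose gradients they later read. A minimal before/after sketch of the idiom (the tensor values and the relu example are illustrative only, not taken from any one file in this commit):

import paddle
import paddle.nn.functional as F

# Old pattern, removed by this commit: retain grads for all tensors globally.
#   paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
#   ... run the test ...
#   paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

# New pattern, used in the updated tests: opt in per tensor.
x = paddle.to_tensor([1.0, -2.0, 3.0], stop_gradient=False)
x.retain_grads()    # keep x.grad after backward()

out = F.relu(x)
out.retain_grads()  # out is a non-leaf tensor, so its grad must be retained explicitly

out.backward()      # non-scalar outputs get an implicit all-ones gradient
assert x.grad is not None
assert out.grad is not None

The retain_grads() calls are added on both inputs and intermediate outputs wherever a test later asserts on .grad.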
@@ -21,7 +21,6 @@ import numpy as np
 import paddle
 import paddle.static as static
-from paddle import fluid
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 from paddle.vision.transforms import Compose, Normalize
@@ -146,8 +145,10 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -259,7 +260,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         )
     def test_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -286,7 +286,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         x_grad, pd_x_grad
                     ),
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_static_save_and_load_inference_model(self):
         paddle.enable_static()
@@ -354,7 +353,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         paddle.disable_static()
     def test_double_grad_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -380,7 +378,6 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         dx_grad, pd_dx_grad
                    ),
                )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_with_dataloader(self):
         for device in self.devices:
...
@@ -30,8 +30,10 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     t = paddle.to_tensor(np_x, dtype=dtype)
     t.stop_gradient = False
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     out.stop_gradient = False
     out.backward()
@@ -142,14 +144,14 @@ def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
 def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
-    import paddle.fluid as fluid
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -164,7 +166,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
         grad_outputs=paddle.ones_like(t),
         create_graph=False,
     )
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     assert ddout[0].numpy() is not None
     return dx[0].numpy(), ddout[0].numpy()
...
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 import paddle
-import paddle.fluid as fluid
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
@@ -41,24 +40,25 @@ custom_ops = load(
 def custom_tanh_double_grad_dynamic(func, device, dtype, np_x):
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t)
     out.stop_gradient = False
+    out.retain_grads()
     dx = paddle.grad(
         outputs=[out], inputs=[t], create_graph=True, retain_graph=True
     )
+    dx[0].retain_grads()
     dx[0].backward()
     assert out.grad is not None
     assert dx[0].grad is not None
     return dx[0].numpy(), dx[0].grad.numpy(), out.grad.numpy()
-    fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 class TestCustomTanhDoubleGradJit(unittest.TestCase):
@@ -68,7 +68,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
         self.devices = ['cpu']
     def test_double_grad_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
@@ -102,7 +101,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
                         dout, pd_dout
                     ),
                 )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 if __name__ == "__main__":
...
@@ -24,11 +24,11 @@ import numpy as np
 def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     import paddle
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     paddle.set_device(device)
     t = paddle.to_tensor(np_x, dtype=dtype)
     t.stop_gradient = False
+    t.retain_grads()
     sys.stdout.flush()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
@@ -36,7 +36,6 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     out.backward()
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     if t.grad is None:
         return out.numpy(), t.grad
     else:
@@ -105,11 +104,12 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
     import paddle
     paddle.set_device(device)
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
+    t.retain_grads()
     out = func(t) if use_func else paddle.nn.functional.relu(t)
+    out.retain_grads()
     dx = paddle.grad(
         outputs=out,
         inputs=t,
@@ -125,7 +125,6 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
         create_graph=False,
     )
-    paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     assert ddout[0].numpy() is not None
     return dx[0].numpy(), ddout[0].numpy()
...
@@ -30,8 +30,6 @@ class TestCollectiveAllToAllSingle(unittest.TestCase):
             paddle.distributed.is_initialized()
         ), "The distributed environment has been initialized."
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     def test_collective_alltoall_single(self):
         rank = dist.get_rank()
         size = dist.get_world_size()
...
@@ -23,7 +23,6 @@ import paddle.distributed as dist
 class TestCollectiveBatchIsendIrecv(unittest.TestCase):
     def setUp(self):
         dist.init_parallel_env()
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     def test_collective_batch_isend_irecv(self):
         rank = dist.get_rank()
...
@@ -24,7 +24,6 @@ from paddle.distributed.communication.reduce_scatter import _reduce_scatter_base
 class TestCollectiveReduceScatter(unittest.TestCase):
     def setUp(self):
         dist.init_parallel_env()
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     def test_collective_reduce_scatter_sum(self):
         rank = dist.get_rank()
...
@@ -34,7 +34,6 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         fleet.init(is_collective=True, strategy=strategy)
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
     def test_parallel_margin_softmax_cross_entropy(self):
         margin1s = [1.0, 1.0, 1.35]
@@ -93,6 +92,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
                     norm_weight = paddle.divide(weight, weight_l2)
                     data = paddle.matmul(norm_input, norm_weight)
+                    data.retain_grads()
                     data.stop_gradient = False
                     sta = (
@@ -118,6 +118,7 @@ class TestParallelMarginSoftmaxCrossEntropyOp(unittest.TestCase):
                         group=check_group,
                     )
                     integral_data = integral_data.detach().clone()
+                    integral_data.retain_grads()
                     integral_data.stop_gradient = False
                     # add arcface margin to logit
...
@@ -96,11 +96,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.tanh_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -128,11 +126,9 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.tanh_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -160,11 +156,9 @@ class TestAbsDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.abs_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -256,11 +250,9 @@ class TestELUDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.elu_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -292,11 +284,9 @@ class TestCELUDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.celu_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -390,11 +380,9 @@ class TestSquareDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.square_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -424,11 +412,9 @@ class TestLogDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.log_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -456,11 +442,9 @@ class TestSinDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.sin_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
@@ -488,11 +472,9 @@ class TestCosDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [x], y, x_init=x_arr, place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.cos_wrapper, [x], y, x_init=x_arr, place=place
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_grad(self):
         paddle.enable_static()
...
@@ -37,16 +37,12 @@ class TestAssignOp(op_test.OpTest):
     def test_forward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_output(check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
     def test_backward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_grad(['X'], 'Out', check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
@@ -60,23 +56,18 @@ class TestAssignFP16Op(op_test.OpTest):
     def test_forward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_output(check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
     def test_backward(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.check_grad(['X'], 'Out', check_eager=True)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
 class TestAssignOpWithLoDTensorArray(unittest.TestCase):
     def test_assign_LoDTensorArray(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         main_program = Program()
         startup_program = Program()
         with program_guard(main_program):
@@ -92,7 +83,6 @@ class TestAssignOpWithLoDTensorArray(unittest.TestCase):
             sums = paddle.tensor.array_read(array=init_array, i=i)
             mean = paddle.mean(sums)
             append_backward(mean)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         place = (
             fluid.CUDAPlace(0)
@@ -207,12 +197,13 @@ class TestAssignOApi(unittest.TestCase):
         np.testing.assert_allclose(result3.numpy(), np.array([1]), rtol=1e-05)
     def test_clone(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.python_api = paddle.clone
         x = paddle.ones([2])
         x.stop_gradient = False
+        x.retain_grads()
         clone_x = paddle.clone(x)
+        clone_x.retain_grads()
         y = clone_x**3
         y.backward()
@@ -220,7 +211,6 @@ class TestAssignOApi(unittest.TestCase):
         np.testing.assert_array_equal(x, [1, 1])
         np.testing.assert_array_equal(clone_x.grad.numpy(), [3, 3])
         np.testing.assert_array_equal(x.grad.numpy(), [3, 3])
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.enable_static()
         with program_guard(Program(), Program()):
@@ -241,7 +231,6 @@ class TestAssignOpErrorApi(unittest.TestCase):
 class TestAssignOpErrorApi(unittest.TestCase):
     def test_errors(self):
         paddle.enable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with program_guard(Program(), Program()):
             # The type of input must be Variable or numpy.ndarray.
             x1 = fluid.create_lod_tensor(
@@ -251,7 +240,6 @@ class TestAssignOpErrorApi(unittest.TestCase):
             # When the type of input is numpy.ndarray, the dtype of input must be float32, int32.
             x2 = np.array([[2.5, 2.5]], dtype='uint8')
             self.assertRaises(TypeError, paddle.assign, x2)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
         paddle.disable_static()
     def test_type_error(self):
@@ -281,7 +269,6 @@ class TestAssignDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.assign_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -313,7 +300,6 @@ class TestAssignTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.assign_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -166,6 +166,7 @@ class TestDygraphTripleGrad(TestCase):
     @dygraph_guard
     def func_example_with_gradient_and_create_graph(self):
         x = random_var(self.shape)
+        x.retain_grads()
         x_np = x.numpy()
         x.stop_gradient = False
@@ -222,10 +223,8 @@ class TestDygraphTripleGrad(TestCase):
         np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
     def test_all_cases(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.func_exception()
         self.func_example_with_gradient_and_create_graph()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 class TestDygraphTripleGradBradcastCase(TestCase):
@@ -259,6 +258,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
     @dygraph_guard
     def func_example_with_gradient_and_create_graph(self):
         x = random_var(self.x_shape)
+        x.retain_grads()
         x_np = x.numpy()
         x.stop_gradient = False
@@ -316,9 +316,7 @@ class TestDygraphTripleGradBradcastCase(TestCase):
         np.testing.assert_allclose(dddx_grad_actual, dddx_expected, rtol=1e-05)
     def test_all_cases(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         self.func_example_with_gradient_and_create_graph()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 # d_ddout is none, dtype is float32
...
@@ -72,7 +72,6 @@ class TestVariable(unittest.TestCase):
         np.testing.assert_array_equal(res1.numpy(), res2.numpy())
     def test_trace_backward(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         with fluid.dygraph.guard():
             a = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
             b = np.random.uniform(0.1, 1, self.shape).astype(self.dtype)
@@ -80,8 +79,11 @@ class TestVariable(unittest.TestCase):
             y = fluid.dygraph.to_variable(b)
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             loss = _legacy_C_ops.elementwise_mul(x, y)
+            loss.retain_grads()
             loss.backward()
             x_grad = x.gradient()
@@ -89,7 +91,6 @@ class TestVariable(unittest.TestCase):
             np.testing.assert_array_equal(x_grad, loss.gradient() * b)
             np.testing.assert_array_equal(y_grad, loss.gradient() * a)
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 if __name__ == '__main__':
...
@@ -505,17 +505,18 @@ class TestReshapeZeroTensor(unittest.TestCase):
 class TestReshapeAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.reshape(x, [1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1])
         self.assertEqual(out.grad.shape, [1])
         out = paddle.reshape(x, [-1, 1])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.shape, [1, 1])
@@ -524,6 +525,7 @@ class TestReshapeAPI_ZeroDim(unittest.TestCase):
         x = paddle.rand([1])
         x.stop_gradient = False
         out = paddle.reshape(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(x.grad.shape, [1])
         self.assertEqual(out.shape, [])
...
@@ -895,7 +895,6 @@ class TestSliceDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.slice_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -931,7 +930,6 @@ class TestSliceTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.slice_wrapper, [data], out, x_init=[data_arr], place=place
         )
...
@@ -43,7 +43,6 @@ class TestSparseElementWiseAPI(unittest.TestCase):
     """
     def setUp(self):
-        paddle.fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         np.random.seed(2022)
         self.op_list = op_list
         self.csr_shape = [128, 256]
@@ -109,7 +108,9 @@ class TestSparseElementWiseAPI(unittest.TestCase):
                 y, dtype=dtype, stop_gradient=False
             )
             coo_x = s_dense_x.to_sparse_coo(sparse_dim)
+            coo_x.retain_grads()
             coo_y = s_dense_y.to_sparse_coo(sparse_dim)
+            coo_y.retain_grads()
             actual_res = get_actual_res(coo_x, coo_y, op)
             actual_res.backward(actual_res)
@@ -157,9 +158,12 @@ class TestSparseElementWiseAPI(unittest.TestCase):
         sp_a = sparse.sparse_coo_tensor(
             indices_data, values1_data, shape, stop_gradient=False
         )
+        sp_a.retain_grads()
         sp_b = sparse.sparse_coo_tensor(
             indices_data, values2_data, shape, stop_gradient=False
         )
+        sp_b.retain_grads()
         values1 = paddle.to_tensor(values1_data, stop_gradient=False)
         values2 = paddle.to_tensor(values2_data, stop_gradient=False)
@@ -185,6 +189,7 @@ class TestSparseElementWiseAPI(unittest.TestCase):
         sp_a = sparse.sparse_coo_tensor(
             indices_data, values_data, shape, stop_gradient=False
         )
+        sp_a.retain_grads()
         bias_values = [1.0, 2.0]
...
@@ -17,7 +17,6 @@ import unittest
 import numpy as np
 import paddle
-import paddle.fluid as fluid
 import paddle.fluid.core as core
 devices = ['cpu', 'gpu']
@@ -148,7 +147,6 @@ class TestSparseConvert(unittest.TestCase):
         assert np.array_equal(dense_x.grad.numpy(), out_grad.to_dense().numpy())
     def test_coo_to_dense(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
         values = [1.0, 2.0, 3.0, 4.0, 5.0]
         indices_dtypes = ['int32', 'int64']
@@ -159,6 +157,7 @@ class TestSparseConvert(unittest.TestCase):
                 shape=[3, 4],
                 stop_gradient=False,
             )
+            sparse_x.retain_grads()
             dense_tensor = sparse_x.to_dense()
             # test to_dense_grad backward
             out_grad = [
@@ -180,12 +179,12 @@ class TestSparseConvert(unittest.TestCase):
             shape=[3, 4],
             stop_gradient=False,
         )
+        sparse_x_cpu.retain_grads()
         dense_tensor_cpu = sparse_x_cpu.to_dense()
         dense_tensor_cpu.backward(paddle.to_tensor(out_grad))
         assert np.array_equal(
             correct_x_grad, sparse_x_cpu.grad.values().numpy()
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_to_sparse_csr(self):
         x = [[0, 1, 0, 2], [0, 0, 3, 0], [4, 5, 0, 0]]
@@ -202,7 +201,6 @@ class TestSparseConvert(unittest.TestCase):
         assert np.array_equal(dense_tensor.numpy(), x)
     def test_coo_values_grad(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         indices = [[0, 0, 1, 2, 2], [1, 3, 2, 0, 1]]
         values = [1.0, 2.0, 3.0, 4.0, 5.0]
         sparse_x = paddle.sparse.sparse_coo_tensor(
@@ -211,6 +209,7 @@ class TestSparseConvert(unittest.TestCase):
             shape=[3, 4],
             stop_gradient=False,
         )
+        sparse_x.retain_grads()
         values_tensor = sparse_x.values()
         out_grad = [2.0, 3.0, 5.0, 8.0, 9.0]
         # test coo_values_grad
@@ -230,6 +229,7 @@ class TestSparseConvert(unittest.TestCase):
             shape=[3, 4, 2],
             stop_gradient=False,
         )
+        sparse_x.retain_grads()
         values_tensor = sparse_x.values()
         out_grad = [
             [2.0, 2.0],
@@ -241,7 +241,6 @@ class TestSparseConvert(unittest.TestCase):
         # test coo_values_grad
         values_tensor.backward(paddle.to_tensor(out_grad))
         assert np.array_equal(out_grad, sparse_x.grad.values().numpy())
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_sparse_coo_tensor_grad(self):
         for device in devices:
...
@@ -22,7 +22,6 @@ import paddle.fluid as fluid
 class TensorFillDiagonal_Test(unittest.TestCase):
     def test_dim2_normal(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array([[1, 2, 2], [2, 1, 2], [2, 2, 1]]).astype(
             'float32'
         )
@@ -44,6 +43,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             x = paddle.ones((3, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_(1, offset=0, wrap=True)
             loss = y.sum()
             loss.backward()
@@ -55,10 +55,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
             )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_offset(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array([[2, 2, 1], [2, 2, 2], [2, 2, 2]]).astype(
             'float32'
         )
@@ -80,6 +78,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             x = paddle.ones((3, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_(1, offset=2, wrap=True)
             loss = y.sum()
             loss.backward()
@@ -91,7 +90,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
            )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_bool(self):
         expected_np = np.array(
@@ -116,7 +114,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             self.assertEqual((x.numpy() == expected_np).all(), True)
     def test_dim2_unnormal_wrap(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [1, 2, 2],
@@ -154,6 +151,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             x = paddle.ones((7, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_(1, offset=0, wrap=True)
             loss = y.sum()
             loss.backward()
@@ -165,10 +163,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
            )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_dim2_unnormal_unwrap(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [1, 2, 2],
@@ -206,6 +202,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             x = paddle.ones((7, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_(1, offset=0, wrap=False)
             loss = y.sum()
             loss.backward()
@@ -217,10 +214,8 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
            )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
     def test_dim_larger2_normal(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         expected_np = np.array(
             [
                 [[1, 2, 2], [2, 2, 2], [2, 2, 2]],
@@ -250,6 +245,7 @@ class TensorFillDiagonal_Test(unittest.TestCase):
             x = paddle.ones((3, 3, 3), dtype=dtype)
             x.stop_gradient = False
             y = x * 2
+            y.retain_grads()
             y.fill_diagonal_(1, offset=0, wrap=True)
             loss = y.sum()
             loss.backward()
@@ -261,7 +257,6 @@ class TensorFillDiagonal_Test(unittest.TestCase):
                 (y.grad.numpy().astype('float32') == expected_grad).all(),
                 True,
            )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 if __name__ == '__main__':
...
@@ -286,7 +286,6 @@ class TestTileDoubleGradCheck(unittest.TestCase):
         gradient_checker.double_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.double_grad_check_for_dygraph(
             self.tile_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -318,7 +317,6 @@ class TestTileTripleGradCheck(unittest.TestCase):
         gradient_checker.triple_grad_check(
             [data], out, x_init=[data_arr], place=place, eps=eps
         )
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         gradient_checker.triple_grad_check_for_dygraph(
             self.tile_wrapper, [data], out, x_init=[data_arr], place=place
         )
@@ -335,24 +333,26 @@ class TestTileTripleGradCheck(unittest.TestCase):
 class TestTileAPI_ZeroDim(unittest.TestCase):
     def test_dygraph(self):
         paddle.disable_static()
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         x = paddle.rand([])
         x.stop_gradient = False
         out = paddle.tile(x, [])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [])
         out = paddle.tile(x, [3])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [3])
         self.assertEqual(x.grad.shape, [])
         self.assertEqual(out.grad.shape, [3])
         out = paddle.tile(x, [2, 3])
+        out.retain_grads()
         out.backward()
         self.assertEqual(out.shape, [2, 3])
         self.assertEqual(x.grad.shape, [])
...
@@ -21,8 +21,6 @@ import paddle
 import paddle.fluid as fluid
 import paddle.nn.functional as F
-fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
 unary_api_list = [
     paddle.nn.functional.elu,
     paddle.nn.functional.gelu,
@@ -102,7 +100,9 @@ class TestUnaryAPI(unittest.TestCase):
         for api in unary_api_list:
             x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             out = api(x)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -202,7 +202,9 @@ class TestReduceAPI(unittest.TestCase):
             else:
                 x = paddle.rand([])
                 x.stop_gradient = False
+                x.retain_grads()
             out = api(x, None)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -291,12 +293,16 @@ class TestBinaryAPI(unittest.TestCase):
             y = paddle.rand([])
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             if isinstance(api, dict):
                 out = api['func'](x, y)
                 out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -312,12 +318,16 @@ class TestBinaryAPI(unittest.TestCase):
             y = paddle.rand([])
             x.stop_gradient = False
             y.stop_gradient = False
+            x.retain_grads()
+            y.retain_grads()
             if isinstance(api, dict):
                 out = api['func'](x, y)
                 out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y)
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [2, 3, 4])
@@ -331,6 +341,8 @@ class TestBinaryAPI(unittest.TestCase):
             # 3) x is 0D , y is ND
             x = paddle.rand([])
             y = paddle.rand([2, 3, 4])
+            x.retain_grads()
+            y.retain_grads()
             x.stop_gradient = False
             y.stop_gradient = False
             if isinstance(api, dict):
@@ -339,6 +351,8 @@ class TestBinaryAPI(unittest.TestCase):
                 np.testing.assert_array_equal(out_cls.numpy(), out.numpy())
             else:
                 out = api(x, y)
+            out.retain_grads()
             out.backward()
             self.assertEqual(x.shape, [])
@@ -352,9 +366,11 @@ class TestBinaryAPI(unittest.TestCase):
             # 4) x is 0D , y is scalar
             x = paddle.rand([])
             x.stop_gradient = False
+            x.retain_grads()
             y = 0.5
             if isinstance(api, dict):
                 out = getattr(paddle.Tensor, api['cls_method'])(x, y)
+                out.retain_grads()
                 out.backward()
                 self.assertEqual(x.shape, [])
...@@ -528,7 +544,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -528,7 +544,9 @@ class TestSundryAPI(unittest.TestCase):
def test_flip(self): def test_flip(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.flip(x, axis=[]) out = paddle.flip(x, axis=[])
out.retain_grads()
out.backward() out.backward()
self.assertEqual(x.shape, []) self.assertEqual(x.shape, [])
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -618,7 +636,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -618,7 +636,9 @@ class TestSundryAPI(unittest.TestCase):
def test_pow_factor(self): def test_pow_factor(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.pow(x, 2.0) out = paddle.pow(x, 2.0)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -628,7 +648,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -628,7 +648,9 @@ class TestSundryAPI(unittest.TestCase):
def test_cast(self): def test_cast(self):
x = paddle.full([], 1.0, 'float32') x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.cast(x, 'int32') out = paddle.cast(x, 'int32')
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -638,7 +660,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -638,7 +660,9 @@ class TestSundryAPI(unittest.TestCase):
def test_cumprod(self): def test_cumprod(self):
x = paddle.full([], 1.0, 'float32') x = paddle.full([], 1.0, 'float32')
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.cumprod(x, 0) out = paddle.cumprod(x, 0)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -651,7 +675,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -651,7 +675,9 @@ class TestSundryAPI(unittest.TestCase):
def test_clip(self): def test_clip(self):
x = paddle.uniform([], None, -10, 10) x = paddle.uniform([], None, -10, 10)
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.clip(x, -5, 5) out = paddle.clip(x, -5, 5)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -661,7 +687,9 @@ class TestSundryAPI(unittest.TestCase): ...@@ -661,7 +687,9 @@ class TestSundryAPI(unittest.TestCase):
def test_increment(self): def test_increment(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
x.retain_grads()
out = paddle.increment(x, 1.0) out = paddle.increment(x, 1.0)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -694,8 +722,10 @@ class TestSundryAPI(unittest.TestCase): ...@@ -694,8 +722,10 @@ class TestSundryAPI(unittest.TestCase):
def test_gather_1D(self): def test_gather_1D(self):
x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
x.retain_grads()
index = paddle.full([], 2, 'int64') index = paddle.full([], 2, 'int64')
out = paddle.gather(x, index) out = paddle.gather(x, index)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, []) self.assertEqual(out.shape, [])
...@@ -707,8 +737,10 @@ class TestSundryAPI(unittest.TestCase): ...@@ -707,8 +737,10 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.to_tensor( x = paddle.to_tensor(
[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
) )
x.retain_grads()
index = paddle.full([], 1, 'int64') index = paddle.full([], 1, 'int64')
out = paddle.gather(x, index) out = paddle.gather(x, index)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, [3]) self.assertEqual(out.shape, [3])
...@@ -720,8 +752,10 @@ class TestSundryAPI(unittest.TestCase): ...@@ -720,8 +752,10 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.to_tensor( x = paddle.to_tensor(
[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], stop_gradient=False
) )
x.retain_grads()
index = paddle.full([], 1, 'int64') index = paddle.full([], 1, 'int64')
out = paddle.gather(x, index, axis=1) out = paddle.gather(x, index, axis=1)
out.retain_grads()
out.backward() out.backward()
self.assertEqual(out.shape, [2]) self.assertEqual(out.shape, [2])
...@@ -731,9 +765,11 @@ class TestSundryAPI(unittest.TestCase):
def test_scatter_1D(self):
x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False)
+x.retain_grads()
index = paddle.full([], 2, 'int64')
updates = paddle.full([], 4.0)
out = paddle.scatter(x, index, updates)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [5])
...@@ -747,6 +783,7 @@ class TestSundryAPI(unittest.TestCase):
index = paddle.full([], 1, 'int64')
updates = paddle.to_tensor([1.0, 2.0, 3.0])
out = paddle.scatter(x, index, updates)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [2, 3])
...@@ -762,10 +799,18 @@ class TestSundryAPI(unittest.TestCase):
x2.stop_gradient = False
x3.stop_gradient = False
+x1.retain_grads()
+x2.retain_grads()
+x3.retain_grads()
out1 = paddle.diagflat(x1, 1)
out2 = paddle.diagflat(x2, -1)
out3 = paddle.diagflat(x3, 0)
+out1.retain_grads()
+out2.retain_grads()
+out3.retain_grads()
out1.backward()
out2.backward()
out3.backward()
...@@ -800,8 +845,11 @@ class TestSundryAPI(unittest.TestCase):
def test_scatter_nd(self):
index = paddle.to_tensor([3], dtype="int64")
updates = paddle.full([], 2, dtype='float32')
+updates.retain_grads()
updates.stop_gradient = False
out = paddle.scatter_nd(index, updates, [5])
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [5])
...@@ -818,6 +866,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.randn(())
x.stop_gradient = False
+x.retain_grads()
out = paddle.kthvalue(x, 1)
out[0].backward()
...@@ -838,6 +887,7 @@ class TestSundryAPI(unittest.TestCase):
paddle.set_device(place)
x = paddle.randn(())
+x.retain_grads()
x.stop_gradient = False
out = paddle.mode(x)
...@@ -854,11 +904,13 @@ class TestSundryAPI(unittest.TestCase):
def test_flatten(self):
x = paddle.rand([])
x.stop_gradient = False
+x.retain_grads()
start_axis = 0
stop_axis = -1
out = paddle.flatten(x, start_axis=start_axis, stop_axis=stop_axis)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [1])
...@@ -868,7 +920,9 @@ class TestSundryAPI(unittest.TestCase):
def test_scale(self):
x = paddle.rand([])
x.stop_gradient = False
+x.retain_grads()
out = paddle.scale(x, scale=2.0, bias=1.0)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
...@@ -911,6 +965,9 @@ class TestSundryAPI(unittest.TestCase):
out1 = paddle.add_n(x1)
out2 = paddle.add_n([x2, x3])
+out1.retain_grads()
+out2.retain_grads()
out1.backward()
out2.backward()
...@@ -928,26 +985,31 @@ class TestSundryAPI(unittest.TestCase):
def test_reshape_list(self):
x = paddle.rand([])
x.stop_gradient = False
+x.retain_grads()
out = paddle.reshape(x, [])
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [])
self.assertEqual(out.grad.shape, [])
out = paddle.reshape(x, [1])
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1])
self.assertEqual(out.grad.shape, [1])
out = paddle.reshape(x, [-1])
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1])
self.assertEqual(out.grad.shape, [1])
out = paddle.reshape(x, [-1, 1])
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [])
self.assertEqual(out.shape, [1, 1])
...@@ -955,9 +1017,11 @@ class TestSundryAPI(unittest.TestCase):
def test_reshape_tensor(self):
x = paddle.rand([1, 1])
+x.retain_grads()
x.stop_gradient = False
out = paddle.reshape(x, [])
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [])
...@@ -965,6 +1029,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = paddle.to_tensor([1, 1, 1], "int32")
out = paddle.reshape(x, new_shape)
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1, 1, 1])
...@@ -972,6 +1037,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = paddle.to_tensor([-1], "int32")
out = paddle.reshape(x, new_shape)
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1])
...@@ -979,6 +1045,7 @@ class TestSundryAPI(unittest.TestCase):
new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")]
out = paddle.reshape(x, new_shape)
+out.retain_grads()
out.backward()
self.assertEqual(x.grad.shape, [1, 1])
self.assertEqual(out.shape, [1, 1])
...@@ -1019,6 +1086,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.rand([])
x.stop_gradient = False
out = paddle.reverse(x, axis=[])
+out.retain_grads()
out.backward()
self.assertEqual(x.shape, [])
self.assertEqual(out.shape, [])
...@@ -1029,9 +1097,14 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.rand([])
x1.stop_gradient = False
x2.stop_gradient = False
+x1.retain_grads()
+x2.retain_grads()
out1 = paddle.sort(x1, axis=-1)
out2 = paddle.sort(x2, axis=0)
+out1.retain_grads()
+out2.retain_grads()
out1.backward()
out2.backward()
...@@ -1051,9 +1124,15 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.rand([])
x1.stop_gradient = False
x2.stop_gradient = False
+x1.retain_grads()
+x2.retain_grads()
out1 = paddle.argsort(x1, axis=-1)
out2 = paddle.argsort(x2, axis=0)
+out1.retain_grads()
+out2.retain_grads()
out1.backward()
out2.backward()
...@@ -1075,6 +1154,7 @@ class TestSundryAPI(unittest.TestCase):
w0 = paddle.rand([])
x0.stop_gradient = False
y0.stop_gradient = False
+y0.retain_grads()
out0 = paddle.lerp(x0, y0, w0)
out0.backward()
...@@ -1089,6 +1169,8 @@ class TestSundryAPI(unittest.TestCase):
w1 = paddle.rand([])
x1.stop_gradient = False
y1.stop_gradient = False
+x1.retain_grads()
+y1.retain_grads()
out1 = paddle.lerp(x1, y1, w1)
out1.backward()
...@@ -1103,6 +1185,8 @@ class TestSundryAPI(unittest.TestCase):
w2 = paddle.rand([])
x2.stop_gradient = False
y2.stop_gradient = False
+x2.retain_grads()
+y2.retain_grads()
out2 = paddle.lerp(x2, y2, w2)
out2.backward()
...@@ -1120,6 +1204,7 @@ class TestSundryAPI(unittest.TestCase):
x = paddle.randn(())
x.stop_gradient = False
+x.retain_grads()
out = paddle.repeat_interleave(x, 2, None)
out.backward()
...@@ -1145,6 +1230,7 @@ class TestSundryAPI(unittest.TestCase):
dtype='float32',
stop_gradient=False,
)
+logit.retain_grads()
label = paddle.to_tensor(
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype='float32'
)
...@@ -1153,6 +1239,7 @@ class TestSundryAPI(unittest.TestCase):
out0 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_0)
out1 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_1)
+out0.retain_grads()
np.testing.assert_array_equal(
out0.numpy(),
...@@ -1173,7 +1260,10 @@ class TestSundryAPI(unittest.TestCase):
x2 = paddle.full([], 2)
x1.stop_gradient = False
x2.stop_gradient = False
+x1.retain_grads()
+x2.retain_grads()
out = paddle.where(x1 > x2, x1, x2)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out.numpy(), 2)
...@@ -1186,9 +1276,12 @@ class TestSundryAPI(unittest.TestCase):
def test_atan2(self):
x1 = paddle.full([], 0)
x2 = paddle.full([], 2)
+x1.retain_grads()
+x2.retain_grads()
x1.stop_gradient = False
x2.stop_gradient = False
out = paddle.atan2(x1, x2)
+out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
self.assertEqual(out.numpy(), 0)
......
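The retain_grads() calls added throughout these tests all follow one pattern. A minimal sketch of that pattern in isolation (assuming Paddle's dynamic-graph mode; paddle.clip stands in for any of the ops exercised above, and the 0-D shape checks mirror the assertions in the tests):

import paddle

x = paddle.uniform([], None, -10, 10)  # 0-D input
x.stop_gradient = False
x.retain_grads()  # mirrors the tests: retain gradients on the input

out = paddle.clip(x, -5, 5)
out.retain_grads()  # out is an intermediate tensor; its .grad is generally not stored unless retained
out.backward()

assert out.shape == [] and out.grad.shape == []  # 0-D output and 0-D gradient
assert x.grad.shape == []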