Unverified commit c40c16a9, authored by xiemoyuan, committed by GitHub

Modified the return value of tensor.grad from numpy to tensor. (#32142)

* Modified the return value of tensor.grad from numpy to tensor.

* Modified unittests.

* Fixed bugs.

* Added warning info for x.grad.

* Fixed unittests which used x.grad.

* Fixed bug.
Parent: eae34059
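Below is a minimal sketch of the behavioral change described in the commit message (assuming Paddle 2.1 with this patch applied; the printed output is illustrative):

    import paddle

    x = paddle.to_tensor(5., stop_gradient=False)
    y = paddle.pow(x, 4.0)
    y.backward()

    print(x.grad)          # now a paddle.Tensor, e.g. Tensor(shape=[1], dtype=float32, ..., [500.])
    print(x.grad.numpy())  # call .numpy() to recover the previous numpy return value
    print(x.gradient())    # still returns numpy, but deprecated since 2.1.0 in favor of x.grad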
@@ -26,6 +26,7 @@ from .base import switch_to_static_graph
 from .math_op_patch import monkey_patch_math_varbase
 from .parallel import scale_loss
 from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
+import paddle.utils.deprecated as deprecated
 class TensorHookRemoveHelper(object):
@@ -238,8 +239,16 @@ def monkey_patch_varbase():
                 "Variable.backward() is only available in DyGraph mode")
     @framework.dygraph_only
+    @deprecated(
+        since="2.1.0",
+        reason="Please use x.grad, which returns the tensor value of the gradient."
+    )
     def gradient(self):
         """
+        .. warning::
+          This API will be deprecated in the future, it is recommended to use
+          :code:`x.grad` which returns the tensor value of the gradient.
         Get the Gradient of Current Tensor.
         Returns:
@@ -253,7 +262,7 @@ def monkey_patch_varbase():
                 x = paddle.to_tensor(5., stop_gradient=False)
                 y = paddle.pow(x, 4.0)
                 y.backward()
-                print("grad of x: {}".format(x.grad))
+                print("grad of x: {}".format(x.gradient()))
                 # [500.]
         """
@@ -337,10 +346,28 @@ def monkey_patch_varbase():
     @property
     def grad(self):
         """
-        The alias of gradient().
-        """
-        return self.gradient()
+        .. warning::
+          This API will return the tensor value of the gradient. If you want
+          to get the numpy value of the gradient, you can use :code:`x.grad.numpy()`.
+        Get the Gradient of Current Tensor.
+        Returns:
+            Tensor: the gradient of current Tensor
+        Examples:
+            .. code-block:: python
+                import paddle
+                x = paddle.to_tensor(5., stop_gradient=False)
+                y = paddle.pow(x, 4.0)
+                y.backward()
+                print("grad of x: {}".format(x.grad))
+                # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=False, [500.])
+        """
+        return self._grad_ivar()
     def clear_grad(self):
         """
......
@@ -58,7 +58,7 @@ def concat_dynamic(func, dtype, np_inputs, axis_v, with_attr=False):
     out = func(inputs, axis)
     out.stop_gradient = False
     out.backward()
-    grad_inputs = [x.grad for x in inputs]
+    grad_inputs = [x.grad.numpy() for x in inputs]
     return out.numpy(), grad_inputs
......
@@ -63,7 +63,10 @@ def conj_dynamic(func, dtype, np_input):
         sum_out.real().backward()
     else:
         sum_out.backward()
-    return out.numpy(), x.grad
+    if x.grad is None:
+        return out.numpy(), x.grad
+    else:
+        return out.numpy(), x.grad.numpy()
 def conj_static(func, shape, dtype, np_input):
......
@@ -34,7 +34,10 @@ def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     out.backward()
-    return out.numpy(), t.grad
+    if t.grad is None:
+        return out.numpy(), t.grad
+    else:
+        return out.numpy(), t.grad.numpy()
 def custom_relu_static(func,
......
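The two test changes above follow the migration pattern used throughout this PR: x.grad is None when no gradient has been produced; otherwise it is a paddle.Tensor and needs .numpy() before comparison with numpy arrays. A small sketch of that pattern as a helper (the name grad_to_numpy is illustrative, not part of this PR):

    def grad_to_numpy(t):
        # t.grad is None when no gradient has been computed; otherwise it is a paddle.Tensor.
        return None if t.grad is None else t.grad.numpy()

    # Example use in a test:
    # np.testing.assert_allclose(grad_to_numpy(x), expected_grad, rtol=1e-6)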
@@ -110,7 +110,8 @@ class TestDistTraning(unittest.TestCase):
     def check_acc(self, grad, grad_sum, acc_grad):
         if grad is not None:
-            grad_sum = grad_sum + grad
+            grad_sum = grad_sum + grad.numpy()
+            acc_grad = acc_grad.numpy() if acc_grad is not None else None
             np.testing.assert_allclose(grad_sum, acc_grad, rtol=1e-6)
         return grad_sum
......
@@ -349,7 +349,8 @@ class TestLayerTo(unittest.TestCase):
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertEqual(self.linear.buf_name.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
-        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(
+            np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
         self.assertTrue(self.linear.weight._grad_ivar().dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
@@ -358,7 +359,8 @@ class TestLayerTo(unittest.TestCase):
                          paddle.fluid.core.VarDesc.VarType.FP64)
         self.assertEqual(self.linear.buf_name.dtype,
                          paddle.fluid.core.VarDesc.VarType.FP64)
-        self.assertTrue(np.allclose(self.linear.weight.grad, self.new_grad))
+        self.assertTrue(
+            np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
         self.assertTrue(self.linear.weight._grad_ivar().dtype,
                         paddle.fluid.core.VarDesc.VarType.FP64)
......
@@ -46,7 +46,7 @@ class TestTensorBackward(unittest.TestCase):
                 x_grad = np.matmul(grad, y.T)
-                self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+                self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
 class TestBackwardAPI(unittest.TestCase):
@@ -75,7 +75,8 @@ class TestBackwardAPI(unittest.TestCase):
                 x_grad = np.matmul(grad, y.T)
-                self.assertTrue(np.allclose(x_grad * 2, x_tensor.grad))
+                self.assertTrue(
+                    np.allclose(x_grad * 2, x_tensor.grad.numpy()))
     def test_backward_single_tensor(self):
         for dtype in self._dtypes:
@@ -94,7 +95,7 @@ class TestBackwardAPI(unittest.TestCase):
                 x_grad = np.matmul(grad, y.T)
-                self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+                self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
     def test_backward_none_grad_tensor(self):
         for dtype in self._dtypes:
@@ -112,7 +113,7 @@ class TestBackwardAPI(unittest.TestCase):
                 x_grad = np.matmul(grad, y.T)
-                self.assertTrue(np.allclose(x_grad, x_tensor.grad))
+                self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
 if __name__ == '__main__':
......
@@ -506,15 +506,15 @@ class TestImperative(unittest.TestCase):
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
-                self.assertEqual(x.grad, (i + 1) * 500)
+                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_gradient()
-            self.assertEqual(x.grad, 0.)
+            self.assertEqual(x.grad.numpy(), 0.)
            for i in range(10):
                y = paddle.pow(x, 4.0)
                y.backward()
-                self.assertEqual(x.grad, (i + 1) * 500)
+                self.assertEqual(x.grad.numpy(), (i + 1) * 500)
            x.clear_grad()
-            self.assertEqual(x.grad, 0.)
+            self.assertEqual(x.grad.numpy(), 0.)
        def test_simple_net(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
@@ -527,9 +527,9 @@ class TestImperative(unittest.TestCase):
            loss2 = x * z
            loss1.backward(retain_graph=True)
            loss2.backward(retain_graph=True)
-            self.assertTrue(np.array_equal(x.grad, [23.]))
-            self.assertTrue(np.array_equal(y.grad, [25.]))
-            self.assertTrue(np.array_equal(z.grad, [5.]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [23.]))
+            self.assertTrue(np.array_equal(y.grad.numpy(), [25.]))
+            self.assertTrue(np.array_equal(z.grad.numpy(), [5.]))
            x.clear_grad()
            y.clear_grad()
            z.clear_grad()
@@ -542,13 +542,13 @@ class TestImperative(unittest.TestCase):
            loss = fun(x, y, z)
            loss.backward(retain_graph=True)
            # x.grad = 2*x*y + z + 2*y = 27
-            self.assertTrue(np.array_equal(x.grad, [27]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [27]))
            loss.backward(retain_graph=True)
-            self.assertTrue(np.array_equal(x.grad, [54]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [54]))
            loss.backward()
-            self.assertTrue(np.array_equal(x.grad, [81]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [81]))
            with self.assertRaises(RuntimeError):
                loss.backward()
@@ -558,8 +558,8 @@ class TestImperative(unittest.TestCase):
            dx = paddle.grad([loss1], x, create_graph=True)[0]
            loss = loss1 + loss2 + dx
            loss.backward()
-            self.assertTrue(np.array_equal(dx.grad, [1]))
-            self.assertTrue(np.array_equal(x.grad, [108]))
+            self.assertTrue(np.array_equal(dx.grad.numpy(), [1]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [108]))
        def test_mlp(sort_sum_gradient):
            fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
@@ -579,28 +579,34 @@ class TestImperative(unittest.TestCase):
            detach_x = x.detach()
            clear_loss = mlp2(detach_x)
            clear_loss.backward()
-            expected_weight1_grad = expected_weight1_grad + mlp2._linear1.weight.grad
-            expected_bias1_grad = expected_bias1_grad + mlp2._linear1.bias.grad
-            expected_weight2_grad = expected_weight2_grad + mlp2._linear2.weight.grad
-            expected_bias2_grad = expected_bias2_grad + mlp2._linear2.bias.grad
+            expected_weight1_grad = (
+                expected_weight1_grad + mlp2._linear1.weight.grad.numpy())
+            expected_bias1_grad = (
+                expected_bias1_grad + mlp2._linear1.bias.grad.numpy())
+            expected_weight2_grad = (
+                expected_weight2_grad + mlp2._linear2.weight.grad.numpy())
+            expected_bias2_grad = (
+                expected_bias2_grad + mlp2._linear2.bias.grad.numpy())
            loss = mlp1(x)
            loss.backward()
-            self.assertTrue(np.array_equal(loss.grad, [1]))
+            self.assertTrue(np.array_equal(loss.grad.numpy(), [1]))
            self.assertTrue(
-                np.allclose(mlp1._linear1.weight.grad,
+                np.allclose(mlp1._linear1.weight.grad.numpy(),
                            expected_weight1_grad))
            self.assertTrue(
-                np.allclose(mlp1._linear1.bias.grad, expected_bias1_grad))
+                np.allclose(mlp1._linear1.bias.grad.numpy(),
+                            expected_bias1_grad))
            self.assertTrue(
-                np.allclose(mlp1._linear2.weight.grad,
+                np.allclose(mlp1._linear2.weight.grad.numpy(),
                            expected_weight2_grad))
            self.assertTrue(
-                np.allclose(mlp1._linear2.bias.grad, expected_bias2_grad))
+                np.allclose(mlp1._linear2.bias.grad.numpy(),
+                            expected_bias2_grad))
            mlp2.clear_gradients()
-            self.assertTrue(np.array_equal(clear_loss.grad, [1]))
+            self.assertTrue(np.array_equal(clear_loss.grad.numpy(), [1]))
            if ((batch_id + 1) % 10) == 0:
                mlp1.clear_gradients()
                expected_weight1_grad = 0.
......
...@@ -177,7 +177,7 @@ class TestDygraphInplace(unittest.TestCase): ...@@ -177,7 +177,7 @@ class TestDygraphInplace(unittest.TestCase):
var_d = var_c**2 var_d = var_c**2
loss = var_d.sum() loss = var_d.sum()
loss.backward() loss.backward()
grad_var_a_inplace = var_a.grad grad_var_a_inplace = var_a.grad.numpy()
with paddle.fluid.dygraph.guard(): with paddle.fluid.dygraph.guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype)
...@@ -188,7 +188,7 @@ class TestDygraphInplace(unittest.TestCase): ...@@ -188,7 +188,7 @@ class TestDygraphInplace(unittest.TestCase):
var_d = var_c**2 var_d = var_c**2
loss = var_d.sum() loss = var_d.sum()
loss.backward() loss.backward()
grad_var_a = var_a.grad grad_var_a = var_a.grad.numpy()
self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
...@@ -209,7 +209,7 @@ class TestDygraphInplace(unittest.TestCase): ...@@ -209,7 +209,7 @@ class TestDygraphInplace(unittest.TestCase):
loss = var_d.sum() loss = var_d.sum()
loss.backward() loss.backward()
grad_var_a_inplace = var_a.grad grad_var_a_inplace = var_a.grad.numpy()
with paddle.fluid.dygraph.guard(): with paddle.fluid.dygraph.guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) var_a = paddle.to_tensor(self.input_var_numpy).astype(self.dtype)
...@@ -224,7 +224,7 @@ class TestDygraphInplace(unittest.TestCase): ...@@ -224,7 +224,7 @@ class TestDygraphInplace(unittest.TestCase):
loss = var_d.sum() loss = var_d.sum()
loss.backward() loss.backward()
grad_var_a = var_a.grad grad_var_a = var_a.grad.numpy()
self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
......
@@ -110,7 +110,8 @@ class TestLookAhead(unittest.TestCase):
                out = layer(image)
                loss = loss_fn(out, label)
                loss.backward()
-                fast_param = layer.bias.numpy() - SGD_LR * layer.bias.grad
+                fast_param = (
+                    layer.bias.numpy() - SGD_LR * layer.bias.grad.numpy())
                opt.step()
                if idx == 1:
                    slow_param = fast_param
......
@@ -50,7 +50,8 @@ class TestPyLayer(unittest.TestCase):
        z2 = paddle.tanh(input2) + paddle.tanh(input2)
        z2.mean().backward()
-        self.assertTrue(np.max(np.abs((input1.grad - input2.grad))) < 1e-10)
+        self.assertTrue(
+            np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10)
    def test_simple_pylayer_return_none_with_no_grad(self):
        class tanh(PyLayer):
@@ -110,7 +111,8 @@ class TestPyLayer(unittest.TestCase):
        z2 = paddle.tanh(input2)
        z2.mean().backward()
-        self.assertTrue(np.max(np.abs((input1.grad - input2.grad))) < 1e-10)
+        self.assertTrue(
+            np.max(np.abs((input1.grad.numpy() - input2.grad.numpy()))) < 1e-10)
    def test_pylayer_dtype(self):
        class tanh(PyLayer):
......
@@ -75,15 +75,15 @@ class TestTensorRegisterHook(unittest.TestCase):
            o.backward()
            # z.grad is not affected
-            self.assertTrue(np.array_equal(z.grad, w.numpy()))
+            self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy()))
            # w.grad is not changed by hook
-            self.assertTrue(np.array_equal(w.grad, z.numpy()))
+            self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy()))
            # x.grad and y.grad are changed if run hook
            self.assertTrue(
-                np.array_equal(x.grad,
+                np.array_equal(x.grad.numpy(),
                               z.numpy() * 2 if not removed else z.numpy()))
            self.assertTrue(
-                np.array_equal(y.grad,
+                np.array_equal(y.grad.numpy(),
                               z.numpy() * 2 if not removed else z.numpy()))
        def run_print_hook_for_interior_var(print_hook, removed=False):
@@ -111,10 +111,10 @@ class TestTensorRegisterHook(unittest.TestCase):
            o.backward()
            # all grads are not affected
-            self.assertTrue(np.array_equal(z.grad, w.numpy()))
-            self.assertTrue(np.array_equal(w.grad, z.numpy()))
-            self.assertTrue(np.array_equal(x.grad, z.numpy()))
-            self.assertTrue(np.array_equal(y.grad, z.numpy()))
+            self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy()))
+            self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy()))
+            self.assertTrue(np.array_equal(x.grad.numpy(), z.numpy()))
+            self.assertTrue(np.array_equal(y.grad.numpy(), z.numpy()))
        def double_hook(grad):
            grad = grad * 2
@@ -165,12 +165,12 @@ class TestTensorRegisterHook(unittest.TestCase):
            o.backward()
            # z.grad, w.grad, x.grad is not affected
-            self.assertTrue(np.array_equal(z.grad, w.numpy()))
-            self.assertTrue(np.array_equal(w.grad, z.numpy()))
-            self.assertTrue(np.array_equal(x.grad, z.numpy()))
+            self.assertTrue(np.array_equal(z.grad.numpy(), w.numpy()))
+            self.assertTrue(np.array_equal(w.grad.numpy(), z.numpy()))
+            self.assertTrue(np.array_equal(x.grad.numpy(), z.numpy()))
            # y.grad are changed if run hook
            self.assertTrue(
-                np.array_equal(y.grad,
+                np.array_equal(y.grad.numpy(),
                               z.numpy() * 2 if not removed else z.numpy()))
        # register hook
@@ -217,14 +217,14 @@ class TestTensorRegisterHook(unittest.TestCase):
            base_grad = np.array([5., 9., 13., 19.])
            # x.grad is not changed
-            self.assertTrue(np.array_equal(x.grad, base_grad))
+            self.assertTrue(np.array_equal(x.grad.numpy(), base_grad))
            # b.grad is changed by x.hook
            self.assertTrue(
-                np.array_equal(b.grad, base_grad * 2
+                np.array_equal(b.grad.numpy(), base_grad * 2
                               if not removed else base_grad))
            # a.grad is changed by x.hook and a.hook
            self.assertTrue(
-                np.array_equal(a.grad, base_grad * 4
+                np.array_equal(a.grad.numpy(), base_grad * 4
                               if not removed else base_grad))
        # register hook
@@ -265,7 +265,7 @@ class TestTensorRegisterHook(unittest.TestCase):
            base_grad = np.array([5., 9., 13., 19.])
            # x.grad is changed by x.hook
            self.assertTrue(
-                np.array_equal(x.grad, base_grad * 2
+                np.array_equal(x.grad.numpy(), base_grad * 2
                               if not removed else base_grad))
        # register hook
@@ -294,7 +294,8 @@ class TestTensorRegisterHook(unittest.TestCase):
            loss = loss_fn(out, label)
            loss.backward()
-            return ret1.grad, net.linear1.weight.grad, net.linear1.bias.grad
+            return (ret1.grad.numpy(), net.linear1.weight.grad.numpy(),
+                    net.linear1.bias.grad.numpy())
        data = np.random.uniform(
            size=[self.batch_size, self.in_size]).astype('float32')
@@ -355,7 +356,7 @@ class TestTensorRegisterHook(unittest.TestCase):
            o.backward()
-            return z.numpy(), w.grad, x.grad, y.grad
+            return z.numpy(), w.grad.numpy(), x.grad.numpy(), y.grad.numpy()
        def double_hook(grad):
            return grad * 2
@@ -428,7 +429,7 @@ class TestTensorRegisterHook(unittest.TestCase):
            # after changed by hook: 8.0
            z.backward()
-            self.assertTrue(np.array_equal(x.grad, np.array([8.])))
+            self.assertTrue(np.array_equal(x.grad.numpy(), np.array([8.])))
    def test_remove_one_hook_multiple_times(self):
        for device in self.devices:
......
@@ -65,7 +65,8 @@ class TestVarBase(unittest.TestCase):
            y = clone_x**2
            y.backward()
            self.assertTrue(
-                np.array_equal(x.grad, np.array([2.4]).astype('float32')))
+                np.array_equal(x.grad.numpy(),
+                               np.array([2.4]).astype('float32')))
            y = x.cpu()
            self.assertEqual(y.place.__repr__(), "CPUPlace")
            if core.is_compiled_with_cuda():
@@ -260,14 +261,14 @@ class TestVarBase(unittest.TestCase):
            y = x**2
            y.backward()
-            self.assertTrue(np.array_equal(x.grad, [20.0]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [20.0]))
            self.assertEqual(detach_x.grad, None)
            detach_x.stop_gradient = False  # Set stop_gradient to be False, supported auto-grad
            z = 3 * detach_x**2
            z.backward()
-            self.assertTrue(np.array_equal(x.grad, [20.0]))
-            self.assertTrue(np.array_equal(detach_x.grad, [60.0]))
+            self.assertTrue(np.array_equal(x.grad.numpy(), [20.0]))
+            self.assertTrue(np.array_equal(detach_x.grad.numpy(), [60.0]))
            # Due to sharing of data with origin Tensor, There are some unsafe operations:
            with self.assertRaises(RuntimeError):
......