Unverified commit fb8f2de6 authored by 姜永久, committed by GitHub

rm unittests eager guard tests part19 rnn2sparse_model (#48878)

* rm unittests eager guard tests part19 rnn2sparse_model

* fix conflicts

* fix set value
Parent 3900d562
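Every file in this diff follows the same mechanical pattern: a `func_*` test body plus a `test_*` wrapper that ran it twice (once inside `_test_eager_guard()`, once without) is collapsed into a single `test_*` method, since eager mode is now the default and the guard is being removed. A minimal before/after sketch of that pattern, using an illustrative test class (the method name mirrors the sort test in this diff, but the class and data are made up):

```python
import unittest

import paddle


class TestSortPattern(unittest.TestCase):
    # Before this commit, the body lived in a func_* helper and the
    # test_* wrapper ran it twice, once under the legacy guard:
    #
    #     def func_api_0(self):
    #         ...  # actual test body
    #
    #     def test_api_0(self):
    #         with _test_eager_guard():
    #             self.func_api_0()
    #         self.func_api_0()
    #
    # After: eager mode is the default, so the helper is renamed to
    # test_* and its body runs exactly once.
    def test_api_0(self):
        out = paddle.sort(paddle.to_tensor([3.0, 1.0, 2.0]))
        self.assertEqual(out.numpy().tolist(), [1.0, 2.0, 3.0])


if __name__ == "__main__":
    unittest.main()
```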
......@@ -24,7 +24,6 @@ import paddle.fluid.layers as layers
import paddle.nn as nn
from paddle import Model, set_device
from paddle.fluid.data_feeder import convert_dtype
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.layers.utils import map_structure
from paddle.nn import (
RNN,
......@@ -392,16 +391,11 @@ class TestBeamSearch(ModuleApiTest):
]
return inputs
def func_check_output(self):
def test_check_output(self):
self.setUp()
self.make_inputs()
self.check_output()
def test_check_output(self):
with _test_eager_guard():
self.func_check_output()
self.func_check_output()
class EncoderCell(SimpleRNNCell):
def __init__(
......@@ -699,16 +693,11 @@ class TestDynamicDecode(ModuleApiTest):
]
return inputs
def func_check_output(self):
def test_check_output(self):
self.setUp()
self.make_inputs()
self.check_output()
def test_check_output(self):
with _test_eager_guard():
self.func_check_output()
self.func_check_output()
if __name__ == '__main__':
unittest.main()
......@@ -21,7 +21,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.layer_helper import LayerHelper
......@@ -70,7 +69,7 @@ class TestSetValueApi(TestSetValueBase):
paddle.enable_static()
return out
def func_test_api(self):
def test_api(self):
static_out = self._run_static()
dynamic_out = self._run_dynamic()
self._get_answer()
......@@ -87,11 +86,6 @@ class TestSetValueApi(TestSetValueBase):
msg=error_msg.format("dynamic", self.data, dynamic_out),
)
def test_api(self):
with _test_eager_guard():
self.func_test_api()
self.func_test_api()
# 1. Test different type of item: int, Python slice, Paddle Tensor
# 1.1 item is int
......@@ -1034,6 +1028,7 @@ class TestBackward(unittest.TestCase):
paddle.disable_static()
def func_test_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
model = Model()
x = paddle.ones([1, 12, 3, 3]).astype("float32")
y = paddle.ones([1, 12, 3, 3]).astype("float32")
......@@ -1042,17 +1037,11 @@ class TestBackward(unittest.TestCase):
self.assertTrue(var.grad.shape == x.grad[0, :, 0, 0].shape)
self.assertTrue((0 == x.grad[0, :, 0, 0]).all())
def test_dynamic(self):
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
with _test_eager_guard():
self.func_test_dynamic()
self.func_test_dynamic()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
class TestGradientTruncated(unittest.TestCase):
def func_test_consistent_with_competitor(self):
def test_consistent_with_competitor(self):
paddle.disable_static()
def set_value(t, value):
......@@ -1309,11 +1298,6 @@ class TestGradientTruncated(unittest.TestCase):
self.assertTrue(not x.stop_gradient)
self.assertTrue(not x.is_leaf)
def test_consistent_with_competitor(self):
with _test_eager_guard():
self.func_test_consistent_with_competitor()
self.func_test_consistent_with_competitor()
def test_static_graph(self):
paddle.enable_static()
......@@ -1497,7 +1481,7 @@ class TestSetValueInplace(unittest.TestCase):
self.assertTrue(id(b) == id(c))
np.testing.assert_array_equal(b.numpy(), c.numpy())
self.assertEqual(b.inplace_version, 1)
self.assertEqual(b.inplace_version, 0)
paddle.enable_static()
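Note the one substantive expectation change in this hunk: `b.inplace_version` is now expected to be 0 rather than 1, i.e. this `set_value` path no longer records an in-place version bump under default eager mode. A minimal sketch of how the counter can be observed (the tensor and written value are illustrative; only the `inplace_version` attribute itself comes from the test above):

```python
import paddle

paddle.disable_static()
a = paddle.ones([2, 3])
b = a * 2  # a freshly computed tensor; its version counter starts at 0
print(b.inplace_version)  # 0
b[0] = 5.0  # __setitem__ writes in place via the set_value path
# Whether that write bumps the counter is exactly what the updated
# assertion pins down (expected 0 in eager mode, was 1 before).
print(b.inplace_version)
paddle.enable_static()
```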
......
......@@ -20,7 +20,6 @@ from op_test import OpTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.op import Operator
paddle.enable_static()
......@@ -307,11 +306,6 @@ class TestSGDV2(unittest.TestCase):
adam.step()
adam.clear_gradients()
def test_eager(self):
with _test_eager_guard():
self.test_sgd_dygraph()
self.test_sgd_group_dygraph()
class TestSGDMultiPrecision2_0(unittest.TestCase):
def dygraph_sgd_mp(self, mp):
......
......@@ -18,7 +18,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard
def call_sfl_functional(
......@@ -161,16 +160,6 @@ class TestSigmoidFocalLoss(unittest.TestCase):
gamma,
reduction,
)
with _test_eager_guard():
eager_result = test_dygraph(
place,
logit_np,
label_np,
normalizer_np,
alpha,
gamma,
reduction,
)
expected = calc_sigmoid_focal_loss(
logit_np,
label_np,
......@@ -188,9 +177,6 @@ class TestSigmoidFocalLoss(unittest.TestCase):
np.testing.assert_allclose(
dy_result, expected, rtol=1e-05
)
np.testing.assert_allclose(
eager_result, expected, rtol=1e-05
)
def test_SigmoidFocalLoss_error(self):
paddle.disable_static()
......
......@@ -25,7 +25,6 @@ from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
from paddle.fluid.framework import _test_eager_guard
# 2D normal case
......@@ -259,7 +258,7 @@ class TestSolveOpBatched_case8(OpTest):
class TestSolveOpError(unittest.TestCase):
def func_errors(self):
def test_errors(self):
with program_guard(Program(), Program()):
# The input type of solve_op must be Variable.
x1 = fluid.create_lod_tensor(
......@@ -297,11 +296,6 @@ class TestSolveOpError(unittest.TestCase):
y7 = fluid.data(name="y7", shape=[2, 4, 3], dtype="float64")
self.assertRaises(ValueError, paddle.linalg.solve, x7, y7)
def test_dygraph(self):
with _test_eager_guard():
self.func_errors()
self.func_errors()
# 2D + vector case, FP64
class TestSolveOpAPI_1(unittest.TestCase):
......@@ -341,7 +335,7 @@ class TestSolveOpAPI_1(unittest.TestCase):
for place in self.place:
self.check_static_result(place=place)
def func_dygraph(self):
def test_dygraph(self):
def run(place):
paddle.disable_static(place)
np.random.seed(2021)
......@@ -362,11 +356,6 @@ class TestSolveOpAPI_1(unittest.TestCase):
for place in self.place:
run(place)
def test_dygraph(self):
with _test_eager_guard():
self.func_dygraph()
self.func_dygraph()
# 2D normal case, FP64
class TestSolveOpAPI_2(unittest.TestCase):
......@@ -407,7 +396,7 @@ class TestSolveOpAPI_2(unittest.TestCase):
for place in self.place:
self.check_static_result(place=place)
def func_dygraph(self):
def test_dygraph(self):
def run(place):
paddle.disable_static(place)
np.random.seed(2021)
......@@ -427,11 +416,6 @@ class TestSolveOpAPI_2(unittest.TestCase):
for place in self.place:
run(place)
def test_dygraph(self):
with _test_eager_guard():
self.func_dygraph()
self.func_dygraph()
# 2D normal case, FP32
class TestSolveOpAPI_3(unittest.TestCase):
......@@ -472,7 +456,7 @@ class TestSolveOpAPI_3(unittest.TestCase):
for place in self.place:
self.check_static_result(place=place)
def func_dygraph(self):
def test_dygraph(self):
def run(place):
paddle.disable_static(place)
np.random.seed(2021)
......@@ -493,11 +477,6 @@ class TestSolveOpAPI_3(unittest.TestCase):
for place in self.place:
run(place)
def test_dygraph(self):
with _test_eager_guard():
self.func_dygraph()
self.func_dygraph()
# 3D + y broadcast case, FP64
class TestSolveOpAPI_4(unittest.TestCase):
......@@ -537,7 +516,7 @@ class TestSolveOpAPI_4(unittest.TestCase):
for place in self.place:
self.check_static_result(place=place)
def func_dygraph(self):
def test_dygraph(self):
def run(place):
paddle.disable_static(place)
np.random.seed(2021)
......@@ -558,11 +537,6 @@ class TestSolveOpAPI_4(unittest.TestCase):
for place in self.place:
run(place)
def test_dygraph(self):
with _test_eager_guard():
self.func_dygraph()
self.func_dygraph()
class TestSolveOpSingularAPI(unittest.TestCase):
    # Singular matrix is not invertible
......@@ -599,7 +573,7 @@ class TestSolveOpSingularAPI(unittest.TestCase):
paddle.enable_static()
self.check_static_result(place=place)
def func_dygraph(self):
def test_dygraph(self):
for place in self.places:
with fluid.dygraph.guard(place):
input_x_np = np.ones([4, 4]).astype(self.dtype)
......@@ -613,11 +587,6 @@ class TestSolveOpSingularAPI(unittest.TestCase):
except ValueError as ex:
print("The mat is singular")
def test_dygraph(self):
with _test_eager_guard():
self.func_dygraph()
self.func_dygraph()
if __name__ == "__main__":
unittest.main()
......@@ -19,7 +19,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard
class TestSortOnCPU(unittest.TestCase):
......@@ -75,19 +74,14 @@ class TestSortDygraph(unittest.TestCase):
else:
self.place = core.CPUPlace()
def func_api_0(self):
def test_api_0(self):
paddle.disable_static(self.place)
var_x = paddle.to_tensor(self.input_data)
out = paddle.sort(var_x)
self.assertEqual((np.sort(self.input_data) == out.numpy()).all(), True)
paddle.enable_static()
def test_api_0(self):
with _test_eager_guard():
self.func_api_0()
self.func_api_0()
def func_api_1(self):
def test_api_1(self):
paddle.disable_static(self.place)
var_x = paddle.to_tensor(self.input_data)
out = paddle.sort(var_x, axis=-1)
......@@ -95,8 +89,3 @@ class TestSortDygraph(unittest.TestCase):
(np.sort(self.input_data, axis=-1) == out.numpy()).all(), True
)
paddle.enable_static()
def test_api_1(self):
with _test_eager_guard():
self.func_api_1()
self.func_api_1()
......@@ -19,156 +19,146 @@ import numpy as np
import paddle
import paddle.sparse as sparse
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard
class TestSparseConv(unittest.TestCase):
def test_conv3d(self):
with _test_eager_guard():
kernel = [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]]
dense_kernel = paddle.to_tensor(
kernel, dtype='float32', stop_gradient=False
)
dense_kernel = paddle.reshape(dense_kernel, [1, 3, 3, 1, 1])
paddings = [0, 0, 0]
strides = [1, 1, 1]
dilations = [1, 1, 1]
bias = [1]
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [1, 2, 3, 4]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[5], [11]]
sparse_input = core.eager.sparse_coo_tensor(
indices, values, dense_shape, False
)
out = paddle.sparse.nn.functional.conv3d(
sparse_input,
dense_kernel,
bias=paddle.to_tensor(bias, dtype='float32'),
stride=strides,
padding=paddings,
dilation=dilations,
groups=1,
data_format="NDHWC",
)
out.backward(out)
out = paddle.sparse.coalesce(out)
assert np.array_equal(correct_out_values, out.values().numpy())
kernel = [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]]
dense_kernel = paddle.to_tensor(
kernel, dtype='float32', stop_gradient=False
)
dense_kernel = paddle.reshape(dense_kernel, [1, 3, 3, 1, 1])
paddings = [0, 0, 0]
strides = [1, 1, 1]
dilations = [1, 1, 1]
bias = [1]
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [1, 2, 3, 4]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[5], [11]]
sparse_input = core.eager.sparse_coo_tensor(
indices, values, dense_shape, False
)
out = paddle.sparse.nn.functional.conv3d(
sparse_input,
dense_kernel,
bias=paddle.to_tensor(bias, dtype='float32'),
stride=strides,
padding=paddings,
dilation=dilations,
groups=1,
data_format="NDHWC",
)
out.backward(out)
out = paddle.sparse.coalesce(out)
assert np.array_equal(correct_out_values, out.values().numpy())
def test_subm_conv3d(self):
with _test_eager_guard():
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
sparse_x = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, stop_gradient=True
)
weight = paddle.randn((1, 3, 3, 1, 1), dtype='float32')
y = paddle.sparse.nn.functional.subm_conv3d(
sparse_x, weight, key='subm_conv'
)
assert np.array_equal(
sparse_x.indices().numpy(), y.indices().numpy()
)
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
sparse_x = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, stop_gradient=True
)
weight = paddle.randn((1, 3, 3, 1, 1), dtype='float32')
y = paddle.sparse.nn.functional.subm_conv3d(
sparse_x, weight, key='subm_conv'
)
assert np.array_equal(sparse_x.indices().numpy(), y.indices().numpy())
def test_Conv3D(self):
with _test_eager_guard():
# (4, non_zero_num), 4-D:(N, D, H, W)
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
# (non_zero_num, C)
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[4], [10]]
sparse_input = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, False
)
# (4, non_zero_num), 4-D:(N, D, H, W)
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
# (non_zero_num, C)
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[4], [10]]
sparse_input = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, False
)
sparse_conv3d = paddle.sparse.nn.Conv3D(
1, 1, (1, 3, 3), data_format='NDHWC'
sparse_conv3d = paddle.sparse.nn.Conv3D(
1, 1, (1, 3, 3), data_format='NDHWC'
)
sparse_out = sparse_conv3d(sparse_input)
# test errors
with self.assertRaises(ValueError):
# Currently, only support data_format='NDHWC'
conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NCDHW', key='subm_conv'
)
sparse_out = sparse_conv3d(sparse_input)
# test errors
with self.assertRaises(ValueError):
# Currently, only support data_format='NDHWC'
conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NCDHW', key='subm_conv'
)
def test_SubmConv3D(self):
with _test_eager_guard():
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[4], [10]]
sparse_input = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, False
)
indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [[1], [2], [3], [4]]
indices = paddle.to_tensor(indices, dtype='int32')
values = paddle.to_tensor(values, dtype='float32')
dense_shape = [1, 1, 3, 4, 1]
correct_out_values = [[4], [10]]
sparse_input = paddle.sparse.sparse_coo_tensor(
indices, values, dense_shape, False
)
subm_conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NDHWC', key='subm_conv'
subm_conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NDHWC', key='subm_conv'
)
# test extra_repr
print(subm_conv3d.extra_repr())
sparse_out = subm_conv3d(sparse_input)
# the output shape of subm_conv is same as input shape
assert np.array_equal(indices, sparse_out.indices().numpy())
# test errors
with self.assertRaises(ValueError):
# Currently, only support data_format='NDHWC'
conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NCDHW', key='subm_conv'
)
# test extra_repr
print(subm_conv3d.extra_repr())
sparse_out = subm_conv3d(sparse_input)
# the output shape of subm_conv is same as input shape
assert np.array_equal(indices, sparse_out.indices().numpy())
# test errors
with self.assertRaises(ValueError):
# Currently, only support data_format='NDHWC'
conv3d = paddle.sparse.nn.SubmConv3D(
1, 1, (1, 3, 3), data_format='NCDHW', key='subm_conv'
)
def test_Conv3D_bias(self):
with _test_eager_guard():
paddle.seed(0)
shape = [1, 4, 4, 4, 3]
x = paddle.randn(shape)
sp_x = x.to_sparse_coo(4)
conv3d = paddle.nn.Conv3D(3, 2, 3, data_format='NDHWC')
sp_conv3d = paddle.sparse.nn.Conv3D(3, 2, 3, data_format='NDHWC')
sp_conv3d.weight.set_value(
paddle.to_tensor(conv3d.weight.numpy().transpose(2, 3, 4, 1, 0))
)
sp_conv3d.bias.set_value(paddle.to_tensor(conv3d.bias.numpy()))
x.stop_gradient = False
out = conv3d(x)
loss = out.mean()
loss.backward()
sp_x.stop_gradient = False
sp_out = sp_conv3d(sp_x)
dense_out = sp_out.to_dense()
sp_loss = dense_out.mean()
sp_loss.backward()
assert np.allclose(
out.numpy(), dense_out.numpy(), atol=1e-3, rtol=1e-3
)
assert np.allclose(
conv3d.weight.grad.numpy().transpose(2, 3, 4, 1, 0),
sp_conv3d.weight.grad.numpy(),
atol=1e-3,
rtol=1e-3,
)
assert np.allclose(
conv3d.bias.grad.numpy(),
sp_conv3d.bias.grad.numpy(),
atol=1e-5,
rtol=1e-5,
)
paddle.seed(0)
shape = [1, 4, 4, 4, 3]
x = paddle.randn(shape)
sp_x = x.to_sparse_coo(4)
conv3d = paddle.nn.Conv3D(3, 2, 3, data_format='NDHWC')
sp_conv3d = paddle.sparse.nn.Conv3D(3, 2, 3, data_format='NDHWC')
sp_conv3d.weight.set_value(
paddle.to_tensor(conv3d.weight.numpy().transpose(2, 3, 4, 1, 0))
)
sp_conv3d.bias.set_value(paddle.to_tensor(conv3d.bias.numpy()))
x.stop_gradient = False
out = conv3d(x)
loss = out.mean()
loss.backward()
sp_x.stop_gradient = False
sp_out = sp_conv3d(sp_x)
dense_out = sp_out.to_dense()
sp_loss = dense_out.mean()
sp_loss.backward()
assert np.allclose(out.numpy(), dense_out.numpy(), atol=1e-3, rtol=1e-3)
assert np.allclose(
conv3d.weight.grad.numpy().transpose(2, 3, 4, 1, 0),
sp_conv3d.weight.grad.numpy(),
atol=1e-3,
rtol=1e-3,
)
assert np.allclose(
conv3d.bias.grad.numpy(),
sp_conv3d.bias.grad.numpy(),
atol=1e-5,
rtol=1e-5,
)
class TestStatic(unittest.TestCase):
......
......@@ -17,32 +17,29 @@ import unittest
import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard
class TestSparseCopy(unittest.TestCase):
def test_copy_sparse_coo(self):
with _test_eager_guard():
np_x = [[0, 1.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
np_values = [1.0, 2.0, 3.0]
dense_x = paddle.to_tensor(np_x, dtype='float32')
coo_x = dense_x.to_sparse_coo(2)
np_x_2 = [[0, 3.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
dense_x_2 = paddle.to_tensor(np_x_2, dtype='float32')
coo_x_2 = dense_x_2.to_sparse_coo(2)
coo_x_2.copy_(coo_x, True)
assert np.array_equal(np_values, coo_x_2.values().numpy())
np_x = [[0, 1.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
np_values = [1.0, 2.0, 3.0]
dense_x = paddle.to_tensor(np_x, dtype='float32')
coo_x = dense_x.to_sparse_coo(2)
np_x_2 = [[0, 3.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
dense_x_2 = paddle.to_tensor(np_x_2, dtype='float32')
coo_x_2 = dense_x_2.to_sparse_coo(2)
coo_x_2.copy_(coo_x, True)
assert np.array_equal(np_values, coo_x_2.values().numpy())
def test_copy_sparse_csr(self):
with _test_eager_guard():
np_x = [[0, 1.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
np_values = [1.0, 2.0, 3.0]
dense_x = paddle.to_tensor(np_x, dtype='float32')
csr_x = dense_x.to_sparse_csr()
np_x_2 = [[0, 3.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
dense_x_2 = paddle.to_tensor(np_x_2, dtype='float32')
csr_x_2 = dense_x_2.to_sparse_csr()
csr_x_2.copy_(csr_x, True)
assert np.array_equal(np_values, csr_x_2.values().numpy())
np_x = [[0, 1.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
np_values = [1.0, 2.0, 3.0]
dense_x = paddle.to_tensor(np_x, dtype='float32')
csr_x = dense_x.to_sparse_csr()
np_x_2 = [[0, 3.0, 0], [2.0, 0, 0], [0, 3.0, 0]]
dense_x_2 = paddle.to_tensor(np_x_2, dtype='float32')
csr_x_2 = dense_x_2.to_sparse_csr()
csr_x_2.copy_(csr_x, True)
assert np.array_equal(np_values, csr_x_2.values().numpy())
......@@ -22,7 +22,6 @@ import numpy as np
import paddle
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard
def get_cuda_version():
......@@ -51,90 +50,86 @@ class TestSparseAttentionAPI1(unittest.TestCase):
self.use_mask = True
def test_dygraph(self):
with _test_eager_guard():
self.shape = [
self.batch_size,
self.num_heads,
self.seq_len,
self.head_dim,
]
query = paddle.rand(self.shape, self.dtype)
key = paddle.rand(self.shape, self.dtype)
value = paddle.rand(self.shape, self.dtype)
query.stop_gradient = False
key.stop_gradient = False
value.stop_gradient = False
mask = paddle.nn.functional.dropout(
paddle.ones([self.seq_len, self.seq_len]),
mode='downscale_in_infer',
self.shape = [
self.batch_size,
self.num_heads,
self.seq_len,
self.head_dim,
]
query = paddle.rand(self.shape, self.dtype)
key = paddle.rand(self.shape, self.dtype)
value = paddle.rand(self.shape, self.dtype)
query.stop_gradient = False
key.stop_gradient = False
value.stop_gradient = False
mask = paddle.nn.functional.dropout(
paddle.ones([self.seq_len, self.seq_len]),
mode='downscale_in_infer',
)
mask = mask.expand(
[self.batch_size, self.num_heads, self.seq_len, self.seq_len]
)
sp_mask = mask.reshape([-1, self.seq_len, self.seq_len]).to_sparse_csr()
query_sp = copy.deepcopy(query)
key_sp = copy.deepcopy(key)
value_sp = copy.deepcopy(value)
query_sp.stop_gradient = False
key_sp.stop_gradient = False
value_sp.stop_gradient = False
if self.use_mask:
kp_mask = paddle.randint(
0, 2, [self.batch_size, self.seq_len]
).astype(self.dtype)
attn_mask = paddle.randint(
0, 2, [self.seq_len, self.seq_len]
).astype(self.dtype)
sdd = paddle.matmul(query, key, False, True) / math.sqrt(
float(self.head_dim)
)
mask = mask.expand(
[self.batch_size, self.num_heads, self.seq_len, self.seq_len]
sdd = (
sdd
+ ((mask * kp_mask.unsqueeze([1, 2]) * attn_mask) - 1.0) * 1e9
)
sp_mask = mask.reshape(
[-1, self.seq_len, self.seq_len]
).to_sparse_csr()
query_sp = copy.deepcopy(query)
key_sp = copy.deepcopy(key)
value_sp = copy.deepcopy(value)
query_sp.stop_gradient = False
key_sp.stop_gradient = False
value_sp.stop_gradient = False
if self.use_mask:
kp_mask = paddle.randint(
0, 2, [self.batch_size, self.seq_len]
).astype(self.dtype)
attn_mask = paddle.randint(
0, 2, [self.seq_len, self.seq_len]
).astype(self.dtype)
sdd = paddle.matmul(query, key, False, True) / math.sqrt(
float(self.head_dim)
)
sdd = (
sdd
+ ((mask * kp_mask.unsqueeze([1, 2]) * attn_mask) - 1.0)
* 1e9
)
softmax = paddle.nn.functional.softmax(sdd)
output = paddle.matmul(softmax, value)
output.backward()
output_sp = paddle.sparse.nn.functional.attention(
query_sp, key_sp, value_sp, sp_mask, kp_mask, attn_mask
)
output_sp.backward()
else:
sdd = paddle.matmul(query, key, False, True) / math.sqrt(
float(self.head_dim)
)
sdd = sdd + (mask - 1.0) * 1e9
softmax = paddle.nn.functional.softmax(sdd)
output = paddle.matmul(softmax, value)
output.backward()
output_sp = paddle.sparse.nn.functional.attention(
query_sp, key_sp, value_sp, sp_mask
)
output_sp.backward()
np.testing.assert_allclose(
output_sp.numpy(), output.numpy(), rtol=1e-05
)
np.testing.assert_allclose(
query_sp.grad.numpy(), query.grad.numpy(), rtol=1e-05
softmax = paddle.nn.functional.softmax(sdd)
output = paddle.matmul(softmax, value)
output.backward()
output_sp = paddle.sparse.nn.functional.attention(
query_sp, key_sp, value_sp, sp_mask, kp_mask, attn_mask
)
np.testing.assert_allclose(
key_sp.grad.numpy(), key.grad.numpy(), rtol=1e-05
output_sp.backward()
else:
sdd = paddle.matmul(query, key, False, True) / math.sqrt(
float(self.head_dim)
)
np.testing.assert_allclose(
value_sp.grad.numpy(), value.grad.numpy(), rtol=1e-05
sdd = sdd + (mask - 1.0) * 1e9
softmax = paddle.nn.functional.softmax(sdd)
output = paddle.matmul(softmax, value)
output.backward()
output_sp = paddle.sparse.nn.functional.attention(
query_sp, key_sp, value_sp, sp_mask
)
output_sp.backward()
np.testing.assert_allclose(
output_sp.numpy(), output.numpy(), rtol=1e-05
)
np.testing.assert_allclose(
query_sp.grad.numpy(), query.grad.numpy(), rtol=1e-05
)
np.testing.assert_allclose(
key_sp.grad.numpy(), key.grad.numpy(), rtol=1e-05
)
np.testing.assert_allclose(
value_sp.grad.numpy(), value.grad.numpy(), rtol=1e-05
)
class TestSparseAttentionAPI2(TestSparseAttentionAPI1):
......
......@@ -17,7 +17,6 @@ import unittest
import numpy as np
import paddle
from paddle.fluid.framework import _test_eager_guard
from paddle.sparse import nn
......@@ -41,28 +40,27 @@ class TestGradientAdd(unittest.TestCase):
return out
def test(self):
with _test_eager_guard():
x = paddle.randn((3, 3))
sparse_x = x.to_sparse_coo(sparse_dim=2)
x = paddle.randn((3, 3))
sparse_x = x.to_sparse_coo(sparse_dim=2)
x.stop_gradient = False
sparse_x.stop_gradient = False
x.stop_gradient = False
sparse_x.stop_gradient = False
dense_out = self.dense(x)
loss = dense_out.mean()
loss.backward(retain_graph=True)
dense_out = self.dense(x)
loss = dense_out.mean()
loss.backward(retain_graph=True)
sparse_out = self.sparse(sparse_x)
sparse_loss = sparse_out.values().mean()
sparse_loss.backward(retain_graph=True)
sparse_out = self.sparse(sparse_x)
sparse_loss = sparse_out.values().mean()
sparse_loss.backward(retain_graph=True)
assert np.allclose(dense_out.numpy(), sparse_out.to_dense().numpy())
assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
assert np.allclose(dense_out.numpy(), sparse_out.to_dense().numpy())
assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
loss.backward()
sparse_loss.backward()
loss.backward()
sparse_loss.backward()
assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
if __name__ == "__main__":
......