Unverified commit 50de8a4f, authored by zhouweiwei2014, committed by GitHub

fix behavior of device_id=None in Tensor.cuda (#44515)

* fix behavior of device_id=None in Tensor.cuda

* fix CI
Parent 98f8fa4c
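For context on the diff below: the user-visible change is that Tensor.cuda() called with no argument (or with device_id=None) no longer hard-codes GPU 0 but resolves to the currently expected CUDA place, and an explicit device_id now forces a copy even when the tensor already lives on a different GPU. A minimal usage sketch of the intended behavior, assuming a CUDA build of Paddle and a machine with at least two GPUs:

    import paddle

    if paddle.device.cuda.device_count() > 1:
        paddle.set_device("gpu:1")            # make gpu:1 the current expected place
        x = paddle.to_tensor([1.0, 2.0])      # created on gpu:1

        y = x.cuda()                          # device_id=None -> current place, gpu:1
        z = x.cuda(0)                         # explicit id -> copy to gpu:0
        print(y.place)                        # Place(gpu:1)
        print(z.place)                        # Place(gpu:0); before this fix the call
                                              # returned x unchanged because it was
                                              # already on *a* GPU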
@@ -75,7 +75,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
 #if CUDA_VERSION >= 11070
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
-  CsrDenseMatmulGradKernel<T, Context>(
+  MatmulCsrDenseGradKernel<T, Context>(
       dev_ctx, softmax, value, dout, &dsoftmax, dvalue);
   /* Step2: Calculate grad of sdd_result, manualy not reuse */
......
@@ -263,7 +263,7 @@ void FusedAttentionCsrKernel(
   /* Step3: DSD Matmul, reuse */
   softmax->set_dims(phi::make_ddim({q_dim[0], q_dim[1], q_dim[2], q_dim[2]}));
-  CsrDenseMatmulKernel<T, Context>(dev_ctx, *softmax, value, out);
+  MatmulCsrDenseKernel<T, Context>(dev_ctx, *softmax, value, out);
 #else
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
......
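The two C++ hunks above only update call sites for the renamed sparse matmul kernels (CsrDenseMatmul* -> MatmulCsrDense*); the computation itself is unchanged. For orientation, the step involved is softmax{CSR} * value{Dense} -> out{Dense}, i.e. the final matmul of scaled-dot-product attention with the attention weights held in CSR form. A dense reference sketch of that math using ordinary Paddle ops (not the CSR code path; shapes purely illustrative):

    import paddle

    # [batch, num_heads, seq_len, head_dim]
    q = paddle.randn([2, 4, 8, 16])
    k = paddle.randn([2, 4, 8, 16])
    v = paddle.randn([2, 4, 8, 16])

    scores = paddle.matmul(q, k, transpose_y=True) / 16 ** 0.5   # the "sdd_result"
    softmax = paddle.nn.functional.softmax(scores, axis=-1)      # kept as CSR in the kernel
    out = paddle.matmul(softmax, v)                              # the Matmul(Csr, Dense) step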
@@ -866,15 +866,20 @@ def monkey_patch_varbase():
             return res

     @framework.dygraph_only
-    def cuda(self, device_id=0, blocking=True):
+    def cuda(self, device_id=None, blocking=True):
         if device_id is None:
-            device_id = 0
-        if not isinstance(device_id, int):
-            raise ValueError("\'device_id\' must be a positive integer")
-        if self.place.is_gpu_place():
+            res_place = framework._current_expected_place()
+            if not isinstance(res_place, core.CUDAPlace):
+                res_place = core.CUDAPlace(0)
+        elif isinstance(device_id, int):
+            res_place = core.CUDAPlace(device_id)
+        else:
+            raise ValueError("device_id must be int|None")
+
+        if self.place._equals(res_place):
             return self
         else:
-            res = self._copy_to(core.CUDAPlace(device_id), True)
+            res = self._copy_to(res_place, True)
             res.stop_gradient = self.stop_gradient
             res.persistable = self.persistable
             return res
......
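The Python hunk above changes two things: how the target place is resolved (None now means "the current expected place" rather than GPU 0) and how the early return is decided (an exact place comparison via _equals instead of "already on any GPU"). A standalone sketch of the resolution rule, using a hypothetical helper name just to make the branch structure explicit:

    from paddle.fluid import core, framework

    def _resolve_cuda_place(device_id):
        # hypothetical helper mirroring the new Tensor.cuda() logic
        if device_id is None:
            # follow the globally expected place (e.g. set via paddle.set_device)
            place = framework._current_expected_place()
            if not isinstance(place, core.CUDAPlace):
                place = core.CUDAPlace(0)   # fall back to GPU 0 on non-CUDA defaults
            return place
        if isinstance(device_id, int):
            return core.CUDAPlace(device_id)
        raise ValueError("device_id must be int|None")

Tensor.cuda() then copies only when self.place differs from the resolved place, so a tensor on gpu:1 really is moved when cuda(0) is requested.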
@@ -34,7 +34,7 @@ class TestVarBase(unittest.TestCase):
     def func_test_to_tensor(self):
-        def _test_place(place):
+        def check_with_place(place):
             with fluid.dygraph.guard():
                 paddle.set_default_dtype('float32')
                 # set_default_dtype should not take effect on int
@@ -79,6 +79,7 @@ class TestVarBase(unittest.TestCase):
                 y = x.pin_memory()
                 self.assertEqual(y.place.__repr__(), "Place(gpu_pinned)")
                 y = x.cuda()
+                self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                 y = x.cuda(None)
                 self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                 y = x.cuda(device_id=0)
@@ -266,16 +266,16 @@ class TestVarBase(unittest.TestCase):
                 with self.assertRaises(ValueError):
                     paddle.to_tensor([[1], [2, 3]], place=1)
-        _test_place(core.CPUPlace())
-        _test_place("cpu")
+        check_with_place(core.CPUPlace())
+        check_with_place("cpu")
         if core.is_compiled_with_cuda():
-            _test_place(core.CUDAPinnedPlace())
-            _test_place("gpu_pinned")
-            _test_place(core.CUDAPlace(0))
-            _test_place("gpu:0")
+            check_with_place(core.CUDAPinnedPlace())
+            check_with_place("gpu_pinned")
+            check_with_place(core.CUDAPlace(0))
+            check_with_place("gpu:0")
         if core.is_compiled_with_npu():
-            _test_place(core.NPUPlace(0))
-            _test_place("npu:0")
+            check_with_place(core.NPUPlace(0))
+            check_with_place("npu:0")

     def test_to_tensor(self):
         with _test_eager_guard():
......