Unverified · Commit 50de8a4f authored by zhouweiwei2014, committed by GitHub

fix behavior of device_id=None in Tensor.cuda (#44515)

* fix behavior of device_id=None in Tensor.cuda

* fix CI
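Before this change, `cuda()` defaulted `device_id` to 0 and returned `self` whenever the tensor already lived on any GPU, even one other than the requested device. After it, `device_id=None` resolves to the current expected place (falling back to GPU 0 when that place is not a CUDA device), and the early return fires only when the tensor is already on the resolved place. A minimal usage sketch of the new semantics, assuming a CUDA build of Paddle with at least two visible GPUs:

```python
# Sketch of the fixed semantics; assumes a CUDA build of Paddle with at
# least two visible GPUs (otherwise set_device("gpu:1") will fail).
import paddle

paddle.set_device("gpu:1")  # gpu:1 becomes the current expected place
x = paddle.to_tensor([1.0, 2.0, 3.0], place=paddle.CPUPlace())

y = x.cuda()             # device_id=None -> follows the expected place
print(y.place)           # Place(gpu:1), no longer hard-coded Place(gpu:0)

z = x.cuda(device_id=0)  # an explicit int still picks that device
print(z.place)           # Place(gpu:0)

try:
    x.cuda(device_id="0")  # anything but int or None is rejected
except ValueError as err:
    print(err)             # device_id must be int|None
```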
Parent 98f8fa4c
@@ -75,7 +75,7 @@ void FusedAttentionCsrGradKernel(const Context& dev_ctx,
 #if CUDA_VERSION >= 11070
   /* Step1: Forward: softmax{CSR} * value{Dense} -> out{Dense}, reuse */
   SparseCsrTensor dsoftmax;
-  CsrDenseMatmulGradKernel<T, Context>(
+  MatmulCsrDenseGradKernel<T, Context>(
       dev_ctx, softmax, value, dout, &dsoftmax, dvalue);
   /* Step2: Calculate grad of sdd_result, manually not reuse */
......
@@ -263,7 +263,7 @@ void FusedAttentionCsrKernel(
   /* Step3: DSD Matmul, reuse */
   softmax->set_dims(phi::make_ddim({q_dim[0], q_dim[1], q_dim[2], q_dim[2]}));
-  CsrDenseMatmulKernel<T, Context>(dev_ctx, *softmax, value, out);
+  MatmulCsrDenseKernel<T, Context>(dev_ctx, *softmax, value, out);
 #else
   PADDLE_THROW(
       phi::errors::Unimplemented("forward of 'sparse.nn.functional.attention' "
......
@@ -866,15 +866,20 @@ def monkey_patch_varbase():
         return res

     @framework.dygraph_only
-    def cuda(self, device_id=0, blocking=True):
+    def cuda(self, device_id=None, blocking=True):
         if device_id is None:
-            device_id = 0
-        if not isinstance(device_id, int):
-            raise ValueError("\'device_id\' must be a positive integer")
-        if self.place.is_gpu_place():
+            res_place = framework._current_expected_place()
+            if not isinstance(res_place, core.CUDAPlace):
+                res_place = core.CUDAPlace(0)
+        elif isinstance(device_id, int):
+            res_place = core.CUDAPlace(device_id)
+        else:
+            raise ValueError("device_id must be int|None")
+
+        if self.place._equals(res_place):
             return self
         else:
-            res = self._copy_to(core.CUDAPlace(device_id), True)
+            res = self._copy_to(res_place, True)
             res.stop_gradient = self.stop_gradient
             res.persistable = self.persistable
             return res
......
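Read outside the monkey patch, the new resolution logic reduces to three branches. The following is an illustrative, self-contained restatement for readability; `resolve_cuda_place` and the stand-in `CUDAPlace`/`current_expected_place` names are hypothetical, not Paddle API:

```python
# Illustrative restatement of the new device_id dispatch; these names are
# stand-ins, not Paddle's actual classes or helpers.
class CUDAPlace:
    def __init__(self, device_id: int):
        self.device_id = device_id

def resolve_cuda_place(device_id, current_expected_place):
    """Mirror the patched Tensor.cuda device selection."""
    if device_id is None:
        # Prefer the globally expected place; fall back to GPU 0 when the
        # expected place is not a CUDA device (e.g. CPU).
        place = current_expected_place()
        return place if isinstance(place, CUDAPlace) else CUDAPlace(0)
    if isinstance(device_id, int):
        return CUDAPlace(device_id)
    raise ValueError("device_id must be int|None")

# Example: the expected place is not a CUDA device, so None resolves to GPU 0.
print(resolve_cuda_place(None, lambda: object()).device_id)  # 0
```

The copy itself then happens only when `self.place` differs from the resolved place, which is why a tensor already on the target GPU is returned unchanged while one on a different GPU gets moved.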
@@ -34,7 +34,7 @@ class TestVarBase(unittest.TestCase):
     def func_test_to_tensor(self):
-        def _test_place(place):
+        def check_with_place(place):
             with fluid.dygraph.guard():
                 paddle.set_default_dtype('float32')
                 # set_default_dtype should not take effect on int
@@ -79,6 +79,7 @@ class TestVarBase(unittest.TestCase):
                 y = x.pin_memory()
                 self.assertEqual(y.place.__repr__(), "Place(gpu_pinned)")
                 y = x.cuda()
+                self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                 y = x.cuda(None)
                 self.assertEqual(y.place.__repr__(), "Place(gpu:0)")
                 y = x.cuda(device_id=0)
@@ -266,16 +267,16 @@ class TestVarBase(unittest.TestCase):
             with self.assertRaises(ValueError):
                 paddle.to_tensor([[1], [2, 3]], place=1)

-        _test_place(core.CPUPlace())
-        _test_place("cpu")
+        check_with_place(core.CPUPlace())
+        check_with_place("cpu")
         if core.is_compiled_with_cuda():
-            _test_place(core.CUDAPinnedPlace())
-            _test_place("gpu_pinned")
-            _test_place(core.CUDAPlace(0))
-            _test_place("gpu:0")
+            check_with_place(core.CUDAPinnedPlace())
+            check_with_place("gpu_pinned")
+            check_with_place(core.CUDAPlace(0))
+            check_with_place("gpu:0")
         if core.is_compiled_with_npu():
-            _test_place(core.NPUPlace(0))
-            _test_place("npu:0")
+            check_with_place(core.NPUPlace(0))
+            check_with_place("npu:0")

     def test_to_tensor(self):
         with _test_eager_guard():
......