未验证 提交 28b64075 编写于 作者: X xiayanming 提交者: GitHub

[HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is in… (#35394)

* [HIP] Fix ops not supporting AMD GPUs: the flag PADDLE_WITH_ROCM is invalid (the guard must test PADDLE_WITH_HIP)

* [HIP] Fix ops not supporting AMD GPUs: the flag PADDLE_WITH_ROCM is invalid (the guard must test PADDLE_WITH_HIP)

* [HIP] Fix ops not supporting AMD GPUs
上级 266fcbe0
...@@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, ...@@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
ops::MemcpyD2HKernel, plat::float16, ops::MemcpyD2HKernel, plat::float16,
ops::MemcpyD2HKernel); ops::MemcpyD2HKernel);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel, ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel,
int64_t, ops::MemcpyD2HKernel, bool, int64_t, ops::MemcpyD2HKernel, bool,
......
...@@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, ...@@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
ops::MemcpyH2DKernel, plat::float16, ops::MemcpyH2DKernel, plat::float16,
ops::MemcpyH2DKernel); ops::MemcpyH2DKernel);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel, ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel,
int64_t, ops::MemcpyH2DKernel, bool, int64_t, ops::MemcpyH2DKernel, bool,
......
...@@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ...@@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
ops::MemcpyKernel, plat::float16, ops::MemcpyKernel, plat::float16,
ops::MemcpyKernel); ops::MemcpyKernel);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
ops::MemcpyKernel, int, ops::MemcpyKernel, ops::MemcpyKernel, int, ops::MemcpyKernel,
int64_t, ops::MemcpyKernel, bool, int64_t, ops::MemcpyKernel, bool,
......
...@@ -64,7 +64,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): ...@@ -64,7 +64,7 @@ class TestMemcpy_FillConstant(unittest.TestCase):
}) })
return main_program, gpu_var, pinned_var return main_program, gpu_var, pinned_var
def test_gpu_cpoy_to_pinned(self): def test_gpu_copy_to_pinned(self):
main_program, gpu_var, pinned_var = self.get_prog() main_program, gpu_var, pinned_var = self.get_prog()
main_program.global_block().append_op( main_program.global_block().append_op(
type='memcpy', type='memcpy',
...@@ -79,7 +79,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): ...@@ -79,7 +79,7 @@ class TestMemcpy_FillConstant(unittest.TestCase):
self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(gpu_, pinned_))
self.assertTrue(np.allclose(pinned_, np.ones((10, 10)))) self.assertTrue(np.allclose(pinned_, np.ones((10, 10))))
def test_pinned_cpoy_gpu(self): def test_pinned_copy_gpu(self):
main_program, gpu_var, pinned_var = self.get_prog() main_program, gpu_var, pinned_var = self.get_prog()
main_program.global_block().append_op( main_program.global_block().append_op(
type='memcpy', type='memcpy',
...@@ -94,6 +94,59 @@ class TestMemcpy_FillConstant(unittest.TestCase): ...@@ -94,6 +94,59 @@ class TestMemcpy_FillConstant(unittest.TestCase):
self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(gpu_, pinned_))
self.assertTrue(np.allclose(gpu_, np.zeros((10, 10)))) self.assertTrue(np.allclose(gpu_, np.zeros((10, 10))))
def test_hip_copy_bool_value(self):
    """Check that ``memcpy`` copies a bool tensor from pinned memory to GPU.

    The program fills a one-element bool variable named ``tensor@GPU`` with
    ``False`` and one named ``tensor@Pinned`` with ``True``, then runs the
    ``memcpy`` op with the pinned variable as input and the GPU variable as
    output. After execution the GPU variable must hold ``True``.

    The test only applies to ROCm builds; on any other build it is reported
    as skipped instead of silently passing without exercising anything.
    """
    if not core.is_compiled_with_rocm():
        self.skipTest("memcpy bool copy test requires a ROCm build")

    paddle.enable_static()
    main_program = Program()
    with program_guard(main_program):
        block = main_program.global_block()
        pinned_var_name = "tensor@Pinned"
        gpu_var_name = "tensor@GPU"

        pinned_var = block.create_var(
            name=pinned_var_name,
            shape=[1],
            dtype='bool',
            persistable=False,
            stop_gradient=True)
        gpu_var = block.create_var(
            name=gpu_var_name,
            shape=[1],
            dtype='bool',
            persistable=False,
            stop_gradient=True)

        # NOTE(review): place_type 1 / 2 presumably select GPU / pinned
        # memory, matching the variable names -- confirm against the
        # fill_constant op definition.
        block.append_op(
            type="fill_constant",
            outputs={"Out": gpu_var_name},
            attrs={
                "shape": [1],
                "dtype": gpu_var.dtype,
                "value": False,
                "place_type": 1
            })
        block.append_op(
            type="fill_constant",
            outputs={"Out": pinned_var_name},
            attrs={
                "shape": [1],
                "dtype": pinned_var.dtype,
                "value": True,
                "place_type": 2
            })

        # Overwrite the GPU tensor (False) with the pinned tensor (True).
        block.append_op(
            type='memcpy',
            inputs={'X': pinned_var},
            outputs={'Out': gpu_var},
            attrs={'dst_place_type': 1})

        exe = fluid.Executor(fluid.CUDAPlace(0))
        # Both variables are still fetched, as before; only the GPU result
        # is inspected.
        gpu_, _ = exe.run(main_program,
                          feed={},
                          fetch_list=[gpu_var.name, pinned_var.name])

        expect_value = np.array([True])
        self.assertTrue(np.array_equal(gpu_, expect_value))
class TestMemcpyOPError(unittest.TestCase): class TestMemcpyOPError(unittest.TestCase):
def get_prog(self): def get_prog(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册