diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index 41b8b367918f80d3bb615e9e7cb8d3c37f7ba04f..3158b0963a43add3b27de9a1e404a70b5155d975 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel, plat::float16, ops::MemcpyD2HKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel, int64_t, ops::MemcpyD2HKernel, bool, diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index e439be162018361ef119f5372f009b9b49bd81aa..f100dc6f7a53ee122bf5d791e6a20b6e88097e57 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel, plat::float16, ops::MemcpyH2DKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel, int64_t, ops::MemcpyH2DKernel, bool, diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index ecd2d48dcbd102baffaccfd5de369462b5f8e527..56eee13cb060a66178d907bff55c4aef67f59e60 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, plat::float16, ops::MemcpyKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, int, ops::MemcpyKernel, int64_t, ops::MemcpyKernel, bool, diff --git a/python/paddle/fluid/tests/unittests/test_memcpy_op.py b/python/paddle/fluid/tests/unittests/test_memcpy_op.py index 3fecef9397c63043e6c3a1995217f2d9e78c0f58..d6efe4d471efdc4ccbe7995b14dbbfb82c67674a 100755 --- a/python/paddle/fluid/tests/unittests/test_memcpy_op.py +++ b/python/paddle/fluid/tests/unittests/test_memcpy_op.py @@ -64,7 +64,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): }) return main_program, gpu_var, pinned_var - def test_gpu_cpoy_to_pinned(self): + def test_gpu_copy_to_pinned(self): main_program, gpu_var, pinned_var = self.get_prog() main_program.global_block().append_op( type='memcpy', @@ -79,7 +79,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(pinned_, np.ones((10, 10)))) - def test_pinned_cpoy_gpu(self): + def test_pinned_copy_gpu(self): main_program, gpu_var, pinned_var = self.get_prog() main_program.global_block().append_op( type='memcpy', @@ -94,6 +94,59 @@ class TestMemcpy_FillConstant(unittest.TestCase): self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(gpu_, np.zeros((10, 10)))) + def test_hip_copy_bool_value(self): + if core.is_compiled_with_rocm(): + paddle.enable_static() + main_program = Program() + with program_guard(main_program): + pinned_var_name = "tensor@Pinned" + gpu_var_name = "tensor@GPU" + pinned_var = main_program.global_block().create_var( + name=pinned_var_name, + shape=[1], + dtype='bool', + persistable=False, + stop_gradient=True) + gpu_var = main_program.global_block().create_var( + name=gpu_var_name, + shape=[1], + dtype='bool', + persistable=False, + stop_gradient=True) + main_program.global_block().append_op( + type="fill_constant", + outputs={"Out": gpu_var_name}, + attrs={ + "shape": [1], + "dtype": gpu_var.dtype, + "value": False, + "place_type": 1 + }) + main_program.global_block().append_op( + type="fill_constant", + outputs={"Out": pinned_var_name}, + attrs={ + "shape": [1], + "dtype": gpu_var.dtype, + "value": True, + "place_type": 2 + }) + + main_program.global_block().append_op( + type='memcpy', + inputs={'X': pinned_var}, + outputs={'Out': gpu_var}, + attrs={'dst_place_type': 1}) + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + gpu_, pinned_ = exe.run(main_program, + feed={}, + fetch_list=[gpu_var.name, pinned_var.name]) + expect_value = np.array([1]).astype('bool') + self.assertTrue(np.array_equal(gpu_, expect_value)) + else: + pass + class TestMemcpyOPError(unittest.TestCase): def get_prog(self):