From 28b64075ac4b3ef6e5616edcd803166f6d799a32 Mon Sep 17 00:00:00 2001 From: xiayanming <41795079@qq.com> Date: Tue, 7 Sep 2021 10:31:40 +0800 Subject: [PATCH] =?UTF-8?q?[HIP]=20fix=20op=20not=20support=20AMD=20GPU=20?= =?UTF-8?q?bug,=20the=20flag=20PADDLE=5FWITH=5FROCM=20is=20in=E2=80=A6=20(?= =?UTF-8?q?#35394)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is invalid * [HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is invalid * [HIP] fix op not support AMD GPU bug --- paddle/fluid/operators/memcpy_d2h_op.cc | 2 +- paddle/fluid/operators/memcpy_h2d_op.cc | 2 +- paddle/fluid/operators/memcpy_op.cc | 2 +- .../fluid/tests/unittests/test_memcpy_op.py | 57 ++++++++++++++++++- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/memcpy_d2h_op.cc b/paddle/fluid/operators/memcpy_d2h_op.cc index 41b8b367918..3158b0963a4 100644 --- a/paddle/fluid/operators/memcpy_d2h_op.cc +++ b/paddle/fluid/operators/memcpy_d2h_op.cc @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel, plat::float16, ops::MemcpyD2HKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double, ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel, int64_t, ops::MemcpyD2HKernel, bool, diff --git a/paddle/fluid/operators/memcpy_h2d_op.cc b/paddle/fluid/operators/memcpy_h2d_op.cc index e439be16201..f100dc6f7a5 100644 --- a/paddle/fluid/operators/memcpy_h2d_op.cc +++ b/paddle/fluid/operators/memcpy_h2d_op.cc @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel, plat::float16, ops::MemcpyH2DKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double, ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel, int64_t, ops::MemcpyH2DKernel, bool, diff --git a/paddle/fluid/operators/memcpy_op.cc b/paddle/fluid/operators/memcpy_op.cc index ecd2d48dcbd..56eee13cb06 100644 --- a/paddle/fluid/operators/memcpy_op.cc +++ b/paddle/fluid/operators/memcpy_op.cc @@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, plat::float16, ops::MemcpyKernel); -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM) +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double, ops::MemcpyKernel, int, ops::MemcpyKernel, int64_t, ops::MemcpyKernel, bool, diff --git a/python/paddle/fluid/tests/unittests/test_memcpy_op.py b/python/paddle/fluid/tests/unittests/test_memcpy_op.py index 3fecef9397c..d6efe4d471e 100755 --- a/python/paddle/fluid/tests/unittests/test_memcpy_op.py +++ b/python/paddle/fluid/tests/unittests/test_memcpy_op.py @@ -64,7 +64,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): }) return main_program, gpu_var, pinned_var - def test_gpu_cpoy_to_pinned(self): + def test_gpu_copy_to_pinned(self): main_program, gpu_var, pinned_var = self.get_prog() main_program.global_block().append_op( type='memcpy', @@ -79,7 +79,7 @@ class TestMemcpy_FillConstant(unittest.TestCase): self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(pinned_, np.ones((10, 10)))) - def test_pinned_cpoy_gpu(self): + def test_pinned_copy_gpu(self): main_program, gpu_var, pinned_var = self.get_prog() main_program.global_block().append_op( type='memcpy', @@ -94,6 +94,59 @@ class TestMemcpy_FillConstant(unittest.TestCase): self.assertTrue(np.allclose(gpu_, pinned_)) self.assertTrue(np.allclose(gpu_, np.zeros((10, 10)))) + def test_hip_copy_bool_value(self): + if core.is_compiled_with_rocm(): + paddle.enable_static() + main_program = Program() + with program_guard(main_program): + pinned_var_name = "tensor@Pinned" + gpu_var_name = "tensor@GPU" + pinned_var = main_program.global_block().create_var( + name=pinned_var_name, + shape=[1], + dtype='bool', + persistable=False, + stop_gradient=True) + gpu_var = main_program.global_block().create_var( + name=gpu_var_name, + shape=[1], + dtype='bool', + persistable=False, + stop_gradient=True) + main_program.global_block().append_op( + type="fill_constant", + outputs={"Out": gpu_var_name}, + attrs={ + "shape": [1], + "dtype": gpu_var.dtype, + "value": False, + "place_type": 1 + }) + main_program.global_block().append_op( + type="fill_constant", + outputs={"Out": pinned_var_name}, + attrs={ + "shape": [1], + "dtype": gpu_var.dtype, + "value": True, + "place_type": 2 + }) + + main_program.global_block().append_op( + type='memcpy', + inputs={'X': pinned_var}, + outputs={'Out': gpu_var}, + attrs={'dst_place_type': 1}) + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + gpu_, pinned_ = exe.run(main_program, + feed={}, + fetch_list=[gpu_var.name, pinned_var.name]) + expect_value = np.array([1]).astype('bool') + self.assertTrue(np.array_equal(gpu_, expect_value)) + else: + pass + class TestMemcpyOPError(unittest.TestCase): def get_prog(self): -- GitLab