From 45d1fb8d5e5e0662943203b2fbe64dbcf1811e25 Mon Sep 17 00:00:00 2001 From: 0x45f <23097963+0x45f@users.noreply.github.com> Date: Mon, 21 Mar 2022 20:59:11 +0800 Subject: [PATCH] Refine to_tensor for eager mode and support gpu_pinned (#40535) * Refine to_tensor for eager mode * support gpu_pinned * refine code * support gpu_pinned copy_to * fix layer.__setattr__ * support to_tensor for gpu_pinned * fix unit test * refine gpu_pinned * restore the original code * add is_gpu_pinned() and refine eager.Tensor._copy_to() --- .../fluid/distributed/collective/ProcessGroupNCCL.cc | 2 +- .../tests/data_structure_tests/eager_tensor_test.cc | 2 +- paddle/fluid/pybind/eager_method.cc | 4 ++-- paddle/phi/api/include/tensor.h | 12 ++++++++++-- paddle/phi/api/lib/tensor.cc | 6 +++++- paddle/phi/kernels/gpu/copy_kernel.cu | 3 ++- python/paddle/fluid/dygraph/layers.py | 3 ++- .../fluid/tests/unittests/test_egr_python_api.py | 4 ++++ python/paddle/tensor/creation.py | 2 +- 9 files changed, 28 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index 7f21bcee87a..70e111b0a9a 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -417,7 +417,7 @@ void CheckTensorsInDifferentDevices(const std::vector& tensors, std::set used_devices; for (const auto& t : tensors) { - PADDLE_ENFORCE_EQ(t.is_cuda() && t.is_dense_tensor(), true, + PADDLE_ENFORCE_EQ(t.is_gpu() && t.is_dense_tensor(), true, platform::errors::InvalidArgument( "Tensors must be CUDA and dense tensor.")); diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index c8b2d22dcf9..5fec38bf25a 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -90,7 +90,7 
@@ TEST(Tensor, MemberFunction) { VLOG(6) << "Set impl"; CHECK_EQ(et3.initialized(), true); CHECK_EQ(et3.is_cpu(), true); - CHECK_EQ(et3.is_cuda(), false); + CHECK_EQ(et3.is_gpu(), false); CHECK_EQ(et3.numel(), 2); auto expected_dim = phi::make_ddim({1, 2}); CHECK_EQ(et3.dims(), expected_dim); diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 49745e5679d..dd237b76728 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -175,7 +175,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args, pybind11::detail::npy_api::NPY_ARRAY_WRITEABLE_, nullptr); - if (self->tensor.is_cpu()) { + if (self->tensor.is_cpu() || self->tensor.is_gpu_pinned()) { auto dense_tensor = std::dynamic_pointer_cast(self->tensor.impl()); platform::CPUPlace place; @@ -184,7 +184,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args, pybind11::detail::array_proxy(array)->data), place, dense_tensor->data(), sizeof_dtype * numel); #if defined(PADDLE_WITH_CUDA) - } else if (self->tensor.is_cuda()) { + } else if (self->tensor.is_gpu()) { auto dense_tensor = std::dynamic_pointer_cast(self->tensor.impl()); diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index c58ebe69523..649f4d11383 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -269,12 +269,20 @@ class PADDLE_API Tensor final { bool is_cpu() const; /** - * @brief Determine whether the tensor device is CUDA + * @brief Determine whether the tensor device is GPU * * @return true * @return false */ - bool is_cuda() const; + bool is_gpu() const; + + /** + * @brief Determine whether the tensor device is GPU_PINNED + * + * @return true + * @return false + */ + bool is_gpu_pinned() const; /* Part 4: Data Access methods */ diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 066287d4244..b9b6ca36f67 100644 --- a/paddle/phi/api/lib/tensor.cc +++ 
b/paddle/phi/api/lib/tensor.cc @@ -163,10 +163,14 @@ bool Tensor::is_cpu() const { return paddle::platform::is_cpu_place(inner_place()); } -bool Tensor::is_cuda() const { +bool Tensor::is_gpu() const { return paddle::platform::is_gpu_place(inner_place()); } +bool Tensor::is_gpu_pinned() const { + return paddle::platform::is_cuda_pinned_place(inner_place()); +} + /* Part 4: Data Access methods */ template diff --git a/paddle/phi/kernels/gpu/copy_kernel.cu b/paddle/phi/kernels/gpu/copy_kernel.cu index 4545f9ce436..a16c8369cc9 100644 --- a/paddle/phi/kernels/gpu/copy_kernel.cu +++ b/paddle/phi/kernels/gpu/copy_kernel.cu @@ -87,7 +87,8 @@ void Copy(const Context& dev_ctx, : reinterpret_cast(dev_ctx).stream(); paddle::memory::Copy( dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); - } else if (paddle::platform::is_cpu_place(src_place) && // NOLINT + } else if ((paddle::platform::is_cpu_place(src_place) || + paddle::platform::is_cuda_pinned_place(src_place)) && // NOLINT paddle::platform::is_gpu_place(dst_place)) { auto src_cpu_place = src_place; auto dst_gpu_place = dst_place; diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 6957850d205..f4334085620 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -1155,7 +1155,8 @@ class Layer(object): layers[name] = None else: _buffers = self.__dict__.get('_buffers', None) - if type(value) == core.VarBase: + if type(value) == core.VarBase or \ + type(value) == core.eager.Tensor: if _buffers is None: raise ValueError( "super(YourLayer, self).__init__() should be called first" diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 98ef339e045..8166598677a 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -637,6 +637,10 @@ class 
EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase): self.assertTrue(tensor3.persistable, True) self.assertTrue(tensor3.stop_gradient, True) self.assertTrue(tensor3.place.is_gpu_place()) + tensor4 = paddle.to_tensor([1, 2, 3], place='gpu_pinned') + tensor5 = tensor4._copy_to(core.CUDAPlace(0), True) + self.assertTrue( + np.array_equal(tensor4.numpy(), tensor5.numpy())) else: tensor3 = tensor2._copy_to(core.CPUPlace(), True) self.assertTrue(np.array_equal(tensor3.numpy(), arr2)) diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index bdb0eabe2bb..9cef336aa54 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -127,7 +127,7 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): "\n\tFaild to convert input data to a regular ndarray :\n\t - Usually " "this means the input data contains nested lists with different lengths. " ) - elif isinstance(data, paddle.Tensor): + elif isinstance(data, (paddle.Tensor, core.eager.Tensor)): data = data._copy_to(place, False) data = _handle_dtype(data, dtype) data.stop_gradient = stop_gradient -- GitLab