Unverified · Commit 45d1fb8d authored by 0x45f, committed by GitHub

Refine to_tensor for eager mode and support gpu_pinned (#40535)

* Refine to_tensor for eager mode

* support gpu_pinned

* refine code

* support gpu_pinned copy_to

* fix layer.__setattr__

* support to_tensor for gpu_pinned

* fix unit test

* refine gpu_pinned

* restore the original code

* add is_gpu_pinned() and refine eager.Tensor._copy_to()
Parent f9be89e0
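
In short: eager-mode paddle.to_tensor can now place data in CUDA pinned (page-locked) host memory, and the C++ Tensor API gains is_gpu()/is_gpu_pinned() queries (renaming is_cuda()). A minimal sketch of the new user-facing surface, mirroring this PR's unit test and assuming a CUDA build of Paddle:

    import paddle
    import paddle.fluid.core as core

    # New: to_tensor accepts the 'gpu_pinned' place in eager mode.
    t = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
    # New: a pinned tensor can be copied onto a CUDA device.
    g = t._copy_to(core.CUDAPlace(0), True)
    print(t.numpy(), g.numpy())  # identical contents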
@@ -417,7 +417,7 @@ void CheckTensorsInDifferentDevices(const std::vector<Tensor>& tensors,
   std::set<Place> used_devices;

   for (const auto& t : tensors) {
-    PADDLE_ENFORCE_EQ(t.is_cuda() && t.is_dense_tensor(), true,
+    PADDLE_ENFORCE_EQ(t.is_gpu() && t.is_dense_tensor(), true,
                       platform::errors::InvalidArgument(
                           "Tensors must be CUDA and dense tensor."));
......
@@ -90,7 +90,7 @@ TEST(Tensor, MemberFunction) {
   VLOG(6) << "Set impl";
   CHECK_EQ(et3.initialized(), true);
   CHECK_EQ(et3.is_cpu(), true);
-  CHECK_EQ(et3.is_cuda(), false);
+  CHECK_EQ(et3.is_gpu(), false);
   CHECK_EQ(et3.numel(), 2);
   auto expected_dim = phi::make_ddim({1, 2});
   CHECK_EQ(et3.dims(), expected_dim);
......
@@ -175,7 +175,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args,
       pybind11::detail::npy_api::NPY_ARRAY_WRITEABLE_,
       nullptr);
-  if (self->tensor.is_cpu()) {
+  if (self->tensor.is_cpu() || self->tensor.is_gpu_pinned()) {
     auto dense_tensor =
         std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
     platform::CPUPlace place;
@@ -184,7 +184,7 @@ static PyObject* tensor_method_numpy(TensorObject* self, PyObject* args,
           pybind11::detail::array_proxy(array)->data),
       place, dense_tensor->data(), sizeof_dtype * numel);
 #if defined(PADDLE_WITH_CUDA)
-  } else if (self->tensor.is_cuda()) {
+  } else if (self->tensor.is_gpu()) {
     auto dense_tensor =
         std::dynamic_pointer_cast<phi::DenseTensor>(self->tensor.impl());
......
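
Because pinned memory is host-addressable, tensor_method_numpy can now serve a gpu_pinned tensor through the plain CPU memcpy branch instead of the device branch. A small sketch of the visible effect, assuming a CUDA build:

    import paddle

    t = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
    arr = t.numpy()  # read from pinned host memory directly; no device transfer
    print(arr)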
@@ -269,12 +269,20 @@ class PADDLE_API Tensor final {
   bool is_cpu() const;

   /**
-   * @brief Determine whether the tensor device is CUDA
+   * @brief Determine whether the tensor device is GPU
    *
    * @return true
    * @return false
    */
-  bool is_cuda() const;
+  bool is_gpu() const;
+
+  /**
+   * @brief Determine whether the tensor device is GPU_PINNED
+   *
+   * @return true
+   * @return false
+   */
+  bool is_gpu_pinned() const;

   /* Part 4: Data Access methods */
......
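
On the Python side the same information is available through Tensor.place. A hedged sketch, assuming the Place bindings expose is_cuda_pinned_place(), which is the predicate the new C++ helper wraps:

    import paddle

    t = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
    # Mirrors the new C++ is_gpu()/is_gpu_pinned() queries.
    print(t.place.is_cuda_pinned_place())  # True: pinned host memory
    print(t.place.is_gpu_place())          # False: not device memory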
@@ -163,10 +163,14 @@ bool Tensor::is_cpu() const {
   return paddle::platform::is_cpu_place(inner_place());
 }

-bool Tensor::is_cuda() const {
+bool Tensor::is_gpu() const {
   return paddle::platform::is_gpu_place(inner_place());
 }

+bool Tensor::is_gpu_pinned() const {
+  return paddle::platform::is_cuda_pinned_place(inner_place());
+}
+
 /* Part 4: Data Access methods */

 template <typename T>
......
@@ -87,7 +87,8 @@ void Copy(const Context& dev_ctx,
             : reinterpret_cast<const phi::GPUContext&>(dev_ctx).stream();
     paddle::memory::Copy(
         dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
-  } else if (paddle::platform::is_cpu_place(src_place) &&  // NOLINT
+  } else if ((paddle::platform::is_cpu_place(src_place) ||
+              paddle::platform::is_cuda_pinned_place(src_place)) &&  // NOLINT
              paddle::platform::is_gpu_place(dst_place)) {
     auto src_cpu_place = src_place;
     auto dst_gpu_place = dst_place;
......
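
This widened condition is what routes pinned-to-GPU transfers through the ordinary host-to-device memory::Copy path. A sketch of two copies that now take the same branch, assuming a CUDA build and the standard Tensor.cuda() helper:

    import paddle

    host = paddle.to_tensor([1.0, 2.0])                        # plain CPU source
    pinned = paddle.to_tensor([1.0, 2.0], place='gpu_pinned')  # page-locked source
    a = host.cuda()    # host-to-device branch
    b = pinned.cuda()  # same branch after this change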
@@ -1155,7 +1155,8 @@ class Layer(object):
                 layers[name] = None
             else:
                 _buffers = self.__dict__.get('_buffers', None)
-                if type(value) == core.VarBase:
+                if type(value) == core.VarBase or \
+                        type(value) == core.eager.Tensor:
                     if _buffers is None:
                         raise ValueError(
                             "super(YourLayer, self).__init__() should be called first"
......
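
The __setattr__ fix matters because Layer records plain tensor attributes as buffers; under eager mode the assigned value is a core.eager.Tensor rather than a core.VarBase, so the old check skipped registration. A hedged sketch of the behavior this restores (my_buffer is an illustrative name, and the registration effect is inferred from the surrounding branch):

    import paddle

    class MyLayer(paddle.nn.Layer):
        def __init__(self):
            super(MyLayer, self).__init__()
            # Routed through Layer.__setattr__, which now also recognizes
            # eager-mode tensors and records them in self._buffers.
            self.my_buffer = paddle.to_tensor([0.0])

    layer = MyLayer()
    print([name for name, _ in layer.named_buffers()])  # expected: ['my_buffer']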
@@ -637,6 +637,10 @@ class EagerVariablePropertiesAndMethodsTestCase(unittest.TestCase):
             self.assertTrue(tensor3.persistable, True)
             self.assertTrue(tensor3.stop_gradient, True)
             self.assertTrue(tensor3.place.is_gpu_place())
+            tensor4 = paddle.to_tensor([1, 2, 3], place='gpu_pinned')
+            tensor5 = tensor4._copy_to(core.CUDAPlace(0), True)
+            self.assertTrue(
+                np.array_equal(tensor4.numpy(), tensor5.numpy()))
         else:
             tensor3 = tensor2._copy_to(core.CPUPlace(), True)
             self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
......
@@ -127,7 +127,7 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True):
                 "\n\tFaild to convert input data to a regular ndarray :\n\t - Usually "
                 "this means the input data contains nested lists with different lengths. "
             )
-    elif isinstance(data, paddle.Tensor):
+    elif isinstance(data, (paddle.Tensor, core.eager.Tensor)):
         data = data._copy_to(place, False)
         data = _handle_dtype(data, dtype)
         data.stop_gradient = stop_gradient
......
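
With the widened isinstance check, an eager-mode tensor passed back into to_tensor takes the copy path above instead of the ndarray conversion. A short sketch, assuming a CUDA build:

    import paddle

    x = paddle.to_tensor([1.0, 2.0, 3.0])        # eager Tensor on the default place
    y = paddle.to_tensor(x, place='gpu_pinned',  # hits the isinstance branch above:
                         stop_gradient=False)    # _copy_to, then dtype/stop_gradient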