From 4da467370f3be2e6336d51760fba9debb0304318 Mon Sep 17 00:00:00 2001
From: wanghuancoder
Date: Sun, 3 Apr 2022 15:39:41 +0800
Subject: [PATCH] [Eager] do not mutabledata when init (#41331)

* do not mutabledata when init, test=develop

* refine, test=develop

* fix copy_, test=develop

* refine, test=develop
---
 paddle/fluid/pybind/eager.cc                  |  7 ++---
 paddle/fluid/pybind/eager_method.cc           | 11 ++++++--
 .../test_cuda_max_memory_allocated.py         | 28 +++++++++++++++----
 .../unittests/test_cuda_memory_reserved.py    | 28 +++++++++++++++----
 4 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 5278f371dd4..657c79e7bd3 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -77,9 +77,6 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
           phi::make_intrusive(place),
           phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                                ddims));
-  if (phi::product(ddims) > 0) {
-    dense_tensor->mutable_data(place);
-  }
   self->tensor.set_impl(dense_tensor);
 }
 
@@ -92,6 +89,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
 }
 
 void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
+                              const paddle::platform::Place& place,
                               bool zero_copy = false) {
   PADDLE_ENFORCE_EQ(
       self->tensor.defined(), true,
@@ -102,7 +100,6 @@ void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
           "eager tensor before init it with NumPy."));
   phi::DenseTensor* impl_ptr =
       static_cast<phi::DenseTensor*>(self->tensor.impl().get());
-  paddle::platform::Place place = impl_ptr->place();
   if (platform::is_cpu_place(place)) {
     SetTensorFromPyArray(impl_ptr, array, place, zero_copy);
   } else if (platform::is_xpu_place(place)) {
@@ -289,7 +286,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
 
   EmptyTensorInitializer(py_tensor_ptr, act_name, place, persistable,
                          stop_gradient);
-  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, zero_copy);
+  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy);
 }
 
 // initialize Tensor by Tensor or framework::Tensor (mix args and
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index d9face124bd..814243e0a57 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -330,17 +330,22 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
-  if (!self->tensor.defined()) {
+  if (!self->tensor.initialized()) {
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetStopGradient(
             egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetPersistable(
             egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, src_tensor.inner_place(), blocking);
+    }
+  } else {
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
+    }
   }
 
-  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
-
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
   Py_INCREF(Py_None);
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py
index 51c9ba182ab..ae8bdeed1ef 100644
--- a/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py
+++ b/python/paddle/fluid/tests/unittests/test_cuda_max_memory_allocated.py
@@ -16,10 +16,11 @@ import paddle
 import unittest
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_allocated, max_memory_allocated
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph
 
 
 class TestMaxMemoryAllocated(unittest.TestCase):
-    def test_max_memory_allocated(self, device=None):
+    def func_test_max_memory_allocated(self, device=None):
         if core.is_compiled_with_cuda():
             alloc_time = 100
             max_alloc_size = 10000
@@ -35,16 +36,26 @@ class TestMaxMemoryAllocated(unittest.TestCase):
             self.assertEqual(peak_memory_allocated_size,
                              max_memory_allocated(device))
 
-    def test_max_memory_allocated_for_all_places(self):
+    def test_max_memory_allocated(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated()
+        self.func_test_max_memory_allocated()
+
+    def func_test_max_memory_allocated_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_max_memory_allocated(core.CUDAPlace(i))
-                self.test_max_memory_allocated(i)
-                self.test_max_memory_allocated("gpu:" + str(i))
+                self.func_test_max_memory_allocated(core.CUDAPlace(i))
+                self.func_test_max_memory_allocated(i)
+                self.func_test_max_memory_allocated("gpu:" + str(i))
 
-    def test_max_memory_allocated_exception(self):
+    def test_max_memory_allocated_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_for_all_places()
+        self.func_test_max_memory_allocated_for_all_places()
+
+    def func_test_max_memory_allocated_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -56,6 +67,11 @@ class TestMaxMemoryAllocated(unittest.TestCase):
             with self.assertRaises(BaseException):
                 max_memory_allocated()
 
+    def test_max_memory_allocated_exception(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_exception()
+        self.func_test_max_memory_allocated_exception()
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py
index 149760de8b2..ca551ab4a3f 100644
--- a/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py
+++ b/python/paddle/fluid/tests/unittests/test_cuda_memory_reserved.py
@@ -17,26 +17,37 @@ import unittest
 import numpy as np
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_reserved
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph
 
 
 class TestMemoryreserved(unittest.TestCase):
-    def test_memory_reserved(self, device=None):
+    def func_test_memory_reserved(self, device=None):
         if core.is_compiled_with_cuda():
             tensor = paddle.zeros(shape=[256])
             alloc_size = 4 * 256  # 256 float32 data, with 4 bytes for each one
             memory_reserved_size = memory_reserved(device)
             self.assertEqual(memory_reserved_size, alloc_size)
 
-    def test_memory_reserved_for_all_places(self):
+    def test_memory_reserved(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved()
+        self.func_test_memory_reserved()
+
+    def func_test_memory_reserved_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_memory_reserved(core.CUDAPlace(i))
-                self.test_memory_reserved(i)
-                self.test_memory_reserved("gpu:" + str(i))
+                self.func_test_memory_reserved(core.CUDAPlace(i))
+                self.func_test_memory_reserved(i)
+                self.func_test_memory_reserved("gpu:" + str(i))
 
-    def test_memory_reserved_exception(self):
+    def test_memory_reserved_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_for_all_places()
+        self.func_test_memory_reserved_for_all_places()
+
+    def func_test_memory_reserved_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -48,6 +59,11 @@ class TestMemoryreserved(unittest.TestCase):
             with self.assertRaises(BaseException):
                 memory_reserved()
 
+    def test_memory_reserved_exception(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_exception()
+        self.func_test_memory_reserved_exception()
+
 
 if __name__ == "__main__":
     unittest.main()
--
GitLab
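
A minimal sketch, not part of the patch, of the dual-mode test pattern the patch applies: each `func_test_*` body runs once under `_test_eager_guard()` (eager mode) and once more in legacy dygraph mode. The test class name `TestZerosAllocates` and its assertion are illustrative assumptions (they presume a CUDA build and a fresh process); only APIs already used in the patch appear here (`paddle.zeros`, `memory_allocated`, `paddle.device.set_device`, `_test_eager_guard`).

    import unittest

    import paddle
    from paddle.fluid import core
    from paddle.device.cuda import memory_allocated
    from paddle.fluid.framework import _test_eager_guard


    class TestZerosAllocates(unittest.TestCase):
        def func_test_zeros_allocates(self):
            # After this patch, constructing an empty eager tensor no longer
            # calls mutable_data, so device memory should only be allocated
            # once real data is written (e.g. by paddle.zeros).
            if core.is_compiled_with_cuda():
                paddle.device.set_device("gpu:0")
                before = memory_allocated()
                tensor = paddle.zeros(shape=[256])  # materializes storage
                self.assertGreater(memory_allocated(), before)

        def test_zeros_allocates(self):
            # Dual-mode pattern used throughout the patched tests:
            # run once under the eager guard, then once in legacy dygraph.
            with _test_eager_guard():
                self.func_test_zeros_allocates()
            self.func_test_zeros_allocates()


    if __name__ == "__main__":
        unittest.main()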