Unverified commit 4da46737, authored by wanghuancoder, committed by GitHub

[Eager] do not mutabledata when init (#41331)

* do not mutabledata when init, test=develop

* refine, test=develop

* fix copy_, test=develop

* refine, test=develop
Parent 868a3203
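
The core change: EmptyTensorInitializer now builds only the DenseTensor shell (storage handle plus meta) and no longer calls mutable_data, so device memory is first allocated when real data arrives, e.g. in InitTensorWithNumpyValue. A rough way to observe this on a CUDA build (a sketch, not part of this commit; exact byte counts depend on the allocator):

    import paddle
    from paddle.device.cuda import memory_allocated

    paddle.device.set_device("gpu:0")
    before = memory_allocated()
    t = paddle.to_tensor([1.0, 2.0, 3.0])  # shell first, one allocation when the data is copied in
    after = memory_allocated()
    print(after - before)  # reflects t's buffer only; no separate pre-allocation at init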
@@ -77,9 +77,6 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
       phi::make_intrusive<paddle::experimental::SharedStorage>(place),
       phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                            ddims));
-  if (phi::product(ddims) > 0) {
-    dense_tensor->mutable_data(place);
-  }
   self->tensor.set_impl(dense_tensor);
 }
@@ -92,6 +89,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
 }

 void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
+                              const paddle::platform::Place& place,
                               bool zero_copy = false) {
   PADDLE_ENFORCE_EQ(
       self->tensor.defined(), true,
@@ -102,7 +100,6 @@ void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
           "eager tensor before init it with NumPy."));
   phi::DenseTensor* impl_ptr =
       static_cast<phi::DenseTensor*>(self->tensor.impl().get());
-  paddle::platform::Place place = impl_ptr->place();
   if (platform::is_cpu_place(place)) {
     SetTensorFromPyArray<platform::CPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_xpu_place(place)) {
@@ -289,7 +286,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
   EmptyTensorInitializer(py_tensor_ptr, act_name, place, persistable,
                          stop_gradient);
-  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, zero_copy);
+  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy);
 }

 // initialize Tensor by Tensor or framework::Tensor (mix args and
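
With the buffer gone at init time, InitTensorWithNumpyValue can no longer recover the target place from the tensor itself (the deleted `paddle::platform::Place place = impl_ptr->place();` required an allocated tensor), so AutoInitTensorByPyArray threads the caller's place through explicitly. A Python-style sketch of the C++ call chain (function names lower-cased for illustration):

    # illustrative pseudocode of the flow after this change
    def auto_init_tensor_by_pyarray(py_tensor, numpy_value, place,
                                    persistable, stop_gradient, zero_copy=False):
        # 1) build an empty shell: meta + storage handle, no buffer yet
        empty_tensor_initializer(py_tensor, place, persistable, stop_gradient)
        # 2) allocate and fill on the caller-supplied place; previously the
        #    place was read back from the already-allocated tensor
        init_tensor_with_numpy_value(py_tensor, numpy_value, place, zero_copy)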
......
@@ -330,17 +330,22 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
-  if (!self->tensor.defined()) {
+  if (!self->tensor.initialized()) {
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetStopGradient(
             egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetPersistable(
             egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, src_tensor.inner_place(), blocking);
+    }
+  } else {
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
+    }
   }
-  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
   Py_INCREF(Py_None);
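
tensor_method_copy_ now keys off initialized() instead of defined(), since a lazily built tensor is defined (it has an impl) but not yet initialized (no buffer), and it only copies when the source actually holds data. The new branching, restated as Python-style pseudocode (attribute names are illustrative):

    def tensor_method_copy_(dst, src, blocking):
        if not dst.initialized():
            # dst is a fresh shell: inherit autograd flags from the source
            dst.stop_gradient = src.stop_gradient
            dst.persistable = src.persistable
            if src.initialized():
                # dst has no buffer yet, so copy onto the source's place
                dst.copy_(src, src.inner_place(), blocking)
        else:
            if src.initialized():
                # dst already owns a buffer; keep its existing place
                dst.copy_(src, dst.inner_place(), blocking)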
......
@@ -16,10 +16,11 @@ import paddle
 import unittest
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_allocated, max_memory_allocated
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph


 class TestMaxMemoryAllocated(unittest.TestCase):
-    def test_max_memory_allocated(self, device=None):
+    def func_test_max_memory_allocated(self, device=None):
         if core.is_compiled_with_cuda():
             alloc_time = 100
             max_alloc_size = 10000
@@ -35,16 +36,26 @@ class TestMaxMemoryAllocated(unittest.TestCase):
                 self.assertEqual(peak_memory_allocated_size,
                                  max_memory_allocated(device))

-    def test_max_memory_allocated_for_all_places(self):
+    def test_max_memory_allocated(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated()
+        self.func_test_max_memory_allocated()
+
+    def func_test_max_memory_allocated_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_max_memory_allocated(core.CUDAPlace(i))
-                self.test_max_memory_allocated(i)
-                self.test_max_memory_allocated("gpu:" + str(i))
+                self.func_test_max_memory_allocated(core.CUDAPlace(i))
+                self.func_test_max_memory_allocated(i)
+                self.func_test_max_memory_allocated("gpu:" + str(i))

-    def test_max_memory_allocated_exception(self):
+    def test_max_memory_allocated_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_for_all_places()
+        self.func_test_max_memory_allocated_for_all_places()
+
+    def func_test_max_memory_allocated_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -56,6 +67,11 @@ class TestMaxMemoryAllocated(unittest.TestCase):
                 with self.assertRaises(BaseException):
                     max_memory_allocated()

+    def test_max_memory_allocated_exception(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_exception()
+        self.func_test_max_memory_allocated_exception()
+

 if __name__ == "__main__":
     unittest.main()
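
The test edits follow the convention used across the eager migration: the body moves into a func_test_* helper, and the unittest entry point runs it twice, once under _test_eager_guard (eager mode) and once in legacy dygraph. The skeleton of that pattern (ExampleTest is a hypothetical name, not from this commit):

    import unittest
    from paddle.fluid.framework import _test_eager_guard

    class ExampleTest(unittest.TestCase):
        def func_test_something(self):
            pass  # the actual assertions live here

        def test_something(self):
            with _test_eager_guard():       # run once in eager mode
                self.func_test_something()
            self.func_test_something()      # and once in legacy dygraph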
@@ -17,26 +17,37 @@ import unittest
 import numpy as np
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_reserved
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph


 class TestMemoryreserved(unittest.TestCase):
-    def test_memory_reserved(self, device=None):
+    def func_test_memory_reserved(self, device=None):
         if core.is_compiled_with_cuda():
             tensor = paddle.zeros(shape=[256])
             alloc_size = 4 * 256  # 256 float32 data, with 4 bytes for each one
             memory_reserved_size = memory_reserved(device)
             self.assertEqual(memory_reserved_size, alloc_size)

-    def test_memory_reserved_for_all_places(self):
+    def test_memory_reserved(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved()
+        self.func_test_memory_reserved()
+
+    def func_test_memory_reserved_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_memory_reserved(core.CUDAPlace(i))
-                self.test_memory_reserved(i)
-                self.test_memory_reserved("gpu:" + str(i))
+                self.func_test_memory_reserved(core.CUDAPlace(i))
+                self.func_test_memory_reserved(i)
+                self.func_test_memory_reserved("gpu:" + str(i))

-    def test_memory_reserved_exception(self):
+    def test_memory_reserved_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_for_all_places()
+        self.func_test_memory_reserved_for_all_places()
+
+    def func_test_memory_reserved_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -48,6 +59,11 @@ class TestMemoryreserved(unittest.TestCase):
             with self.assertRaises(BaseException):
                 memory_reserved()

+    def test_memory_reserved_exception(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_exception()
+        self.func_test_memory_reserved_exception()
+

 if __name__ == "__main__":
     unittest.main()