未验证 提交 4da46737 编写于 作者: W wanghuancoder 提交者: GitHub

[Eager] Do not call mutable_data when initializing a tensor (#41331)

* do not call mutable_data when initializing a tensor, test=develop

* refine, test=develop

* fix copy_, test=develop

* refine, test=develop
上级 868a3203
......@@ -77,9 +77,6 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
phi::make_intrusive<paddle::experimental::SharedStorage>(place),
phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
ddims));
if (phi::product(ddims) > 0) {
dense_tensor->mutable_data(place);
}
self->tensor.set_impl(dense_tensor);
}
......@@ -92,6 +89,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
}
void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
const paddle::platform::Place& place,
bool zero_copy = false) {
PADDLE_ENFORCE_EQ(
self->tensor.defined(), true,
......@@ -102,7 +100,6 @@ void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
"eager tensor before init it with NumPy."));
phi::DenseTensor* impl_ptr =
static_cast<phi::DenseTensor*>(self->tensor.impl().get());
paddle::platform::Place place = impl_ptr->place();
if (platform::is_cpu_place(place)) {
SetTensorFromPyArray<platform::CPUPlace>(impl_ptr, array, place, zero_copy);
} else if (platform::is_xpu_place(place)) {
......@@ -289,7 +286,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
EmptyTensorInitializer(py_tensor_ptr, act_name, place, persistable,
stop_gradient);
InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, zero_copy);
InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy);
}
// initialize Tensor by Tensor or framework::Tensor (mix args and
......
......@@ -330,17 +330,22 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
<< self->tensor.name();
if (!self->tensor.defined()) {
if (!self->tensor.initialized()) {
egr::EagerUtils::autograd_meta(&(self->tensor))
->SetStopGradient(
egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
egr::EagerUtils::autograd_meta(&(self->tensor))
->SetPersistable(
egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
if (src_tensor.initialized()) {
self->tensor.copy_(src_tensor, src_tensor.inner_place(), blocking);
}
} else {
if (src_tensor.initialized()) {
self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
}
}
self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
<< self->tensor.name();
Py_INCREF(Py_None);
......
......@@ -16,10 +16,11 @@ import paddle
import unittest
from paddle.fluid import core
from paddle.device.cuda import device_count, memory_allocated, max_memory_allocated
from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph
class TestMaxMemoryAllocated(unittest.TestCase):
def test_max_memory_allocated(self, device=None):
def func_test_max_memory_allocated(self, device=None):
if core.is_compiled_with_cuda():
alloc_time = 100
max_alloc_size = 10000
......@@ -35,16 +36,26 @@ class TestMaxMemoryAllocated(unittest.TestCase):
self.assertEqual(peak_memory_allocated_size,
max_memory_allocated(device))
def test_max_memory_allocated_for_all_places(self):
def test_max_memory_allocated(self):
with _test_eager_guard():
self.func_test_max_memory_allocated()
self.func_test_max_memory_allocated()
def func_test_max_memory_allocated_for_all_places(self):
if core.is_compiled_with_cuda():
gpu_num = device_count()
for i in range(gpu_num):
paddle.device.set_device("gpu:" + str(i))
self.test_max_memory_allocated(core.CUDAPlace(i))
self.test_max_memory_allocated(i)
self.test_max_memory_allocated("gpu:" + str(i))
self.func_test_max_memory_allocated(core.CUDAPlace(i))
self.func_test_max_memory_allocated(i)
self.func_test_max_memory_allocated("gpu:" + str(i))
def test_max_memory_allocated_exception(self):
def test_max_memory_allocated_for_all_places(self):
with _test_eager_guard():
self.func_test_max_memory_allocated_for_all_places()
self.func_test_max_memory_allocated_for_all_places()
def func_test_max_memory_allocated_exception(self):
if core.is_compiled_with_cuda():
wrong_device = [
core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
......@@ -56,6 +67,11 @@ class TestMaxMemoryAllocated(unittest.TestCase):
with self.assertRaises(BaseException):
max_memory_allocated()
def test_max_memory_allocated_exception(self):
with _test_eager_guard():
self.func_test_max_memory_allocated_exception()
self.func_test_max_memory_allocated_exception()
if __name__ == "__main__":
unittest.main()
......@@ -17,26 +17,37 @@ import unittest
import numpy as np
from paddle.fluid import core
from paddle.device.cuda import device_count, memory_reserved
from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph
class TestMemoryreserved(unittest.TestCase):
def test_memory_reserved(self, device=None):
def func_test_memory_reserved(self, device=None):
if core.is_compiled_with_cuda():
tensor = paddle.zeros(shape=[256])
alloc_size = 4 * 256 # 256 float32 data, with 4 bytes for each one
memory_reserved_size = memory_reserved(device)
self.assertEqual(memory_reserved_size, alloc_size)
def test_memory_reserved_for_all_places(self):
def test_memory_reserved(self):
with _test_eager_guard():
self.func_test_memory_reserved()
self.func_test_memory_reserved()
def func_test_memory_reserved_for_all_places(self):
if core.is_compiled_with_cuda():
gpu_num = device_count()
for i in range(gpu_num):
paddle.device.set_device("gpu:" + str(i))
self.test_memory_reserved(core.CUDAPlace(i))
self.test_memory_reserved(i)
self.test_memory_reserved("gpu:" + str(i))
self.func_test_memory_reserved(core.CUDAPlace(i))
self.func_test_memory_reserved(i)
self.func_test_memory_reserved("gpu:" + str(i))
def test_memory_reserved_exception(self):
def test_memory_reserved_for_all_places(self):
with _test_eager_guard():
self.func_test_memory_reserved_for_all_places()
self.func_test_memory_reserved_for_all_places()
def func_test_memory_reserved_exception(self):
if core.is_compiled_with_cuda():
wrong_device = [
core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
......@@ -48,6 +59,11 @@ class TestMemoryreserved(unittest.TestCase):
with self.assertRaises(BaseException):
memory_reserved()
def test_memory_reserved_exception(self):
with _test_eager_guard():
self.func_test_memory_reserved_exception()
self.func_test_memory_reserved_exception()
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册