Unverified commit 4da46737, authored by wanghuancoder, committed by GitHub

[Eager] do not mutabledata when init (#41331)

* do not mutabledata when init, test=develop

* refine, test=develop

* fix copy_, test=develop

* refine, test=develop
Parent 868a3203
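
The core change: EmptyTensorInitializer now builds only the DenseTensor shell (storage handle plus meta) and no longer calls mutable_data, so device memory is first allocated when real data arrives, e.g. in InitTensorWithNumpyValue. A rough way to observe this on a CUDA build (a sketch, not part of this commit; exact byte counts depend on the allocator):

    import paddle
    from paddle.device.cuda import memory_allocated

    paddle.device.set_device("gpu:0")
    before = memory_allocated()
    t = paddle.to_tensor([1.0, 2.0, 3.0])  # shell first, one allocation when the data is copied in
    after = memory_allocated()
    print(after - before)  # reflects t's buffer only; no separate pre-allocation at init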
@@ -77,9 +77,6 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
       phi::make_intrusive<paddle::experimental::SharedStorage>(place),
       phi::DenseTensorMeta(paddle::framework::TransToPhiDataType(dtype),
                            ddims));
-  if (phi::product(ddims) > 0) {
-    dense_tensor->mutable_data(place);
-  }
   self->tensor.set_impl(dense_tensor);
 }
@@ -92,6 +89,7 @@ void EmptyTensorInitializer(TensorObject* self, const std::string& name,
 }

 void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
+                              const paddle::platform::Place& place,
                               bool zero_copy = false) {
   PADDLE_ENFORCE_EQ(
       self->tensor.defined(), true,
@@ -102,7 +100,6 @@ void InitTensorWithNumpyValue(TensorObject* self, const py::object& array,
           "eager tensor before init it with NumPy."));
   phi::DenseTensor* impl_ptr =
       static_cast<phi::DenseTensor*>(self->tensor.impl().get());
-  paddle::platform::Place place = impl_ptr->place();
   if (platform::is_cpu_place(place)) {
     SetTensorFromPyArray<platform::CPUPlace>(impl_ptr, array, place, zero_copy);
   } else if (platform::is_xpu_place(place)) {
@@ -289,7 +286,7 @@ void AutoInitTensorByPyArray(TensorObject* py_tensor_ptr,
   EmptyTensorInitializer(py_tensor_ptr, act_name, place, persistable,
                          stop_gradient);
-  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, zero_copy);
+  InitTensorWithNumpyValue(py_tensor_ptr, numpy_value, place, zero_copy);
 }

 // initialize Tensor by Tensor or framework::Tensor (mix args and
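
With the buffer gone at init time, InitTensorWithNumpyValue can no longer recover the target place from the tensor itself (the deleted `paddle::platform::Place place = impl_ptr->place();` required an allocated tensor), so AutoInitTensorByPyArray threads the caller's place through explicitly. A Python-style sketch of the C++ call chain (function names lower-cased for illustration):

    # illustrative pseudocode of the flow after this change
    def auto_init_tensor_by_pyarray(py_tensor, numpy_value, place,
                                    persistable, stop_gradient, zero_copy=False):
        # 1) build an empty shell: meta + storage handle, no buffer yet
        empty_tensor_initializer(py_tensor, place, persistable, stop_gradient)
        # 2) allocate and fill on the caller-supplied place; previously the
        #    place was read back from the already-allocated tensor
        init_tensor_with_numpy_value(py_tensor, numpy_value, place, zero_copy)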
......
@@ -330,17 +330,22 @@ static PyObject* tensor_method_copy_(TensorObject* self, PyObject* args,
   bool blocking = CastPyArg2AttrBoolean(PyTuple_GET_ITEM(args, 1), 1);
   VLOG(6) << "Start Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
-  if (!self->tensor.defined()) {
+  if (!self->tensor.initialized()) {
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetStopGradient(
             egr::EagerUtils::autograd_meta(&(src_tensor))->StopGradient());
     egr::EagerUtils::autograd_meta(&(self->tensor))
         ->SetPersistable(
             egr::EagerUtils::autograd_meta(&(src_tensor))->Persistable());
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, src_tensor.inner_place(), blocking);
+    }
+  } else {
+    if (src_tensor.initialized()) {
+      self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
+    }
   }
-  self->tensor.copy_(src_tensor, self->tensor.inner_place(), blocking);
   VLOG(6) << "Finish Copy Tensor " << src_tensor.name() << " to "
           << self->tensor.name();
   Py_INCREF(Py_None);
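
tensor_method_copy_ now keys off initialized() instead of defined(), since a lazily built tensor is defined (it has an impl) but not yet initialized (no buffer), and it only copies when the source actually holds data. The new branching, restated as Python-style pseudocode (attribute names are illustrative):

    def tensor_method_copy_(dst, src, blocking):
        if not dst.initialized():
            # dst is a fresh shell: inherit autograd flags from the source
            dst.stop_gradient = src.stop_gradient
            dst.persistable = src.persistable
            if src.initialized():
                # dst has no buffer yet, so copy onto the source's place
                dst.copy_(src, src.inner_place(), blocking)
        else:
            if src.initialized():
                # dst already owns a buffer; keep its existing place
                dst.copy_(src, dst.inner_place(), blocking)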
......
@@ -16,10 +16,11 @@ import paddle
 import unittest
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_allocated, max_memory_allocated
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph


 class TestMaxMemoryAllocated(unittest.TestCase):
-    def test_max_memory_allocated(self, device=None):
+    def func_test_max_memory_allocated(self, device=None):
         if core.is_compiled_with_cuda():
             alloc_time = 100
             max_alloc_size = 10000
@@ -35,16 +36,26 @@ class TestMaxMemoryAllocated(unittest.TestCase):
                 self.assertEqual(peak_memory_allocated_size,
                                  max_memory_allocated(device))

-    def test_max_memory_allocated_for_all_places(self):
+    def test_max_memory_allocated(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated()
+        self.func_test_max_memory_allocated()
+
+    def func_test_max_memory_allocated_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_max_memory_allocated(core.CUDAPlace(i))
-                self.test_max_memory_allocated(i)
-                self.test_max_memory_allocated("gpu:" + str(i))
+                self.func_test_max_memory_allocated(core.CUDAPlace(i))
+                self.func_test_max_memory_allocated(i)
+                self.func_test_max_memory_allocated("gpu:" + str(i))

-    def test_max_memory_allocated_exception(self):
+    def test_max_memory_allocated_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_for_all_places()
+        self.func_test_max_memory_allocated_for_all_places()
+
+    def func_test_max_memory_allocated_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -56,6 +67,11 @@ class TestMaxMemoryAllocated(unittest.TestCase):
                 with self.assertRaises(BaseException):
                     max_memory_allocated()

+    def test_max_memory_allocated_exception(self):
+        with _test_eager_guard():
+            self.func_test_max_memory_allocated_exception()
+        self.func_test_max_memory_allocated_exception()
+

 if __name__ == "__main__":
     unittest.main()
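
The test edits follow the convention used across the eager migration: the body moves into a func_test_* helper, and the unittest entry point runs it twice, once under _test_eager_guard (eager mode) and once in legacy dygraph. The skeleton of that pattern (ExampleTest is a hypothetical name, not from this commit):

    import unittest
    from paddle.fluid.framework import _test_eager_guard

    class ExampleTest(unittest.TestCase):
        def func_test_something(self):
            pass  # the actual assertions live here

        def test_something(self):
            with _test_eager_guard():       # run once in eager mode
                self.func_test_something()
            self.func_test_something()      # and once in legacy dygraph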
@@ -17,26 +17,37 @@ import unittest
 import numpy as np
 from paddle.fluid import core
 from paddle.device.cuda import device_count, memory_reserved
+from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph


 class TestMemoryreserved(unittest.TestCase):
-    def test_memory_reserved(self, device=None):
+    def func_test_memory_reserved(self, device=None):
         if core.is_compiled_with_cuda():
             tensor = paddle.zeros(shape=[256])
             alloc_size = 4 * 256  # 256 float32 data, with 4 bytes for each one
             memory_reserved_size = memory_reserved(device)
             self.assertEqual(memory_reserved_size, alloc_size)

-    def test_memory_reserved_for_all_places(self):
+    def test_memory_reserved(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved()
+        self.func_test_memory_reserved()
+
+    def func_test_memory_reserved_for_all_places(self):
         if core.is_compiled_with_cuda():
             gpu_num = device_count()
             for i in range(gpu_num):
                 paddle.device.set_device("gpu:" + str(i))
-                self.test_memory_reserved(core.CUDAPlace(i))
-                self.test_memory_reserved(i)
-                self.test_memory_reserved("gpu:" + str(i))
+                self.func_test_memory_reserved(core.CUDAPlace(i))
+                self.func_test_memory_reserved(i)
+                self.func_test_memory_reserved("gpu:" + str(i))

-    def test_memory_reserved_exception(self):
+    def test_memory_reserved_for_all_places(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_for_all_places()
+        self.func_test_memory_reserved_for_all_places()
+
+    def func_test_memory_reserved_exception(self):
         if core.is_compiled_with_cuda():
             wrong_device = [
                 core.CPUPlace(), device_count() + 1, -2, 0.5, "gpu1", "npu"
@@ -48,6 +59,11 @@ class TestMemoryreserved(unittest.TestCase):
             with self.assertRaises(BaseException):
                 memory_reserved()

+    def test_memory_reserved_exception(self):
+        with _test_eager_guard():
+            self.func_test_memory_reserved_exception()
+        self.func_test_memory_reserved_exception()
+

 if __name__ == "__main__":
     unittest.main()