Unverified commit c4b7c485, authored by Ruibiao Chen, committed by GitHub

Add pinned memory to host memory stats (#43096)

* Add pinned memory to HostMemoryStats

* Add macro for WrapStatAllocator

* Fix CI errors
Parent 0e10f247
@@ -931,9 +931,15 @@ class AllocatorFacadePrivate {
   void WrapStatAllocator() {
     for (auto& pair : allocators_) {
-      pair.second = std::make_shared<StatAllocator>(pair.second);
+      // Now memory stats is only supported for CPU and GPU
+      const platform::Place& place = pair.first;
+      if (platform::is_cpu_place(place) ||
+          platform::is_cuda_pinned_place(place) ||
+          platform::is_gpu_place(place)) {
+        pair.second = std::make_shared<StatAllocator>(pair.second);
+      }
     }
   }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // a standalone CUDA allocator to support multi-stream GC in new executor
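
The filter above is the core of the first change: only allocators whose place is host CPU, CUDA-pinned host memory, or GPU are wrapped in StatAllocator, so memory stats are collected exactly for those three kinds of places. A minimal sketch of that predicate pulled out on its own follows; the helper name and standalone framing are illustrative and not part of the commit, while the place checks are the ones used above.

// Illustrative helper, not part of the commit: the same place filter that
// WrapStatAllocator applies before wrapping an allocator in StatAllocator.
#include "paddle/fluid/platform/place.h"

namespace {

bool PlaceSupportsMemoryStats(const paddle::platform::Place& place) {
  // Stats currently cover host (CPU), CUDA-pinned host, and GPU memory.
  return paddle::platform::is_cpu_place(place) ||
         paddle::platform::is_cuda_pinned_place(place) ||
         paddle::platform::is_gpu_place(place);
}

}  // namespace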
......
@@ -13,7 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/memory/allocation/pinned_allocator.h"
+#include "paddle/fluid/memory/stats.h"
 namespace paddle {
 namespace memory {
 namespace allocation {
@@ -24,6 +24,7 @@ void CPUPinnedAllocator::FreeImpl(phi::Allocation *allocation) {
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(cudaFreeHost(allocation->ptr()));
 #endif
+  HOST_MEMORY_STAT_UPDATE(Reserved, 0, -allocation->size());
   delete allocation;
 }
 phi::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
@@ -33,6 +34,7 @@ phi::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
 #endif
+  HOST_MEMORY_STAT_UPDATE(Reserved, 0, size);
   return new Allocation(ptr, size, platform::CUDAPinnedPlace());
 }
 }  // namespace allocation
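
Both CPUPinnedAllocator hooks above update the host Reserved stat symmetrically: +size when cudaHostAlloc succeeds in AllocateImpl and -allocation->size() when the block is released through cudaFreeHost in FreeImpl, always against host device id 0. As a rough mental model of the bookkeeping those two HOST_MEMORY_STAT_UPDATE calls imply (this is not Paddle's actual stats.h, just an illustration), a stat slot is a signed counter that accepts positive and negative deltas and tracks a peak:

// Conceptual model only -- not Paddle's stats implementation.
#include <atomic>
#include <cstdint>

class ToyMemoryStat {
 public:
  // Apply a signed delta: +size on allocation, -size on free.
  void Update(int64_t delta) {
    int64_t current =
        current_.fetch_add(delta, std::memory_order_relaxed) + delta;
    int64_t peak = peak_.load(std::memory_order_relaxed);
    // Best-effort peak tracking, good enough for an illustration.
    while (current > peak &&
           !peak_.compare_exchange_weak(peak, current,
                                        std::memory_order_relaxed)) {
    }
  }
  int64_t Current() const { return current_.load(std::memory_order_relaxed); }
  int64_t Peak() const { return peak_.load(std::memory_order_relaxed); }

 private:
  std::atomic<int64_t> current_{0};
  std::atomic<int64_t> peak_{0};
};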
......
@@ -45,11 +45,13 @@ class StatAllocator : public Allocator {
     phi::Allocator::AllocationPtr allocation =
         underlying_allocator_->Allocate(size);
-    if (platform::is_cpu_place(allocation->place())) {
-      HOST_MEMORY_STAT_UPDATE(Allocated, allocation->place().GetDeviceId(),
+    const platform::Place& place = allocation->place();
+    if (platform::is_cpu_place(place) ||
+        platform::is_cuda_pinned_place(place)) {
+      HOST_MEMORY_STAT_UPDATE(Allocated, place.GetDeviceId(),
                               allocation->size());
     } else {
-      DEVICE_MEMORY_STAT_UPDATE(Allocated, allocation->place().GetDeviceId(),
+      DEVICE_MEMORY_STAT_UPDATE(Allocated, place.GetDeviceId(),
                                 allocation->size());
     }
     return allocation.release();
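
This hunk is what actually fixes the attribution: in the old code a CUDA-pinned allocation failed the is_cpu_place check and fell into the else branch, so it was charged to device memory; now CPU and CUDA-pinned places are both reported as host Allocated. The toy decorator below, built from stand-in types rather than Paddle's real Allocator classes, shows the same routing idea:

// Toy stand-ins, not Paddle's classes: host-side places (CPU and CUDA-pinned)
// feed a host counter, everything else feeds a per-device counter, mirroring
// the branch in StatAllocator above.
#include <cstddef>
#include <cstdint>
#include <map>

enum class ToyPlaceKind { kCPU, kCUDAPinned, kGPU };

struct ToyPlace {
  ToyPlaceKind kind;
  int device_id;
};

struct ToyStats {
  int64_t host_allocated = 0;
  std::map<int, int64_t> device_allocated;  // keyed by device id

  void OnAlloc(const ToyPlace& place, size_t size) {
    const bool is_host = place.kind == ToyPlaceKind::kCPU ||
                         place.kind == ToyPlaceKind::kCUDAPinned;
    if (is_host) {
      host_allocated += static_cast<int64_t>(size);
    } else {
      device_allocated[place.device_id] += static_cast<int64_t>(size);
    }
  }
};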
......
@@ -211,6 +211,7 @@ void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) {
   if (result == gpuSuccess) {
     *index = 1;  // PINNED memory
     cuda_pinnd_alloc_size_ += size;
+    HOST_MEMORY_STAT_UPDATE(Reserved, 0, size);
     return p;
   } else {
     LOG(WARNING) << "cudaHostAlloc failed.";
@@ -255,6 +256,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
                                   err));
   }
 #endif
+  HOST_MEMORY_STAT_UPDATE(Reserved, 0, -size);
 }
 bool CUDAPinnedAllocator::UseGpu() const { return false; }
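
The same Reserved bookkeeping is mirrored in this second CUDAPinnedAllocator (the variant that reports through a size_t* index and cuda_pinnd_alloc_size_), so pinned memory obtained through either allocator path shows up in the host Reserved stat. A trivial check of the invariant the paired updates maintain, with a plain variable standing in for the real stat:

// Toy check, not part of the commit: a matched Alloc/Free pair leaves the
// host Reserved counter exactly where it started.
#include <cassert>
#include <cstdint>

int main() {
  int64_t reserved = 0;          // stands in for host Reserved, device id 0
  const int64_t size = 1 << 20;  // a 1 MiB pinned block

  reserved += size;  // HOST_MEMORY_STAT_UPDATE(Reserved, 0, size) in Alloc
  reserved -= size;  // HOST_MEMORY_STAT_UPDATE(Reserved, 0, -size) in Free

  assert(reserved == 0);
  return 0;
}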
......