未验证 提交 c4b7c485 编写于 作者: R Ruibiao Chen 提交者: GitHub

Add pinned memory to host memory stats (#43096)

* Add pinned memory to HostMemoryStats

* Add macro for WrapStatAllocator

* Fix CI errors
上级 0e10f247
......@@ -931,9 +931,15 @@ class AllocatorFacadePrivate {
void WrapStatAllocator() {
  // Wrap each allocator whose place supports memory statistics in a
  // StatAllocator, so every allocation/deallocation updates the stats.
  // Stats are currently collected only for CPU, CUDA-pinned and GPU places.
  for (auto& pair : allocators_) {
    const platform::Place& place = pair.first;
    const bool stats_supported = platform::is_cpu_place(place) ||
                                 platform::is_cuda_pinned_place(place) ||
                                 platform::is_gpu_place(place);
    if (stats_supported) {
      pair.second = std::make_shared<StatAllocator>(pair.second);
    }
  }
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// a standalone CUDA allocator to support multi-stream GC in new executor
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/memory/stats.h"
namespace paddle {
namespace memory {
namespace allocation {
......@@ -24,6 +24,7 @@ void CPUPinnedAllocator::FreeImpl(phi::Allocation *allocation) {
#else
PADDLE_ENFORCE_GPU_SUCCESS(cudaFreeHost(allocation->ptr()));
#endif
HOST_MEMORY_STAT_UPDATE(Reserved, 0, -allocation->size());
delete allocation;
}
phi::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
......@@ -33,6 +34,7 @@ phi::Allocation *CPUPinnedAllocator::AllocateImpl(size_t size) {
#else
PADDLE_ENFORCE_GPU_SUCCESS(cudaHostAlloc(&ptr, size, cudaHostAllocPortable));
#endif
HOST_MEMORY_STAT_UPDATE(Reserved, 0, size);
return new Allocation(ptr, size, platform::CUDAPinnedPlace());
}
} // namespace allocation
......
......@@ -45,11 +45,13 @@ class StatAllocator : public Allocator {
phi::Allocator::AllocationPtr allocation =
underlying_allocator_->Allocate(size);
if (platform::is_cpu_place(allocation->place())) {
HOST_MEMORY_STAT_UPDATE(Allocated, allocation->place().GetDeviceId(),
const platform::Place& place = allocation->place();
if (platform::is_cpu_place(place) ||
platform::is_cuda_pinned_place(place)) {
HOST_MEMORY_STAT_UPDATE(Allocated, place.GetDeviceId(),
allocation->size());
} else {
DEVICE_MEMORY_STAT_UPDATE(Allocated, allocation->place().GetDeviceId(),
DEVICE_MEMORY_STAT_UPDATE(Allocated, place.GetDeviceId(),
allocation->size());
}
return allocation.release();
......
......@@ -211,6 +211,7 @@ void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) {
if (result == gpuSuccess) {
*index = 1; // PINNED memory
cuda_pinnd_alloc_size_ += size;
HOST_MEMORY_STAT_UPDATE(Reserved, 0, size);
return p;
} else {
LOG(WARNING) << "cudaHostAlloc failed.";
......@@ -255,6 +256,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
err));
}
#endif
HOST_MEMORY_STAT_UPDATE(Reserved, 0, -size);
}
// Pinned (page-locked) host memory lives on the CPU side, so this
// allocator never reports itself as a GPU allocator.
bool CUDAPinnedAllocator::UseGpu() const {
  return false;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册