From 26cd0bb5a59d913f8c216ceee0c6abb46317e31e Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 29 Jun 2017 19:13:24 +0800 Subject: [PATCH] ENH: count allocated fallback size for performance --- paddle/memory/detail/system_allocator.cc | 52 +++++++++++++------ paddle/memory/detail/system_allocator.h | 3 +- .../paddle/trainer_config_helpers/networks.py | 4 +- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index 332ff062d4..2b0fbfa87e 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -39,22 +39,22 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) { // pointer shall not be dereferenced -- so we make it nullptr. if (size <= 0) return nullptr; - if (FLAGS_use_pinned_memory) { - void* p = malloc(size); - if (p != nullptr) { - mlock(p, size); - } - } + index = 0; // unlock memory void* p = malloc(size); - if (p != nullptr && FLAGS_use_pinned_memory) { - mlock(p, size); + + if (p != nullptr) { + if (FLAGS_use_pinned_memory) { + index = 1; + mlock(p, size); // lock memory + } } + return p; } void CPUAllocator::Free(void* p, size_t size, size_t index) { - if (p != nullptr && FLAGS_use_pinned_memory) { + if (p != nullptr && index == 1) { munlock(p, size); } free(p); @@ -73,26 +73,34 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { // Reserve memory for page tables, etc. size_t reserving = capacity - paddle::platform::GpuMaxAllocSize(); - size_t remaining = available > reserving ? available - reserving : 0; + size_t usable = available > reserving ? available - reserving : 0; // If remaining size no less than expected size, using general // cudaMalloc to allocate GPU memory. 
void* p = 0; - if (size <= remaining) { + if (size <= usable) { cudaError_t result = cudaMalloc(&p, size); if (result == cudaSuccess) { index = 0; - total_alloc_size_ += size; + gpu_alloc_size_ += size; return p; } } // If remaining size less than expected size or cudaMalloc failed, // cudaMallocHost will be considered as a fallback allocator. + // + // NOTE: here, we use GpuMaxAllocSize() as the maximum memory size + // of host fallback allocation. Allocating too much would reduce + // the amount of memory available to the underlying system for paging. + usable = paddle::platform::GpuMaxAllocSize() - fallback_alloc_size_; + + if (size > usable) return nullptr; + cudaError_t result = cudaMallocHost(&p, size); if (result == cudaSuccess) { index = 1; - total_alloc_size_ += size; + fallback_alloc_size_ += size; return p; } @@ -100,16 +108,26 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) { } void GPUAllocator::Free(void* p, size_t size, size_t index) { + cudaError_t err; + + if (index == 0) { + PADDLE_ASSERT(gpu_alloc_size_ >= size); + gpu_alloc_size_ -= size; + err = cudaFree(p); + } else { + PADDLE_ASSERT(fallback_alloc_size_ >= size); + fallback_alloc_size_ -= size; + err = cudaFreeHost(p); + } + // Purposefully allow cudaErrorCudartUnloading, because // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if // cudaFree succeeds. - PADDLE_ASSERT(total_alloc_size_ >= size); - total_alloc_size_ -= size; - cudaError_t err = index == 1 ? 
cudaFreeHost(p) : cudaFree(p); if (err != cudaErrorCudartUnloading) { - platform::throw_on_error(err, "cudaFree{Host} failed"); + platform::throw_on_error(err, + "cudaFree{Host} failed in GPUAllocator::Free."); } } diff --git a/paddle/memory/detail/system_allocator.h b/paddle/memory/detail/system_allocator.h index e15302ce4f..7093c42967 100644 --- a/paddle/memory/detail/system_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -47,7 +47,8 @@ class GPUAllocator : public SystemAllocator { virtual void Free(void* p, size_t size, size_t index); private: - size_t total_alloc_size_ = 0; + size_t gpu_alloc_size_ = 0; + size_t fallback_alloc_size_ = 0; }; #endif // PADDLE_ONLY_CPU diff --git a/python/paddle/trainer_config_helpers/networks.py b/python/paddle/trainer_config_helpers/networks.py index 67154a8d7d..1bf59ed484 100755 --- a/python/paddle/trainer_config_helpers/networks.py +++ b/python/paddle/trainer_config_helpers/networks.py @@ -1381,7 +1381,7 @@ def inputs(layers, *args): if len(args) != 0: layers.extend(args) - Inputs(* [l.name for l in layers]) + Inputs(*[l.name for l in layers]) def outputs(layers, *args): @@ -1424,7 +1424,7 @@ def outputs(layers, *args): assert len(layers) > 0 if HasInputsSet(): # input already set - Outputs(* [l.name for l in layers]) + Outputs(*[l.name for l in layers]) return # just return outputs. if len(layers) != 1: -- GitLab