From 158d6c4d1956225e7aa8ddaa4e4af852060916da Mon Sep 17 00:00:00 2001
From: chengduoZH
Date: Mon, 26 Mar 2018 20:36:07 +0800
Subject: [PATCH] add unit test

---
 paddle/fluid/framework/tensor.h                 |  3 -
 .../fluid/memory/detail/system_allocator.cc     |  7 ++-
 paddle/fluid/memory/memcpy.cc                   | 39 +++++++++++++
 paddle/fluid/memory/memory.cc                   | 49 +++++++----------
 paddle/fluid/memory/memory.h                    | 10 ++--
 paddle/fluid/memory/memory_test.cc              | 55 +++++++++++++++++++
 paddle/fluid/platform/place.cc                  |  1 +
 7 files changed, 124 insertions(+), 40 deletions(-)

diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h
index 6eb678e301..6f878541e6 100644
--- a/paddle/fluid/framework/tensor.h
+++ b/paddle/fluid/framework/tensor.h
@@ -92,9 +92,6 @@ class Tensor {
   /*! Return the numel of the memory block. */
   inline int64_t numel() const;
 
-  /*! Return the numel of the memory block. */
-  inline bool isPinned() const;
-
   /*! Resize the dimensions of the memory block. */
   inline Tensor& Resize(const DDim& dims);
 
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index d20c5c8682..2f3c10aeb2 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/memory/detail/system_allocator.h"
 #include "paddle/fluid/platform/assert.h"
+#include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/gpu_info.h"
 
@@ -127,10 +128,12 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
   // NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size
   // of host pinned allocation. Allocating too much would reduce
   // the amount of memory available to the underlying system for paging.
-  size_t usable = CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_;
+  size_t usable =
+      paddle::platform::CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_;
 
   if (size > usable) return nullptr;
 
+  void* p;
   // PINNED memory is visible to all CUDA contexts.
   cudaError_t result = cudaMallocHost(&p, size);
 
@@ -161,7 +164,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
   }
 }
 
-bool CUDAPinnedAllocator::UseGpu() const { return true; }
+bool CUDAPinnedAllocator::UseGpu() const { return false; }
 
 #endif
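For reference, the pinned-allocation cap enforced above can be read in isolation as the following minimal sketch. This is not Paddle code: AllocPinned, kMaxPinned, and pinned_in_use are hypothetical stand-ins for the allocator's Alloc method, paddle::platform::CpuMaxAllocSize() / 2, and the running total cuda_pinnd_alloc_size_, and the 1 GiB cap is an arbitrary illustrative value.

    #include <cstddef>
    #include <cuda_runtime.h>

    static size_t pinned_in_use = 0;              // stand-in for cuda_pinnd_alloc_size_
    static const size_t kMaxPinned = 1ull << 30;  // stand-in for CpuMaxAllocSize() / 2

    void* AllocPinned(size_t size) {
      // Pinned pages cannot be swapped out, so refuse any request that would
      // push total pinned usage past the cap and starve the host of pageable
      // memory.
      if (size > kMaxPinned - pinned_in_use) return nullptr;
      void* p = nullptr;
      // cudaMallocHost returns page-locked host memory visible to all CUDA
      // contexts.
      if (cudaMallocHost(&p, size) != cudaSuccess) return nullptr;
      pinned_in_use += size;
      return p;
    }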
diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc
index b991360d04..eddcaab8be 100644
--- a/paddle/fluid/memory/memcpy.cc
+++ b/paddle/fluid/memory/memcpy.cc
@@ -56,6 +56,45 @@ void Copy(
   }
 }
 
+template <>
+void Copy<platform::CPUPlace, platform::CUDAPinnedPlace>(
+    platform::CPUPlace dst_place, void* dst,
+    platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
+template <>
+void Copy<platform::CUDAPinnedPlace, platform::CPUPlace>(
+    platform::CUDAPinnedPlace dst_place, void* dst,
+    platform::CPUPlace src_place, const void* src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
+template <>
+void Copy<platform::CUDAPinnedPlace, platform::CUDAPinnedPlace>(
+    platform::CUDAPinnedPlace dst_place, void* dst,
+    platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
+  std::memcpy(dst, src, num);
+}
+
+template <>
+void Copy<platform::CUDAPinnedPlace, platform::CUDAPlace>(
+    platform::CUDAPinnedPlace dst_place, void* dst,
+    platform::CUDAPlace src_place, const void* src, size_t num,
+    cudaStream_t stream) {
+  platform::SetDeviceId(src_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
+}
+
+template <>
+void Copy<platform::CUDAPlace, platform::CUDAPinnedPlace>(
+    platform::CUDAPlace dst_place, void* dst,
+    platform::CUDAPinnedPlace src_place, const void* src, size_t num,
+    cudaStream_t stream) {
+  platform::SetDeviceId(dst_place.device);
+  platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
+}
+
 #endif
 
 }  // namespace memory
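A minimal sketch of how these new Copy specializations might be exercised, staging a buffer through page-locked memory on its way to and from the device. This is hypothetical usage, not code from the patch: PinnedRoundTrip is an invented name, and it assumes a CUDA build while using the default stream (0) for brevity, with no synchronization shown.

    #include "paddle/fluid/memory/memcpy.h"
    #include "paddle/fluid/memory/memory.h"
    #include "paddle/fluid/platform/place.h"

    void PinnedRoundTrip(size_t n) {
      paddle::platform::CUDAPinnedPlace pinned;
      paddle::platform::CUDAPlace gpu(0);

      void* host = paddle::memory::Alloc(pinned, n);
      void* dev = paddle::memory::Alloc(gpu, n);

      // CUDAPinnedPlace -> CUDAPlace resolves to GpuMemcpyAsync(HostToDevice);
      // pinned pages let the driver DMA without an extra staging copy.
      paddle::memory::Copy(gpu, dev, pinned, host, n, /*stream=*/0);
      // CUDAPlace -> CUDAPinnedPlace resolves to GpuMemcpyAsync(DeviceToHost).
      paddle::memory::Copy(pinned, host, gpu, dev, n, /*stream=*/0);

      paddle::memory::Free(gpu, dev);
      paddle::memory::Free(pinned, host);
    }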
diff --git a/paddle/fluid/memory/memory.cc b/paddle/fluid/memory/memory.cc
index 6da9f00656..94b43af147 100644
--- a/paddle/fluid/memory/memory.cc
+++ b/paddle/fluid/memory/memory.cc
@@ -82,16 +82,6 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
   return as[gpu_id];
 }
 
-BuddyAllocator* GetCUDAPinnedBuddyAllocator(int gpu_id) {
-  static BuddyAllocator* as = NULL;
-  if (as == NULL) {
-    as = new BuddyAllocator(new detail::CUDAPinnedAllocator,
-                            platform::CpuMinChunkSize(),
-                            platform::CpuMaxChunkSize());
-  }
-  return as;
-}
-
 template <>
 size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
   return GetGPUBuddyAllocator(place.device)->Used();
@@ -100,8 +90,7 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
 template <>
 void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
   auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
-  void* ptr = buddy_allocator->Alloc(size);
-
+  auto* ptr = buddy_allocator->Alloc(size);
   if (ptr == nullptr) {
     int cur_dev = platform::GetCurrentDeviceId();
     platform::SetDeviceId(place.device);
@@ -123,37 +112,39 @@ void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
   GetGPUBuddyAllocator(place.device)->Free(p);
 }
 
+BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
+  static BuddyAllocator* ba = NULL;
+  if (ba == NULL) {
+    ba = new BuddyAllocator(new detail::CUDAPinnedAllocator,
+                            platform::CpuMinChunkSize(),
+                            platform::CpuMaxChunkSize());
+  }
+  return ba;
+}
+
+template <>
 size_t Used(platform::CUDAPinnedPlace place) {
-  return GetGPUBuddyAllocator(place.device)->Used();
+  return GetCUDAPinnedBuddyAllocator()->Used();
 }
 
 template <>
 void* Alloc(platform::CUDAPinnedPlace place, size_t size) {
-  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device);
+  auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
   void* ptr = buddy_allocator->Alloc(size);
-  if (ptr == nullptr) {
-    int cur_dev = platform::GetCurrentDeviceId();
-    platform::SetDeviceId(place.device);
-    size_t avail, total;
-    platform::GpuMemoryUsage(avail, total);
-    LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
-                 << place.device << ", available " << avail << " bytes";
-    LOG(WARNING) << "total " << total;
-    LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
-    LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
-    LOG(WARNING) << "GPU memory used: " << Used(place);
-    platform::SetDeviceId(cur_dev);
-  }
+  if (ptr == nullptr) {
+    LOG(WARNING) << "Cannot allocate " << size
+                 << " bytes in CUDAPinnedPlace";
+  }
   return ptr;
 }
 
 template <>
 void Free(platform::CUDAPinnedPlace place, void* p) {
-  GetCUDAPinnedBuddyAllocator(place.device)->Free(p);
+  GetCUDAPinnedBuddyAllocator()->Free(p);
 }
-
 #endif
 
 size_t Usage::operator()(const platform::CPUPlace& cpu) const {
diff --git a/paddle/fluid/memory/memory.h b/paddle/fluid/memory/memory.h
index fba7372e71..3e6bfddd69 100644
--- a/paddle/fluid/memory/memory.h
+++ b/paddle/fluid/memory/memory.h
@@ -33,7 +33,7 @@ namespace memory {
  * address is valid or not.
  */
 template <typename Place>
-void* Alloc(Place place, size_t size, bool is_pinned = false);
+void* Alloc(Place place, size_t size);
 
 /**
  * \brief Free memory block in one place.
@@ -43,7 +43,7 @@ void* Alloc(Place place, size_t size, bool is_pinned = false);
  *
  */
 template <typename Place>
-void Free(Place place, void* ptr, bool is_pinned = false);
+void Free(Place place, void* ptr);
 
 /**
  * \brief Total size of used memory in one place.
@@ -75,13 +75,11 @@ class PODDeleter {
   static_assert(std::is_pod<T>::value, "T must be POD");
 
  public:
-  explicit PODDeleter(Place place, bool is_pinned = false)
-      : place_(place), is_pinned_(is_pinned) {}
-  void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr), is_pinned_); }
+  explicit PODDeleter(Place place) : place_(place) {}
+  void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
 
  private:
   Place place_;
-  bool is_pinned_;
 };
 
 /**
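The unit tests below verify the buddy allocator's usage accounting, rounding each request with a small align helper. As a quick worked example of that rounding (a standalone sketch; AlignUp is an invented name, and the 32-byte metadata size and 4096-byte minimum chunk size are illustrative assumptions, not the actual values of sizeof(Metadata) or CpuMinChunkSize()):

    #include <cstddef>

    // Mirror of the align() helper added in memory_test.cc below: add the
    // allocator's bookkeeping metadata, then round up to the next multiple
    // of the minimum chunk size.
    size_t AlignUp(size_t size, size_t metadata, size_t alignment) {
      size += metadata;
      size_t remaining = size % alignment;
      return remaining == 0 ? size : size + (alignment - remaining);
    }

    // AlignUp(4096, 32, 4096) == 8192: the 32 metadata bytes spill the
    // request into a second 4096-byte chunk.
    // AlignUp(4064, 32, 4096) == 4096: request plus metadata fits exactly.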
diff --git a/paddle/fluid/memory/memory_test.cc b/paddle/fluid/memory/memory_test.cc
index eb27a52b25..5254cd28cc 100644
--- a/paddle/fluid/memory/memory_test.cc
+++ b/paddle/fluid/memory/memory_test.cc
@@ -141,4 +141,59 @@ TEST(BuddyAllocator, GPUMultAlloc) {
   }
 }
 
+size_t align(size_t size, paddle::platform::CUDAPinnedPlace place) {
+  size += sizeof(paddle::memory::detail::Metadata);
+  size_t alignment = paddle::platform::CpuMinChunkSize();
+  size_t remaining = size % alignment;
+  return remaining == 0 ? size : size + (alignment - remaining);
+}
+
+TEST(BuddyAllocator, CUDAPinnedAllocator) {
+  void *p = nullptr;
+
+  EXPECT_EQ(p, nullptr);
+
+  paddle::platform::CUDAPinnedPlace cpu;
+  p = paddle::memory::Alloc(cpu, 4096);
+
+  EXPECT_NE(p, nullptr);
+
+  paddle::platform::Place place = cpu;
+  EXPECT_EQ(paddle::memory::Used(cpu), paddle::memory::memory_usage(place));
+
+  paddle::memory::Free(cpu, p);
+}
+
+TEST(BuddyAllocator, CUDAPinnedMultAllocator) {
+  paddle::platform::CUDAPinnedPlace cpu;
+
+  std::unordered_map<void *, size_t> ps;
+
+  size_t total_size = paddle::memory::Used(cpu);
+  EXPECT_EQ(total_size, 0UL);
+
+  for (auto size :
+       {0, 128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304}) {
+    ps[paddle::memory::Alloc(cpu, size)] = size;
+
+    // Buddy Allocator doesn't manage too large memory chunk
+    if (paddle::memory::Used(cpu) == total_size) continue;
+
+    size_t aligned_size = align(size, cpu);
+    total_size += aligned_size;
+    EXPECT_EQ(total_size, paddle::memory::Used(cpu));
+  }
+
+  for (auto p : ps) {
+    EXPECT_EQ(is_aligned(p.first), true);
+    paddle::memory::Free(cpu, p.first);
+
+    // Buddy Allocator doesn't manage too large memory chunk
+    if (paddle::memory::Used(cpu) == total_size) continue;
+
+    size_t aligned_size = align(p.second, cpu);
+    total_size -= aligned_size;
+    EXPECT_EQ(total_size, paddle::memory::Used(cpu));
+  }
+}
 #endif
diff --git a/paddle/fluid/platform/place.cc b/paddle/fluid/platform/place.cc
index 11ca87c211..655ce8485d 100644
--- a/paddle/fluid/platform/place.cc
+++ b/paddle/fluid/platform/place.cc
@@ -26,6 +26,7 @@ class PlacePrinter : public boost::static_visitor<> {
   void operator()(const CUDAPlace &p) {
     os_ << "CUDAPlace(" << p.device << ")";
   }
+  void operator()(const CUDAPinnedPlace &p) { os_ << "CUDAPinnedPlace"; }
 
  private:
   std::ostream &os_;
-- 
GitLab