提交 158d6c4d 编写于 作者: C chengduoZH

add unit test

上级 18eb7730
...@@ -92,9 +92,6 @@ class Tensor { ...@@ -92,9 +92,6 @@ class Tensor {
/*! Return the numel of the memory block. */ /*! Return the numel of the memory block. */
inline int64_t numel() const; inline int64_t numel() const;
/*! Return the numel of the memory block. */
inline bool isPinned() const;
/*! Resize the dimensions of the memory block. */ /*! Resize the dimensions of the memory block. */
inline Tensor& Resize(const DDim& dims); inline Tensor& Resize(const DDim& dims);
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/gpu_info.h"
...@@ -127,10 +128,12 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) { ...@@ -127,10 +128,12 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
// NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size // NOTE: here, we use CpuMaxAllocSize()/2 as the maximum memory size
// of host pinned allocation. Allocates too much would reduce // of host pinned allocation. Allocates too much would reduce
// the amount of memory available to the underlying system for paging. // the amount of memory available to the underlying system for paging.
size_t usable = CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_; size_t usable =
paddle::platform::CpuMaxAllocSize() / 2 - cuda_pinnd_alloc_size_;
if (size > usable) return nullptr; if (size > usable) return nullptr;
void* p;
// PINNED memory is visible to all CUDA contexts. // PINNED memory is visible to all CUDA contexts.
cudaError_t result = cudaMallocHost(&p, size); cudaError_t result = cudaMallocHost(&p, size);
...@@ -161,7 +164,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) { ...@@ -161,7 +164,7 @@ void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) {
} }
} }
bool CUDAPinnedAllocator::UseGpu() const { return true; } bool CUDAPinnedAllocator::UseGpu() const { return false; }
#endif #endif
......
...@@ -56,6 +56,45 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>( ...@@ -56,6 +56,45 @@ void Copy<platform::CUDAPlace, platform::CUDAPlace>(
} }
} }
template <>
void Copy<platform::CPUPlace, platform::CUDAPinnedPlace>(
platform::CPUPlace dst_place, void* dst,
platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
std::memcpy(dst, src, num);
}
template <>
void Copy<platform::CUDAPinnedPlace, platform::CPUPlace>(
platform::CUDAPinnedPlace dst_place, void* dst,
platform::CPUPlace src_place, const void* src, size_t num) {
std::memcpy(dst, src, num);
}
template <>
void Copy<platform::CUDAPinnedPlace, platform::CUDAPinnedPlace>(
platform::CUDAPinnedPlace dst_place, void* dst,
platform::CUDAPinnedPlace src_place, const void* src, size_t num) {
std::memcpy(dst, src, num);
}
template <>
void Copy<platform::CUDAPinnedPlace, platform::CUDAPlace>(
platform::CUDAPinnedPlace dst_place, void* dst,
platform::CUDAPlace src_place, const void* src, size_t num,
cudaStream_t stream) {
platform::SetDeviceId(src_place.device);
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyDeviceToHost, stream);
}
template <>
void Copy<platform::CUDAPlace, platform::CUDAPinnedPlace>(
platform::CUDAPlace dst_place, void* dst,
platform::CUDAPinnedPlace src_place, const void* src, size_t num,
cudaStream_t stream) {
platform::SetDeviceId(dst_place.device);
platform::GpuMemcpyAsync(dst, src, num, cudaMemcpyHostToDevice, stream);
}
#endif #endif
} // namespace memory } // namespace memory
......
...@@ -82,16 +82,6 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { ...@@ -82,16 +82,6 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
return as[gpu_id]; return as[gpu_id];
} }
BuddyAllocator* GetCUDAPinnedBuddyAllocator(int gpu_id) {
static BuddyAllocator* as = NULL;
if (as == NULL) {
as = new BuddyAllocator(new detail::CUDAPinnedAllocator,
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize());
}
return as;
}
template <> template <>
size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) { size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
return GetGPUBuddyAllocator(place.device)->Used(); return GetGPUBuddyAllocator(place.device)->Used();
...@@ -100,8 +90,7 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) { ...@@ -100,8 +90,7 @@ size_t Used<platform::CUDAPlace>(platform::CUDAPlace place) {
template <> template <>
void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) { void* Alloc<platform::CUDAPlace>(platform::CUDAPlace place, size_t size) {
auto* buddy_allocator = GetGPUBuddyAllocator(place.device); auto* buddy_allocator = GetGPUBuddyAllocator(place.device);
void* ptr = buddy_allocator->Alloc(size); auto* ptr = buddy_allocator->Alloc(size);
if (ptr == nullptr) { if (ptr == nullptr) {
int cur_dev = platform::GetCurrentDeviceId(); int cur_dev = platform::GetCurrentDeviceId();
platform::SetDeviceId(place.device); platform::SetDeviceId(place.device);
...@@ -123,37 +112,39 @@ void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) { ...@@ -123,37 +112,39 @@ void Free<platform::CUDAPlace>(platform::CUDAPlace place, void* p) {
GetGPUBuddyAllocator(place.device)->Free(p); GetGPUBuddyAllocator(place.device)->Free(p);
} }
BuddyAllocator* GetCUDAPinnedBuddyAllocator() {
static BuddyAllocator* ba = NULL;
if (ba == NULL) {
ba = new BuddyAllocator(new detail::CUDAPinnedAllocator,
platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize());
}
return ba;
}
template <>
size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) { size_t Used<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place) {
return GetGPUBuddyAllocator(place.device)->Used(); return GetCUDAPinnedBuddyAllocator()->Used();
} }
template <> template <>
void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* Alloc<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place,
size_t size) { size_t size) {
auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(place.device); auto* buddy_allocator = GetCUDAPinnedBuddyAllocator();
void* ptr = buddy_allocator->Alloc(size); void* ptr = buddy_allocator->Alloc(size);
if (ptr == nullptr) { // if (ptr == nullptr) {
int cur_dev = platform::GetCurrentDeviceId(); // LOG(WARNING) << "Cannot allocate " << size << " bytes in CUDAPinnedPlace
platform::SetDeviceId(place.device); // "
size_t avail, total; // << ", available " << avail << " bytes"
platform::GpuMemoryUsage(avail, total); // }
LOG(WARNING) << "Cannot allocate " << size << " bytes in GPU "
<< place.device << ", available " << avail << " bytes";
LOG(WARNING) << "total " << total;
LOG(WARNING) << "GpuMinChunkSize " << platform::GpuMinChunkSize();
LOG(WARNING) << "GpuMaxChunkSize " << platform::GpuMaxChunkSize();
LOG(WARNING) << "GPU memory used: " << Used<platform::CUDAPlace>(place);
platform::SetDeviceId(cur_dev);
}
return ptr; return ptr;
} }
template <> template <>
void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) { void Free<platform::CUDAPinnedPlace>(platform::CUDAPinnedPlace place, void* p) {
GetCUDAPinnedBuddyAllocator(place.device)->Free(p); GetCUDAPinnedBuddyAllocator()->Free(p);
} }
#endif #endif
size_t Usage::operator()(const platform::CPUPlace& cpu) const { size_t Usage::operator()(const platform::CPUPlace& cpu) const {
......
...@@ -33,7 +33,7 @@ namespace memory { ...@@ -33,7 +33,7 @@ namespace memory {
* address is valid or not. * address is valid or not.
*/ */
template <typename Place> template <typename Place>
void* Alloc(Place place, size_t size, bool is_pinned = false); void* Alloc(Place place, size_t size);
/** /**
* \brief Free memory block in one place. * \brief Free memory block in one place.
...@@ -43,7 +43,7 @@ void* Alloc(Place place, size_t size, bool is_pinned = false); ...@@ -43,7 +43,7 @@ void* Alloc(Place place, size_t size, bool is_pinned = false);
* *
*/ */
template <typename Place> template <typename Place>
void Free(Place place, void* ptr, bool is_pinned = false); void Free(Place place, void* ptr);
/** /**
* \brief Total size of used memory in one place. * \brief Total size of used memory in one place.
...@@ -75,13 +75,11 @@ class PODDeleter { ...@@ -75,13 +75,11 @@ class PODDeleter {
static_assert(std::is_pod<T>::value, "T must be POD"); static_assert(std::is_pod<T>::value, "T must be POD");
public: public:
explicit PODDeleter(Place place, bool is_pinned = false) explicit PODDeleter(Place place) : place_(place) {}
: place_(place), is_pinned_(is_pinned) {} void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr), is_pinned_); }
private: private:
Place place_; Place place_;
bool is_pinned_;
}; };
/** /**
......
...@@ -141,4 +141,59 @@ TEST(BuddyAllocator, GPUMultAlloc) { ...@@ -141,4 +141,59 @@ TEST(BuddyAllocator, GPUMultAlloc) {
} }
} }
size_t align(size_t size, paddle::platform::CUDAPinnedPlace place) {
size += sizeof(paddle::memory::detail::Metadata);
size_t alignment = paddle::platform::CpuMinChunkSize();
size_t remaining = size % alignment;
return remaining == 0 ? size : size + (alignment - remaining);
}
TEST(BuddyAllocator, CUDAPinnedAllocator) {
void *p = nullptr;
EXPECT_EQ(p, nullptr);
paddle::platform::CUDAPinnedPlace cpu;
p = paddle::memory::Alloc(cpu, 4096);
EXPECT_NE(p, nullptr);
paddle::platform::Place place = cpu;
EXPECT_EQ(paddle::memory::Used(cpu), paddle::memory::memory_usage(place));
paddle::memory::Free(cpu, p);
}
TEST(BuddyAllocator, CUDAPinnedMultAllocator) {
paddle::platform::CUDAPinnedPlace cpu;
std::unordered_map<void *, size_t> ps;
size_t total_size = paddle::memory::Used(cpu);
EXPECT_EQ(total_size, 0UL);
for (auto size :
{0, 128, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304}) {
ps[paddle::memory::Alloc(cpu, size)] = size;
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(cpu) == total_size) continue;
size_t aligned_size = align(size, cpu);
total_size += aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(cpu));
}
for (auto p : ps) {
EXPECT_EQ(is_aligned(p.first), true);
paddle::memory::Free(cpu, p.first);
// Buddy Allocator doesn't manage too large memory chunk
if (paddle::memory::Used(cpu) == total_size) continue;
size_t aligned_size = align(p.second, cpu);
total_size -= aligned_size;
EXPECT_EQ(total_size, paddle::memory::Used(cpu));
}
}
#endif #endif
...@@ -26,6 +26,7 @@ class PlacePrinter : public boost::static_visitor<> { ...@@ -26,6 +26,7 @@ class PlacePrinter : public boost::static_visitor<> {
void operator()(const CUDAPlace &p) { void operator()(const CUDAPlace &p) {
os_ << "CUDAPlace(" << p.device << ")"; os_ << "CUDAPlace(" << p.device << ")";
} }
void operator()(const CUDAPinnedPlace &p) { os_ << "CUDAPinnedPlace"; }
private: private:
std::ostream &os_; std::ostream &os_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册