From b2a7261d3cecde4153fbfcbc9aaef769fe936c4e Mon Sep 17 00:00:00 2001 From: From00 Date: Tue, 25 Jan 2022 10:53:33 +0800 Subject: [PATCH] Add GetBasePtr interface in paddle::memory (#39145) --- paddle/fluid/memory/CMakeLists.txt | 7 ++++++ paddle/fluid/memory/allocation/CMakeLists.txt | 7 ------ paddle/fluid/memory/allocation/allocator.h | 9 +------ .../memory/allocation/allocator_facade.cc | 19 +++++++++++++++ .../memory/allocation/allocator_facade.h | 2 ++ .../base_ptr_test.cu => get_base_ptr_test.cu} | 24 +++++++++---------- paddle/fluid/memory/malloc.cc | 4 ++++ paddle/fluid/memory/malloc.h | 2 ++ 8 files changed, 47 insertions(+), 27 deletions(-) rename paddle/fluid/memory/{allocation/base_ptr_test.cu => get_base_ptr_test.cu} (80%) diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 97952e4b716..023b40518ed 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -34,6 +34,13 @@ if (WITH_ROCM) DEPS device_context malloc) endif() +if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") + nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info) + set_tests_properties(get_base_ptr_test PROPERTIES + ENVIRONMENT "FLAGS_allocator_strategy=auto_growth; + FLAGS_use_stream_safe_cuda_allocator=true;") +endif() + #if (WITH_GPU) # nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory) #endif() diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 939ad140415..c0d1934a703 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -125,10 +125,3 @@ if(NOT WIN32) cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator) cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator) endif(NOT WIN32) - -if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON") - nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info) - set_tests_properties(base_ptr_test PROPERTIES - ENVIRONMENT "FLAGS_allocator_strategy=auto_growth; - FLAGS_use_stream_safe_cuda_allocator=true;") -endif() diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 3f04d475163..878633d1a62 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -93,14 +93,7 @@ class Allocation : public pten::Allocation { const platform::Place& place) : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {} - void* base_ptr() const { - PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth", - paddle::platform::errors::Unimplemented( - "base_ptr() is only implemented for auto_growth " - "strategy, not support %s strategy", - FLAGS_allocator_strategy)); - return base_ptr_; - } + void* base_ptr() const { return base_ptr_; } private: inline void RegisterDecoratedAllocator(Allocator* allocator) { diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 6615bdf4b13..7cdac0de613 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -282,6 +282,10 @@ class AllocatorFacadePrivate { return iter->second; } + void* GetBasePtr(const std::shared_ptr& allocation) { + return static_cast(allocation.get())->base_ptr(); + } + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) bool HasCUDAAllocator(const platform::CUDAPlace& place, const gpuStream_t& stream) { @@ -821,6 +825,21 @@ const std::shared_ptr& AllocatorFacade::GetAllocator( return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1); } +void* AllocatorFacade::GetBasePtr( + const std::shared_ptr& allocation) { + PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth, + paddle::platform::errors::Unimplemented( + "GetBasePtr() is only implemented for auto_growth " + "strategy, not support allocator strategy: %d", + static_cast(GetAllocatorStrategy()))); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true, + paddle::platform::errors::Unimplemented( + "GetBasePtr() is only implemented for CUDAPlace(), not " + "suppot place: %s", + allocation->place())); + return m_->GetBasePtr(allocation); +} + std::shared_ptr AllocatorFacade::AllocShared( const platform::Place& place, size_t size) { return std::shared_ptr(Alloc(place, size)); diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index 76e2f0b5a94..a9b92e1801e 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -51,6 +51,8 @@ class AllocatorFacade { const std::shared_ptr& GetAllocator(const platform::Place& place); + void* GetBasePtr(const std::shared_ptr& allocation); + // Allocate a shared allocation. std::shared_ptr AllocShared(const platform::Place& place, size_t size); diff --git a/paddle/fluid/memory/allocation/base_ptr_test.cu b/paddle/fluid/memory/get_base_ptr_test.cu similarity index 80% rename from paddle/fluid/memory/allocation/base_ptr_test.cu rename to paddle/fluid/memory/get_base_ptr_test.cu index 5edabfcb9f5..fe1d73b6028 100644 --- a/paddle/fluid/memory/allocation/base_ptr_test.cu +++ b/paddle/fluid/memory/get_base_ptr_test.cu @@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { void OneByOneAllocTest() { for (size_t i = 0; i < alloc_times_; ++i) { size_t size = dis_(random_engine_); - AllocationPtr allocation = Alloc(place_, size); + auto allocation = AllocShared(place_, size); - void* base_ptr = static_cast(allocation.get())->base_ptr(); + void* base_ptr = GetBasePtr(allocation); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); @@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { } void BatchByBatchAllocTest() { - std::vector allocations; + std::vector> allocations; allocations.reserve(batch_size_); size_t batch_num = alloc_times_ / batch_size_; for (size_t i = 0; i < batch_num; ++i) { for (size_t j = 0; j < batch_size_; ++j) { size_t size = dis_(random_engine_); - AllocationPtr allocation = Alloc(place_, size); + auto allocation = AllocShared(place_, size); - void* base_ptr = static_cast(allocation.get())->base_ptr(); + void* base_ptr = GetBasePtr(allocation); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); - allocations.emplace_back(std::move(allocation)); + allocations.emplace_back(allocation); } allocations.clear(); } @@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { } void ContinuousAllocTest() { - std::vector allocations; + std::vector> allocations; allocations.reserve(alloc_times_); for (size_t i = 0; i < alloc_times_; ++i) { size_t size = dis_(random_engine_); - AllocationPtr allocation = Alloc(place_, size); + auto allocation = AllocShared(place_, size); - void* base_ptr = static_cast(allocation.get())->base_ptr(); + void* base_ptr = GetBasePtr(allocation); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); - allocations.emplace_back(std::move(allocation)); + allocations.emplace_back(allocation); } allocations.clear(); @@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { } void ZeroSizeAllocTest() { - AllocationPtr allocation = Alloc(place_, 0); - void* base_ptr = static_cast(allocation.get())->base_ptr(); + auto allocation = AllocShared(place_, 0); + void* base_ptr = GetBasePtr(allocation); void* system_ptr = platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); EXPECT_EQ(base_ptr, system_ptr); diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 3e859377e98..63c562be97f 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr& allocation, stream); } +void* GetBasePtr(const std::shared_ptr& allocation) { + return allocation::AllocatorFacade::Instance().GetBasePtr(allocation); +} + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size, const gpuStream_t& stream) { diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 6443e91f08c..855cbb775a1 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -44,6 +44,8 @@ extern std::shared_ptr AllocShared(const platform::Place& place, extern bool InSameStream(const std::shared_ptr& allocation, const platform::Stream& stream); +extern void* GetBasePtr(const std::shared_ptr& allocation); + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size, const gpuStream_t& stream); -- GitLab