diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt
index 97952e4b71641e00e27592380a0fd88f2c17b1a0..023b40518edf216f76642aae1577507ee2c36486 100644
--- a/paddle/fluid/memory/CMakeLists.txt
+++ b/paddle/fluid/memory/CMakeLists.txt
@@ -34,6 +34,13 @@ if (WITH_ROCM)
     DEPS device_context malloc)
 endif()
 
+if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
+  nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
+  set_tests_properties(get_base_ptr_test PROPERTIES
+                       ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
+                       FLAGS_use_stream_safe_cuda_allocator=true;")
+endif()
+
 #if (WITH_GPU)
 #  nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
 #endif()
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 939ad140415df45619018536520e3ffb9d681366..c0d1934a703b66a8ab8a1eab0c1d0680d73b9e17 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -125,10 +125,3 @@ if(NOT WIN32)
   cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
   cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
 endif(NOT WIN32)
-
-if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
-  nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
-  set_tests_properties(base_ptr_test PROPERTIES
-                       ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
-                       FLAGS_use_stream_safe_cuda_allocator=true;")
-endif()
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index 3f04d47516377251011174b1382679ba41fdca02..878633d1a62915383aa1c5306dcc7940d06282e4 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
              const platform::Place& place)
       : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
 
-  void* base_ptr() const {
-    PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
-                      paddle::platform::errors::Unimplemented(
-                          "base_ptr() is only implemented for auto_growth "
-                          "strategy, not support %s strategy",
-                          FLAGS_allocator_strategy));
-    return base_ptr_;
-  }
+  void* base_ptr() const { return base_ptr_; }
 
  private:
   inline void RegisterDecoratedAllocator(Allocator* allocator) {
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 6615bdf4b138b483761c82312841f5887f6075c7..7cdac0de6138f13325500759c0ca2a392e2000f9 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
     return iter->second;
   }
 
+  void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
+    return static_cast<Allocation*>(allocation.get())->base_ptr();
+  }
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   bool HasCUDAAllocator(const platform::CUDAPlace& place,
                         const gpuStream_t& stream) {
@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
   return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
 }
 
+void* AllocatorFacade::GetBasePtr(
+    const std::shared_ptr<pten::Allocation>& allocation) {
+  PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
+                    paddle::platform::errors::Unimplemented(
+                        "GetBasePtr() is only implemented for auto_growth "
+                        "strategy, not support allocator strategy: %d",
+                        static_cast<int>(GetAllocatorStrategy())));
+  PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
+                    paddle::platform::errors::Unimplemented(
+                        "GetBasePtr() is only implemented for CUDAPlace(), not "
+                        "support place: %s",
+                        allocation->place()));
+  return m_->GetBasePtr(allocation);
+}
+
 std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size) {
   return std::shared_ptr<pten::Allocation>(Alloc(place, size));
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index 76e2f0b5a94f6ddae8e8fb6281bdfcf70f10b76c..a9b92e1801e4a3c74941388f864172f078d7128a 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -51,6 +51,8 @@ class AllocatorFacade {
 
   const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);
 
+  void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation);
+
   // Allocate a shared allocation.
   std::shared_ptr<pten::Allocation> AllocShared(const platform::Place& place,
                                                 size_t size);
diff --git a/paddle/fluid/memory/allocation/base_ptr_test.cu b/paddle/fluid/memory/get_base_ptr_test.cu
similarity index 80%
rename from paddle/fluid/memory/allocation/base_ptr_test.cu
rename to paddle/fluid/memory/get_base_ptr_test.cu
index 5edabfcb9f5e7efab1242da5f5c091bebcf74c11..fe1d73b60284968d1e0022eb0383bcbcdc25856f 100644
--- a/paddle/fluid/memory/allocation/base_ptr_test.cu
+++ b/paddle/fluid/memory/get_base_ptr_test.cu
@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   void OneByOneAllocTest() {
     for (size_t i = 0; i < alloc_times_; ++i) {
       size_t size = dis_(random_engine_);
-      AllocationPtr allocation = Alloc(place_, size);
+      auto allocation = AllocShared(place_, size);
 
-      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+      void* base_ptr = GetBasePtr(allocation);
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void BatchByBatchAllocTest() {
-    std::vector<AllocationPtr> allocations;
+    std::vector<std::shared_ptr<pten::Allocation>> allocations;
     allocations.reserve(batch_size_);
     size_t batch_num = alloc_times_ / batch_size_;
     for (size_t i = 0; i < batch_num; ++i) {
       for (size_t j = 0; j < batch_size_; ++j) {
         size_t size = dis_(random_engine_);
-        AllocationPtr allocation = Alloc(place_, size);
+        auto allocation = AllocShared(place_, size);
 
-        void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+        void* base_ptr = GetBasePtr(allocation);
         void* system_ptr =
             platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
         EXPECT_EQ(base_ptr, system_ptr);
 
-        allocations.emplace_back(std::move(allocation));
+        allocations.emplace_back(allocation);
       }
       allocations.clear();
     }
@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void ContinuousAllocTest() {
-    std::vector<AllocationPtr> allocations;
+    std::vector<std::shared_ptr<pten::Allocation>> allocations;
     allocations.reserve(alloc_times_);
 
     for (size_t i = 0; i < alloc_times_; ++i) {
       size_t size = dis_(random_engine_);
-      AllocationPtr allocation = Alloc(place_, size);
+      auto allocation = AllocShared(place_, size);
 
-      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+      void* base_ptr = GetBasePtr(allocation);
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
 
-      allocations.emplace_back(std::move(allocation));
+      allocations.emplace_back(allocation);
     }
 
     allocations.clear();
@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void ZeroSizeAllocTest() {
-    AllocationPtr allocation = Alloc(place_, 0);
-    void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+    auto allocation = AllocShared(place_, 0);
+    void* base_ptr = GetBasePtr(allocation);
     void* system_ptr =
         platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
     EXPECT_EQ(base_ptr, system_ptr);
diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc
index 3e859377e98d801e775461d9cfaaa50fe9c43e8e..63c562be97fa0728b26761ac856caf755717a64d 100644
--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<pten::Allocation>& allocation,
                                                               stream);
 }
 
+void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
+  return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
+}
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
                     const gpuStream_t& stream) {
diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h
index 6443e91f08cbeb7c3f504e8f4894808bffd5bbf1..855cbb775a1096ba749d93667c71268045645a15 100644
--- a/paddle/fluid/memory/malloc.h
+++ b/paddle/fluid/memory/malloc.h
@@ -44,6 +44,8 @@ extern std::shared_ptr<pten::Allocation> AllocShared(const platform::Place& place,
 
 extern bool InSameStream(const std::shared_ptr<pten::Allocation>& allocation,
                          const platform::Stream& stream);
 
+extern void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation);
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
                            const gpuStream_t& stream);
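
Note (not part of the patch): a minimal usage sketch of the new memory::GetBasePtr() API added above. The headers and function names are taken from the diff itself; the Example() wrapper, device id, and size are illustrative assumptions. Per the checks added in AllocatorFacade::GetBasePtr(), the call is only valid for GPU allocations under FLAGS_allocator_strategy=auto_growth (the new test additionally sets FLAGS_use_stream_safe_cuda_allocator=true).

    // Usage sketch, assuming a CUDA build with the auto_growth allocator strategy.
    #include "paddle/fluid/memory/malloc.h"
    #include "paddle/fluid/platform/place.h"

    void Example() {
      paddle::platform::CUDAPlace place(/*device_id=*/0);

      // AllocShared() returns std::shared_ptr<pten::Allocation>, the type
      // GetBasePtr() expects (see malloc.h in the diff).
      auto allocation = paddle::memory::AllocShared(place, /*size=*/1024);

      // Base address of the auto_growth block that contains this allocation;
      // allocation->ptr() may point anywhere inside that block.
      void* base_ptr = paddle::memory::GetBasePtr(allocation);
      (void)base_ptr;
    }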