未验证 提交 b2a7261d 编写于 作者: F From00 提交者: GitHub

Add GetBasePtr interface in paddle::memory (#39145)

上级 529f1425
...@@ -34,6 +34,13 @@ if (WITH_ROCM) ...@@ -34,6 +34,13 @@ if (WITH_ROCM)
DEPS device_context malloc) DEPS device_context malloc)
endif() endif()
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(get_base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
#if (WITH_GPU) #if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory) # nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif() #endif()
...@@ -125,10 +125,3 @@ if(NOT WIN32) ...@@ -125,10 +125,3 @@ if(NOT WIN32)
cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator) cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator) cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
endif(NOT WIN32) endif(NOT WIN32)
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(base_ptr_test PROPERTIES
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
...@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation { ...@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
const platform::Place& place) const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {} : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
void* base_ptr() const { void* base_ptr() const { return base_ptr_; }
PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
paddle::platform::errors::Unimplemented(
"base_ptr() is only implemented for auto_growth "
"strategy, not support %s strategy",
FLAGS_allocator_strategy));
return base_ptr_;
}
private: private:
inline void RegisterDecoratedAllocator(Allocator* allocator) { inline void RegisterDecoratedAllocator(Allocator* allocator) {
......
...@@ -282,6 +282,10 @@ class AllocatorFacadePrivate { ...@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
return iter->second; return iter->second;
} }
// Returns the base pointer recorded on the underlying allocation.
// NOTE(review): assumes the pten::Allocation held here was produced by this
// facade and is actually an allocation::Allocation — confirm with callers.
void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
  auto* underlying = static_cast<Allocation*>(allocation.get());
  return underlying->base_ptr();
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool HasCUDAAllocator(const platform::CUDAPlace& place, bool HasCUDAAllocator(const platform::CUDAPlace& place,
const gpuStream_t& stream) { const gpuStream_t& stream) {
...@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator( ...@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1); return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
} }
// Returns the base address of the memory chunk that `allocation` was carved
// from. Only supported for the auto_growth allocator strategy on GPU places;
// any other combination raises an Unimplemented error.
void* AllocatorFacade::GetBasePtr(
    const std::shared_ptr<pten::Allocation>& allocation) {
  // Only the auto_growth allocator records a base pointer on each allocation
  // (see allocation::Allocation::base_ptr_), so reject other strategies.
  PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for auto_growth "
                        "strategy, not support allocator strategy: %d",
                        static_cast<int>(GetAllocatorStrategy())));
  // Base-pointer tracking is only wired up for CUDA allocations.
  // Fix: error message previously misspelled "support" as "suppot".
  PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for CUDAPlace(), not "
                        "support place: %s",
                        allocation->place()));
  return m_->GetBasePtr(allocation);
}
std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared( std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) { const platform::Place& place, size_t size) {
return std::shared_ptr<pten::Allocation>(Alloc(place, size)); return std::shared_ptr<pten::Allocation>(Alloc(place, size));
......
...@@ -51,6 +51,8 @@ class AllocatorFacade { ...@@ -51,6 +51,8 @@ class AllocatorFacade {
const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place); const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
// Allocate a shared allocation. // Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared(const platform::Place& place, std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size); size_t size);
......
...@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { ...@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void OneByOneAllocTest() { void OneByOneAllocTest() {
for (size_t i = 0; i < alloc_times_; ++i) { for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_); size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size); auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr(); void* base_ptr = GetBasePtr(allocation);
void* system_ptr = void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr); EXPECT_EQ(base_ptr, system_ptr);
...@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { ...@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
} }
void BatchByBatchAllocTest() { void BatchByBatchAllocTest() {
std::vector<AllocationPtr> allocations; std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(batch_size_); allocations.reserve(batch_size_);
size_t batch_num = alloc_times_ / batch_size_; size_t batch_num = alloc_times_ / batch_size_;
for (size_t i = 0; i < batch_num; ++i) { for (size_t i = 0; i < batch_num; ++i) {
for (size_t j = 0; j < batch_size_; ++j) { for (size_t j = 0; j < batch_size_; ++j) {
size_t size = dis_(random_engine_); size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size); auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr(); void* base_ptr = GetBasePtr(allocation);
void* system_ptr = void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr); EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation)); allocations.emplace_back(allocation);
} }
allocations.clear(); allocations.clear();
} }
...@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { ...@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
} }
void ContinuousAllocTest() { void ContinuousAllocTest() {
std::vector<AllocationPtr> allocations; std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(alloc_times_); allocations.reserve(alloc_times_);
for (size_t i = 0; i < alloc_times_; ++i) { for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_); size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size); auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr(); void* base_ptr = GetBasePtr(allocation);
void* system_ptr = void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr); EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation)); allocations.emplace_back(allocation);
} }
allocations.clear(); allocations.clear();
...@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test { ...@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
} }
void ZeroSizeAllocTest() { void ZeroSizeAllocTest() {
AllocationPtr allocation = Alloc(place_, 0); auto allocation = AllocShared(place_, 0);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr(); void* base_ptr = GetBasePtr(allocation);
void* system_ptr = void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId()); platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr); EXPECT_EQ(base_ptr, system_ptr);
......
...@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation, ...@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
stream); stream);
} }
// Thin forwarding shim: exposes AllocatorFacade::GetBasePtr through the
// paddle::memory namespace.
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
  auto& facade = allocation::AllocatorFacade::Instance();
  return facade.GetBasePtr(allocation);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size, AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream) { const gpuStream_t& stream) {
......
...@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place, ...@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
extern bool InSameStream(const std::shared_ptr<Allocation>& allocation, extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
const platform::Stream& stream); const platform::Stream& stream);
extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size, extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream); const gpuStream_t& stream);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册