Unverified commit b2a7261d, authored by From00, committed by GitHub

Add GetBasePtr interface in paddle::memory (#39145)

Parent commit: 529f1425
......@@ -34,6 +34,13 @@ if (WITH_ROCM)
DEPS device_context malloc)
endif()
# GPU-only unit test that checks GetBasePtr() against the driver-reported base
# pointer; skipped when CI requests no C++ tests via CI_SKIP_CPP_TEST.
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
  nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
  # Keep the whole ENVIRONMENT value on ONE line: a quoted argument that spans
  # source lines embeds the literal newline and the next line's indentation in
  # the property value, so the second FLAGS_* assignment would start with
  # whitespace instead of a variable name.
  set_tests_properties(
    get_base_ptr_test
    PROPERTIES
      ENVIRONMENT
      "FLAGS_allocator_strategy=auto_growth;FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
......@@ -125,10 +125,3 @@ if(NOT WIN32)
cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
endif(NOT WIN32)
# (Removed side of the diff.) Registered the old base_ptr_test, which exercised
# Allocation::base_ptr() under the auto_growth allocator strategy.
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(base_ptr_test PROPERTIES
# NOTE(review): this quoted value continues onto the next source line, so the
# newline and leading spaces become part of the ENVIRONMENT string — the second
# FLAGS_* assignment is likely not applied as intended; verify with CTest.
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
......@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
// NOTE(review): this span shows BOTH diff sides of Allocation::base_ptr().
// The removed version (immediately below) enforced that the global
// FLAGS_allocator_strategy was "auto_growth" before exposing the raw base
// pointer of the backing chunk:
void* base_ptr() const {
PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
paddle::platform::errors::Unimplemented(
"base_ptr() is only implemented for auto_growth "
"strategy, not support %s strategy",
FLAGS_allocator_strategy));
return base_ptr_;
}
// The new version returns base_ptr_ unconditionally; the strategy check moved
// up to AllocatorFacade::GetBasePtr(), which is now the public entry point.
void* base_ptr() const { return base_ptr_; }
private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
......
......@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
return iter->second;
}
// Resolves the base address of the memory chunk backing `allocation`.
// Performs an unchecked downcast: the stored object is assumed to be a
// paddle::memory::allocation::Allocation (callers reach this through
// AllocatorFacade::GetBasePtr, which validates strategy and place first).
void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
  auto* decorated = static_cast<Allocation*>(allocation.get());
  return decorated->base_ptr();
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool HasCUDAAllocator(const platform::CUDAPlace& place,
const gpuStream_t& stream) {
......@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}
// Returns the base pointer of the chunk that `allocation` was carved from.
// Only implemented for the auto_growth allocator strategy on GPU places;
// any other strategy/place combination raises errors::Unimplemented.
void* AllocatorFacade::GetBasePtr(
    const std::shared_ptr<pten::Allocation>& allocation) {
  PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for auto_growth "
                        "strategy, not support allocator strategy: %d",
                        static_cast<int>(GetAllocatorStrategy())));
  // Fixed typo in the user-facing error message: "suppot" -> "support".
  PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for CUDAPlace(), not "
                        "support place: %s",
                        allocation->place()));
  return m_->GetBasePtr(allocation);
}
std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) {
return std::shared_ptr<pten::Allocation>(Alloc(place, size));
......
......@@ -51,6 +51,8 @@ class AllocatorFacade {
const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
// Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size);
......
......@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void OneByOneAllocTest() {
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void BatchByBatchAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(batch_size_);
size_t batch_num = alloc_times_ / batch_size_;
for (size_t i = 0; i < batch_num; ++i) {
for (size_t j = 0; j < batch_size_; ++j) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}
allocations.clear();
}
......@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void ContinuousAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(alloc_times_);
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}
allocations.clear();
......@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void ZeroSizeAllocTest() {
AllocationPtr allocation = Alloc(place_, 0);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
auto allocation = AllocShared(place_, 0);
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......
......@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
stream);
}
// Thin convenience wrapper for the paddle::memory namespace: delegates to the
// AllocatorFacade singleton, which performs the strategy/place validation
// before resolving the chunk base pointer of `allocation`.
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream) {
......
......@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
const platform::Stream& stream);
extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.