Unverified commit b2a7261d, authored by From00, committed by GitHub

Add GetBasePtr interface in paddle::memory (#39145)

Parent commit: 529f1425
......@@ -34,6 +34,13 @@ if (WITH_ROCM)
DEPS device_context malloc)
endif()
# GPU-only unit test that checks GetBasePtr() against the driver-reported base
# pointer; skipped when CI requests no C++ tests via CI_SKIP_CPP_TEST.
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
  nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
  # Keep the whole ENVIRONMENT value on ONE line: a quoted argument that spans
  # source lines embeds the literal newline and the next line's indentation in
  # the property value, so the second FLAGS_* assignment would start with
  # whitespace instead of a variable name.
  set_tests_properties(
    get_base_ptr_test
    PROPERTIES
      ENVIRONMENT
      "FLAGS_allocator_strategy=auto_growth;FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
......@@ -125,10 +125,3 @@ if(NOT WIN32)
cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
endif(NOT WIN32)
# (Removed side of the diff.) Registered the old base_ptr_test, which exercised
# Allocation::base_ptr() under the auto_growth allocator strategy.
if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
set_tests_properties(base_ptr_test PROPERTIES
# NOTE(review): this quoted value continues onto the next source line, so the
# newline and leading spaces become part of the ENVIRONMENT string — the second
# FLAGS_* assignment is likely not applied as intended; verify with CTest.
ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
FLAGS_use_stream_safe_cuda_allocator=true;")
endif()
......@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
const platform::Place& place)
: pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
// NOTE(review): this span shows BOTH diff sides of Allocation::base_ptr().
// The removed version (immediately below) enforced that the global
// FLAGS_allocator_strategy was "auto_growth" before exposing the raw base
// pointer of the backing chunk:
void* base_ptr() const {
PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
paddle::platform::errors::Unimplemented(
"base_ptr() is only implemented for auto_growth "
"strategy, not support %s strategy",
FLAGS_allocator_strategy));
return base_ptr_;
}
// The new version returns base_ptr_ unconditionally; the strategy check moved
// up to AllocatorFacade::GetBasePtr(), which is now the public entry point.
void* base_ptr() const { return base_ptr_; }
private:
inline void RegisterDecoratedAllocator(Allocator* allocator) {
......
......@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
return iter->second;
}
// Resolves the base address of the memory chunk backing `allocation`.
// Performs an unchecked downcast: the stored object is assumed to be a
// paddle::memory::allocation::Allocation (callers reach this through
// AllocatorFacade::GetBasePtr, which validates strategy and place first).
void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
  auto* decorated = static_cast<Allocation*>(allocation.get());
  return decorated->base_ptr();
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
bool HasCUDAAllocator(const platform::CUDAPlace& place,
const gpuStream_t& stream) {
......@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
}
// Returns the base pointer of the chunk that `allocation` was carved from.
// Only implemented for the auto_growth allocator strategy on GPU places;
// any other strategy/place combination raises errors::Unimplemented.
void* AllocatorFacade::GetBasePtr(
    const std::shared_ptr<pten::Allocation>& allocation) {
  PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for auto_growth "
                        "strategy, not support allocator strategy: %d",
                        static_cast<int>(GetAllocatorStrategy())));
  // Fixed typo in the user-facing error message: "suppot" -> "support".
  PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
                    paddle::platform::errors::Unimplemented(
                        "GetBasePtr() is only implemented for CUDAPlace(), not "
                        "support place: %s",
                        allocation->place()));
  return m_->GetBasePtr(allocation);
}
std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
const platform::Place& place, size_t size) {
return std::shared_ptr<pten::Allocation>(Alloc(place, size));
......
......@@ -51,6 +51,8 @@ class AllocatorFacade {
const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
// Allocate a shared allocation.
std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
size_t size);
......
......@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
void OneByOneAllocTest() {
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void BatchByBatchAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(batch_size_);
size_t batch_num = alloc_times_ / batch_size_;
for (size_t i = 0; i < batch_num; ++i) {
for (size_t j = 0; j < batch_size_; ++j) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}
allocations.clear();
}
......@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void ContinuousAllocTest() {
std::vector<AllocationPtr> allocations;
std::vector<std::shared_ptr<pten::Allocation>> allocations;
allocations.reserve(alloc_times_);
for (size_t i = 0; i < alloc_times_; ++i) {
size_t size = dis_(random_engine_);
AllocationPtr allocation = Alloc(place_, size);
auto allocation = AllocShared(place_, size);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
allocations.emplace_back(std::move(allocation));
allocations.emplace_back(allocation);
}
allocations.clear();
......@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
}
void ZeroSizeAllocTest() {
AllocationPtr allocation = Alloc(place_, 0);
void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
auto allocation = AllocShared(place_, 0);
void* base_ptr = GetBasePtr(allocation);
void* system_ptr =
platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
EXPECT_EQ(base_ptr, system_ptr);
......
......@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
stream);
}
// Thin convenience wrapper for the paddle::memory namespace: delegates to the
// AllocatorFacade singleton, which performs the strategy/place validation
// before resolving the chunk base pointer of `allocation`.
void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream) {
......
......@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
const platform::Stream& stream);
extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
const gpuStream_t& stream);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.