From b2a7261d3cecde4153fbfcbc9aaef769fe936c4e Mon Sep 17 00:00:00 2001
From: From00 <chenruibiao@baidu.com>
Date: Tue, 25 Jan 2022 10:53:33 +0800
Subject: [PATCH] Add GetBasePtr interface in paddle::memory (#39145)

---
 paddle/fluid/memory/CMakeLists.txt            |  7 ++++++
 paddle/fluid/memory/allocation/CMakeLists.txt |  7 ------
 paddle/fluid/memory/allocation/allocator.h    |  9 +------
 .../memory/allocation/allocator_facade.cc     | 19 +++++++++++++++
 .../memory/allocation/allocator_facade.h      |  2 ++
 .../base_ptr_test.cu => get_base_ptr_test.cu} | 24 +++++++++----------
 paddle/fluid/memory/malloc.cc                 |  4 ++++
 paddle/fluid/memory/malloc.h                  |  2 ++
 8 files changed, 47 insertions(+), 27 deletions(-)
 rename paddle/fluid/memory/{allocation/base_ptr_test.cu => get_base_ptr_test.cu} (80%)

diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt
index 97952e4b716..023b40518ed 100644
--- a/paddle/fluid/memory/CMakeLists.txt
+++ b/paddle/fluid/memory/CMakeLists.txt
@@ -34,6 +34,13 @@ if (WITH_ROCM)
             DEPS device_context malloc)
 endif()
 
+if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")       
+  nv_test(get_base_ptr_test SRCS get_base_ptr_test.cu DEPS malloc gpu_info)
+  set_tests_properties(get_base_ptr_test PROPERTIES 
+                       ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
+                                    FLAGS_use_stream_safe_cuda_allocator=true;")
+endif()
+
 #if (WITH_GPU)
 #   nv_test(pinned_memory_test SRCS pinned_memory_test.cu  DEPS place memory)
 #endif()
diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt
index 939ad140415..c0d1934a703 100644
--- a/paddle/fluid/memory/allocation/CMakeLists.txt
+++ b/paddle/fluid/memory/allocation/CMakeLists.txt
@@ -125,10 +125,3 @@ if(NOT WIN32)
   cc_library(mmap_allocator SRCS mmap_allocator.cc DEPS allocator)
   cc_test(mmap_allocator_test SRCS mmap_allocator_test.cc DEPS mmap_allocator allocator)
 endif(NOT WIN32)
-
-if(WITH_GPU AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")       
-  nv_test(base_ptr_test SRCS base_ptr_test.cu DEPS malloc gpu_info)
-  set_tests_properties(base_ptr_test PROPERTIES 
-                       ENVIRONMENT "FLAGS_allocator_strategy=auto_growth;
-                                    FLAGS_use_stream_safe_cuda_allocator=true;")
-endif()
diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h
index 3f04d475163..878633d1a62 100644
--- a/paddle/fluid/memory/allocation/allocator.h
+++ b/paddle/fluid/memory/allocation/allocator.h
@@ -93,14 +93,7 @@ class Allocation : public pten::Allocation {
              const platform::Place& place)
       : pten::Allocation(ptr, size, place), base_ptr_(base_ptr) {}
 
-  void* base_ptr() const {
-    PADDLE_ENFORCE_EQ(FLAGS_allocator_strategy, "auto_growth",
-                      paddle::platform::errors::Unimplemented(
-                          "base_ptr() is only implemented for auto_growth "
-                          "strategy, not support %s strategy",
-                          FLAGS_allocator_strategy));
-    return base_ptr_;
-  }
+  void* base_ptr() const { return base_ptr_; }
 
  private:
   inline void RegisterDecoratedAllocator(Allocator* allocator) {
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
index 6615bdf4b13..7cdac0de613 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.cc
+++ b/paddle/fluid/memory/allocation/allocator_facade.cc
@@ -282,6 +282,10 @@ class AllocatorFacadePrivate {
     return iter->second;
   }
 
+  void* GetBasePtr(const std::shared_ptr<pten::Allocation>& allocation) {
+    return static_cast<Allocation*>(allocation.get())->base_ptr();
+  }
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   bool HasCUDAAllocator(const platform::CUDAPlace& place,
                         const gpuStream_t& stream) {
@@ -821,6 +825,21 @@ const std::shared_ptr<Allocator>& AllocatorFacade::GetAllocator(
   return m_->GetAllocator(place, /* A non-zero num to choose allocator_ */ 1);
 }
 
+void* AllocatorFacade::GetBasePtr(
+    const std::shared_ptr<pten::Allocation>& allocation) {
+  PADDLE_ENFORCE_EQ(GetAllocatorStrategy(), AllocatorStrategy::kAutoGrowth,
+                    paddle::platform::errors::Unimplemented(
+                        "GetBasePtr() is only implemented for auto_growth "
+                        "strategy, not support allocator strategy: %d",
+                        static_cast<int>(GetAllocatorStrategy())));
+  PADDLE_ENFORCE_EQ(platform::is_gpu_place(allocation->place()), true,
+                    paddle::platform::errors::Unimplemented(
+                        "GetBasePtr() is only implemented for CUDAPlace(), not "
+                        "suppot place: %s",
+                        allocation->place()));
+  return m_->GetBasePtr(allocation);
+}
+
 std::shared_ptr<pten::Allocation> AllocatorFacade::AllocShared(
     const platform::Place& place, size_t size) {
   return std::shared_ptr<pten::Allocation>(Alloc(place, size));
diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h
index 76e2f0b5a94..a9b92e1801e 100644
--- a/paddle/fluid/memory/allocation/allocator_facade.h
+++ b/paddle/fluid/memory/allocation/allocator_facade.h
@@ -51,6 +51,8 @@ class AllocatorFacade {
 
   const std::shared_ptr<Allocator>& GetAllocator(const platform::Place& place);
 
+  void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
+
   // Allocate a shared allocation.
   std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
                                           size_t size);
diff --git a/paddle/fluid/memory/allocation/base_ptr_test.cu b/paddle/fluid/memory/get_base_ptr_test.cu
similarity index 80%
rename from paddle/fluid/memory/allocation/base_ptr_test.cu
rename to paddle/fluid/memory/get_base_ptr_test.cu
index 5edabfcb9f5..fe1d73b6028 100644
--- a/paddle/fluid/memory/allocation/base_ptr_test.cu
+++ b/paddle/fluid/memory/get_base_ptr_test.cu
@@ -35,9 +35,9 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   void OneByOneAllocTest() {
     for (size_t i = 0; i < alloc_times_; ++i) {
       size_t size = dis_(random_engine_);
-      AllocationPtr allocation = Alloc(place_, size);
+      auto allocation = AllocShared(place_, size);
 
-      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+      void* base_ptr = GetBasePtr(allocation);
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
@@ -47,21 +47,21 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void BatchByBatchAllocTest() {
-    std::vector<AllocationPtr> allocations;
+    std::vector<std::shared_ptr<pten::Allocation>> allocations;
     allocations.reserve(batch_size_);
     size_t batch_num = alloc_times_ / batch_size_;
 
     for (size_t i = 0; i < batch_num; ++i) {
       for (size_t j = 0; j < batch_size_; ++j) {
         size_t size = dis_(random_engine_);
-        AllocationPtr allocation = Alloc(place_, size);
+        auto allocation = AllocShared(place_, size);
 
-        void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+        void* base_ptr = GetBasePtr(allocation);
         void* system_ptr =
             platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
         EXPECT_EQ(base_ptr, system_ptr);
 
-        allocations.emplace_back(std::move(allocation));
+        allocations.emplace_back(allocation);
       }
       allocations.clear();
     }
@@ -70,19 +70,19 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void ContinuousAllocTest() {
-    std::vector<AllocationPtr> allocations;
+    std::vector<std::shared_ptr<pten::Allocation>> allocations;
     allocations.reserve(alloc_times_);
 
     for (size_t i = 0; i < alloc_times_; ++i) {
       size_t size = dis_(random_engine_);
-      AllocationPtr allocation = Alloc(place_, size);
+      auto allocation = AllocShared(place_, size);
 
-      void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+      void* base_ptr = GetBasePtr(allocation);
       void* system_ptr =
           platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
       EXPECT_EQ(base_ptr, system_ptr);
 
-      allocations.emplace_back(std::move(allocation));
+      allocations.emplace_back(allocation);
     }
 
     allocations.clear();
@@ -90,8 +90,8 @@ class CUDAAllocatoionBasePtrTest : public ::testing::Test {
   }
 
   void ZeroSizeAllocTest() {
-    AllocationPtr allocation = Alloc(place_, 0);
-    void* base_ptr = static_cast<Allocation*>(allocation.get())->base_ptr();
+    auto allocation = AllocShared(place_, 0);
+    void* base_ptr = GetBasePtr(allocation);
     void* system_ptr =
         platform::GetGpuBasePtr(allocation->ptr(), place_.GetDeviceId());
     EXPECT_EQ(base_ptr, system_ptr);
diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc
index 3e859377e98..63c562be97f 100644
--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
@@ -47,6 +47,10 @@ bool InSameStream(const std::shared_ptr<Allocation>& allocation,
                                                               stream);
 }
 
+void* GetBasePtr(const std::shared_ptr<Allocation>& allocation) {
+  return allocation::AllocatorFacade::Instance().GetBasePtr(allocation);
+}
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
                     const gpuStream_t& stream) {
diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h
index 6443e91f08c..855cbb775a1 100644
--- a/paddle/fluid/memory/malloc.h
+++ b/paddle/fluid/memory/malloc.h
@@ -44,6 +44,8 @@ extern std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
 extern bool InSameStream(const std::shared_ptr<Allocation>& allocation,
                          const platform::Stream& stream);
 
+extern void* GetBasePtr(const std::shared_ptr<Allocation>& allocation);
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 extern AllocationPtr Alloc(const platform::CUDAPlace& place, size_t size,
                            const gpuStream_t& stream);
-- 
GitLab