Replace {cpu,gpu}_allocator.h and {cpu,gpu}_allocator_test.cc by system_allocator{.h,_test.cc}

e02859c0 · Yi Wang · ce70df86 · e02859c0 · e02859c0 · ce70df86
8 changed file
--- a/paddle/memory/CMakeLists.txt
+++ b/paddle/memory/CMakeLists.txt
 add_subdirectory(detail)
+if(${WITH_GPU})
+  nv_library(memory SRCS memory.cc)
+else(${WITH_GPU})
+  cc_library(memory SRCS memroy.cc)
+endif(${WITH_GPU})
--- a/paddle/memory/detail/CMakeLists.txt
+++ b/paddle/memory/detail/CMakeLists.txt
-cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc)
+cc_test(system_allocator_test SRCS system_allocator_test.cc)
-nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc)
--- a/paddle/memory/detail/cpu_allocator.h
+++ b/paddle/memory/detail/cpu_allocator.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-#include <stddef.h>  // for size_t
-#include <cstdlib>   // for malloc and free
-#ifndef _WIN32
-#include <sys/mman.h>  // for mlock and munlock
-#endif
-namespace paddle {
-namespace memory {
-namespace detail {
-// CPUAllocator<staging=true> calls mlock, which returns
-// pinned and locked memory as staging areas for data exchange
-// between host and device.  Allocates too much would reduce the
-// amount of memory available to the system for paging.  So, by
-// default, we should use CPUAllocator<staging=false>.
-template <bool staging>
-class CPUAllocator {
- public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-template <>
-class CPUAllocator<false> {
- public:
-  void* Alloc(size_t size) { return std::malloc(size); }
-  void Free(void* p, size_t size) { std::free(p); }
-};
-template <>
-class CPUAllocator<true> {
- public:
-  void* Alloc(size_t size) {
-    void* p = std::malloc(size);
-    if (p == nullptr) {
-      return p;
-    }
-#ifndef _WIN32
-    mlock(p, size);
-#endif
-    return p;
-  }
-  void Free(void* p, size_t size) {
-#ifndef _WIN32
-    munlock(p, size);
-#endif
-    std::free(p);
-  }
-};
-}  // namespace detail
-}  // namespace memory
-}  // namespace paddle
--- a/paddle/memory/detail/cpu_allocator_test.cc
+++ b/paddle/memory/detail/cpu_allocator_test.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/memory/detail/cpu_allocator.h"
-#include "gtest/gtest.h"
-TEST(CPUAllocator, NonStaging) {
-  paddle::memory::detail::CPUAllocator<false> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
-TEST(CPUAllocator, Staging) {
-  paddle::memory::detail::CPUAllocator<true> a;
-  void* p = a.Alloc(4096);
-  EXPECT_NE(p, nullptr);
-  a.Free(p, 4096);
-}
--- a/paddle/memory/detail/gpu_allocator.h
+++ b/paddle/memory/detail/gpu_allocator.h
@@ -14,20 +14,58 @@ limitations under the License. */
 #pragma once
-#include <stddef.h>  // for size_t
+#include <stddef.h>    // for size_t
+#include <sys/mman.h>  // for mlock and munlock
+#include <cstdlib>     // for malloc and free
-#include <thrust/system_error.h>
+#ifndef PADDLE_ONLY_CPU
 #include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+#endif  // PADDLE_ONLY_CPU
 namespace paddle {
 namespace memory {
 namespace detail {
+class SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size) = 0;
+  virtual void* Free(void* p) = 0;
+};
+// CPUAllocator<lock_memory=true> calls mlock, which returns pinned
+// and locked memory as staging areas for data exchange between host
+// and device.  Allocates too much would reduce the amount of memory
+// available to the system for paging.  So, by default, we should use
+// CPUAllocator<staging=false>.
+template <bool lock_memory>
+class CPUAllocator : public SystemAllocator {
+ public:
+  virtual void* Alloc(size_t size) {
+    void* p = std::malloc(size);
+    if (p != nullptr && lock_memory) {
+      mlock(p, size);
+    }
+    return p;
+  }
+  virtual void Free(void* p, size_t size) {
+    if (p != nullptr && lock_memory) {
+      munlock(p, size);
+    }
+    std::free(p);
+  }
+};
+#ifndef PADDLE_ONLY_CPU  // The following code are for CUDA.
+namespace {
 inline void throw_on_error(cudaError_t e, const char* message) {
  if (e) {
    throw thrust::system_error(e, thrust::cuda_category(), message);
  }
 }
+}  // namespace
 // GPUAllocator<staging=true> calls cudaHostMalloc, which returns
 // pinned and locked memory as staging areas for data exchange
@@ -36,17 +74,11 @@ inline void throw_on_error(cudaError_t e, const char* message) {
 // default, we should use GPUAllocator<staging=false>.
 template <bool staging>
 class GPUAllocator {
-public:
+ public:
-  void* Alloc(size_t size);
-  void Free(void* p, size_t size);
-};
-template <>
-class GPUAllocator<false> {
-public:
  void* Alloc(size_t size) {
    void* p = 0;
-    cudaError_t result = cudaMalloc(&p, size);
+    cudaError_t result =
+        staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size);
    if (result == cudaSuccess) {
      return p;
    }
@@ -60,32 +92,15 @@ public:
    // that is returned if you ever call cudaFree after the
    // driver has already shutdown. This happens only if the
    // process is terminating, in which case we don't care if
-    // cudaFree succeeds. 
+    // cudaFree succeeds.
-    auto err = cudaFree(p);
+    auto err = staging ? cudaFreeHost(p) : cudaFree(p);
    if (err != cudaErrorCudartUnloading) {
-        throw_on_error(err, "cudaFree failed");
+      throw_on_error(err, "cudaFree failed");
    }
  }
 };
-template <>
+#endif  // PADDLE_ONLY_CPU
-class GPUAllocator<true> {
-public:
-  void* Alloc(size_t size) {
-    void* p = 0;
-    cudaError_t result = cudaMallocHost(&p, size);
-    if (result == cudaSuccess) {
-        return p;
-    }
-    // clear last error
-    cudaGetLastError();
-    return nullptr;
-  }
-  void Free(void* p, size_t size) {
-    throw_on_error(cudaFreeHost(p), "cudaFreeHost failed");
-  }
-};
 }  // namespace detail
 }  // namespace memory

--- a/paddle/memory/detail/gpu_allocator_test.cc
+++ b/paddle/memory/detail/gpu_allocator_test.cc
@@ -12,9 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/memory/detail/gpu_allocator.h"
+#include "paddle/memory/detail/system_allocator.h"
 #include "gtest/gtest.h"
+TEST(CPUAllocator, NoLockMem) {
+  paddle::memory::detail::CPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+TEST(CPUAllocator, LockMem) {
+  paddle::memory::detail::CPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+#ifndef PADDLE_ONLY_CPU
 TEST(GPUAllocator, NonStaging) {
  paddle::memory::detail::GPUAllocator<false> a;
  void* p = a.Alloc(4096);
@@ -28,3 +44,5 @@ TEST(GPUAllocator, Staging) {
  EXPECT_NE(p, nullptr);
  a.Free(p, 4096);
 }
+#endif  // PADDLE_ONLY_CPU
--- a/paddle/memory/memory.cc
+++ b/paddle/memory/memory.cc
@@ -14,48 +14,41 @@ limitations under the License. */
 #include "paddle/memory/memory.h"
+#include "paddle/memory/detail/cpu_allocator.h"
+#include "paddle/memory/detail/gpu_allocator.h"
 namespace paddle {
 namespace memory {
-template <>
+void Alloc(paddle::platform::Place pl, size_t size) {
-void* Alloc<CPUPlace>(CPUPlace, size_t size) {
+#ifndef PADDLE_ONLY_CPU
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size);
+  if (paddle::platform::is_gpu_place(pl)) {
-}
+    return GetGPUBuddyAllocator(pl.device)->Alloc(size);
+  }
-void* AllocStaging(CPUPlace, size_t size) {
+#endif  // PADDLE_ONLY_CPU
-  return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size);
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-}
+  return GetCPUBuddyAllocator()->Alloc(size);
+}
-template <>
-void* Alloc<GPUPlace>(GPUPlace pl, size_t size) {
+void Free(paddle::platform::Place pl, void* p) {
-  return GetGPUBuddyAllocator(pl.device)->Alloc(size);
+#ifndef PADDLE_ONLY_CPU
-}
+  if (paddle::platform::is_gpu_place(pl)) {
+    GetGPUBuddyAllocator(pl.device)->Free(p);
-template <>
+  }
-void Free<CPUPlace>(CPUPlace, void* p) {
+#endif  // PADDLE_ONLY_CPU
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-}
+  GetCPUBuddyAllocator()->Free(p);
+}
-void FreeStaging(CPUPlace, void* p) {
-  return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p);
+size_t Used(paddle::platform::Place pl) {
-}
+#ifndef PADDLE_ONLY_CPU
+  if (paddle::platform::is_gpu_place(pl)) {
-#ifdef PADDLE_WITH_GPU
+    return GetGPUBuddyAllocator(pl.device)->Used();
-template <>
+  }
-void* Alloc<GPUPlace>(GPUPlace pl, void* p) {
+#endif  // PADDLE_ONLY_CPU
-  return GetGPUBuddyAllocator(pl.device)->Free(p);
+  PADDLE_ASSERT(paddle::platform::is_cpu_place(pl));
-}
-template <>
-size_t Used<CPUPlace>(CPUPlace) {
  return GetCPUBuddyAllocator()->Used();
 }
-template <>
-size_t Alloc<GPUPlace>(GPUPlace pl) {
-  return GetGPUBuddyAllocator(pl.device)->Used();
-}
-#endif  // PADDLE_WITH_GPU
 }  // namespace memory
 }  // namespace paddle
--- a/paddle/memory/memory.h
+++ b/paddle/memory/memory.h
@@ -19,19 +19,9 @@ limitations under the License. */
 namespace paddle {
 namespace memory {
-template <typename paddle::framework::Place>
+void* Alloc(paddle::framework::Place, size_t);
-void* Alloc(Place, size_t);
+void Free(paddle::framework::Place, void*);
-template <typename paddle::framework::Place>
+size_t Used(paddle::framework::Place);
-void Free(Place, void*);
-template <typename paddle::framework::Place>
-size_t Used(Place);
-// Staging memory means "pinned" host memory that can be mapped into
-// the CUDA memory space and accessed by the device rapidly.  Don't
-// allocate too much staging memory; otherwise system performance will
-// degrade because the OS cannot find enough swap memory space.
-void* AllocStaging(CPUPlace, size_t);
-void* FreeStaging(CPUPlace, size_t);
 }  // namespace memory
 }  // namespace paddle