From ce70df86b1e8c892cdde5312caa0c2699f368f7d Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 27 Jun 2017 00:15:36 +0800 Subject: [PATCH] Add gpu_allocator --- paddle/memory/.clang-format | 5 ++ paddle/memory/detail/CMakeLists.txt | 1 + paddle/memory/detail/cpu_allocator.h | 6 +- paddle/memory/detail/gpu_allocator.h | 92 ++++++++++++++++++++++ paddle/memory/detail/gpu_allocator_test.cc | 30 +++++++ 5 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 paddle/memory/.clang-format create mode 100644 paddle/memory/detail/gpu_allocator.h create mode 100644 paddle/memory/detail/gpu_allocator_test.cc diff --git a/paddle/memory/.clang-format b/paddle/memory/.clang-format new file mode 100644 index 0000000000..29282dc87e --- /dev/null +++ b/paddle/memory/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: Google +Standard: Cpp11 +... diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062d..81ca8a0bbf 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,2 @@ cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index a487fecef4..17753ccef7 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -32,21 +32,21 @@ namespace detail { // default, we should use CPUAllocator. 
template <bool staging>
class CPUAllocator {
-public:
+ public:
  void* Alloc(size_t size);
  void Free(void* p, size_t size);
};

template <>
class CPUAllocator<false> {
-public:
+ public:
  void* Alloc(size_t size) { return std::malloc(size); }
  void Free(void* p, size_t size) { std::free(p); }
};

template <>
class CPUAllocator<true> {
-public:
+ public:
  void* Alloc(size_t size) {
    void* p = std::malloc(size);
    if (p == nullptr) {
diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/gpu_allocator.h
new file mode 100644
index 0000000000..9452c41fb8
--- /dev/null
+++ b/paddle/memory/detail/gpu_allocator.h
@@ -0,0 +1,92 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stddef.h>  // for size_t
+
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+// Wraps a CUDA runtime status: throws thrust::system_error (carrying the
+// CUDA error category) whenever e is not cudaSuccess (== 0).
+inline void throw_on_error(cudaError_t e, const char* message) {
+  if (e) {
+    throw thrust::system_error(e, thrust::cuda_category(), message);
+  }
+}
+
+// GPUAllocator<staging=true> calls cudaMallocHost, which returns
+// pinned and locked memory as staging areas for data exchange
+// between host and device.  Allocating too much would reduce the
+// amount of memory available to the system for paging.  So, by
+// default, we should use GPUAllocator<staging=false>.
+template <bool staging>
+class GPUAllocator {
+ public:
+  void* Alloc(size_t size);
+  void Free(void* p, size_t size);
+};
+
+// Non-staging allocator: plain device memory.
+template <>
+class GPUAllocator<false> {
+ public:
+  // Returns size bytes of device memory, or nullptr on failure
+  // (callers are expected to check the result).
+  void* Alloc(size_t size) {
+    void* p = 0;
+    cudaError_t result = cudaMalloc(&p, size);
+    if (result == cudaSuccess) {
+      return p;
+    }
+    // clear last error so a failed Alloc does not poison later CUDA calls
+    cudaGetLastError();
+    return nullptr;
+  }
+
+  void Free(void* p, size_t size) {
+    // Purposefully allow cudaErrorCudartUnloading, because
+    // that is returned if you ever call cudaFree after the
+    // driver has already shutdown. This happens only if the
+    // process is terminating, in which case we don't care if
+    // cudaFree succeeds.
+    auto err = cudaFree(p);
+    if (err != cudaErrorCudartUnloading) {
+      throw_on_error(err, "cudaFree failed");
+    }
+  }
+};
+
+// Staging allocator: pinned (page-locked) host memory for fast
+// host<->device transfers.
+template <>
+class GPUAllocator<true> {
+ public:
+  // Returns size bytes of pinned host memory, or nullptr on failure.
+  void* Alloc(size_t size) {
+    void* p = 0;
+    cudaError_t result = cudaMallocHost(&p, size);
+    if (result == cudaSuccess) {
+      return p;
+    }
+    // clear last error
+    cudaGetLastError();
+    return nullptr;
+  }
+
+  void Free(void* p, size_t size) {
+    throw_on_error(cudaFreeHost(p), "cudaFreeHost failed");
+  }
+};
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/gpu_allocator_test.cc
new file mode 100644
index 0000000000..18c1c9ab43
--- /dev/null
+++ b/paddle/memory/detail/gpu_allocator_test.cc
@@ -0,0 +1,30 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/gpu_allocator.h"
+#include "gtest/gtest.h"
+
+// Device-memory path: GPUAllocator<false> wraps cudaMalloc/cudaFree.
+TEST(GPUAllocator, NonStaging) {
+  paddle::memory::detail::GPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+
+// Pinned-host path: GPUAllocator<true> wraps cudaMallocHost/cudaFreeHost.
+TEST(GPUAllocator, Staging) {
+  paddle::memory::detail::GPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
-- 
GitLab