From ce70df86b1e8c892cdde5312caa0c2699f368f7d Mon Sep 17 00:00:00 2001 From: liaogang Date: Tue, 27 Jun 2017 00:15:36 +0800 Subject: [PATCH] Add gpu_allocator --- paddle/memory/.clang-format | 5 ++ paddle/memory/detail/CMakeLists.txt | 1 + paddle/memory/detail/cpu_allocator.h | 6 +- paddle/memory/detail/gpu_allocator.h | 92 ++++++++++++++++++++++ paddle/memory/detail/gpu_allocator_test.cc | 30 +++++++ 5 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 paddle/memory/.clang-format create mode 100644 paddle/memory/detail/gpu_allocator.h create mode 100644 paddle/memory/detail/gpu_allocator_test.cc diff --git a/paddle/memory/.clang-format b/paddle/memory/.clang-format new file mode 100644 index 0000000000..29282dc87e --- /dev/null +++ b/paddle/memory/.clang-format @@ -0,0 +1,5 @@ +--- +Language: Cpp +BasedOnStyle: Google +Standard: Cpp11 +... diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index fb8a11062d..81ca8a0bbf 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1 +1,2 @@ cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) +nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h index a487fecef4..17753ccef7 100644 --- a/paddle/memory/detail/cpu_allocator.h +++ b/paddle/memory/detail/cpu_allocator.h @@ -32,21 +32,21 @@ namespace detail { // default, we should use CPUAllocator. 
template <bool staging>
class CPUAllocator {
-public:
+ public:
  void* Alloc(size_t size);
  void Free(void* p, size_t size);
};

template <>
class CPUAllocator<false> {
-public:
+ public:
  void* Alloc(size_t size) { return std::malloc(size); }
  void Free(void* p, size_t size) { std::free(p); }
};

template <>
class CPUAllocator<true> {
-public:
+ public:
  void* Alloc(size_t size) {
    void* p = std::malloc(size);
    if (p == nullptr) {
diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/gpu_allocator.h
new file mode 100644
index 0000000000..9452c41fb8
--- /dev/null
+++ b/paddle/memory/detail/gpu_allocator.h
@@ -0,0 +1,92 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stddef.h>  // for size_t
+
+#include <thrust/system/cuda/error.h>
+#include <thrust/system_error.h>
+
+namespace paddle {
+namespace memory {
+namespace detail {
+
+// Wraps a CUDA runtime status: throws thrust::system_error (carrying the
+// CUDA error category) whenever e is not cudaSuccess (== 0).
+inline void throw_on_error(cudaError_t e, const char* message) {
+  if (e) {
+    throw thrust::system_error(e, thrust::cuda_category(), message);
+  }
+}
+
+// GPUAllocator<staging=true> calls cudaMallocHost, which returns
+// pinned and locked memory as staging areas for data exchange
+// between host and device.  Allocating too much would reduce the
+// amount of memory available to the system for paging.  So, by
+// default, we should use GPUAllocator<staging=false>.
+template <bool staging>
+class GPUAllocator {
+ public:
+  void* Alloc(size_t size);
+  void Free(void* p, size_t size);
+};
+
+// Non-staging allocator: plain device memory.
+template <>
+class GPUAllocator<false> {
+ public:
+  // Returns size bytes of device memory, or nullptr on failure
+  // (callers are expected to check the result).
+  void* Alloc(size_t size) {
+    void* p = 0;
+    cudaError_t result = cudaMalloc(&p, size);
+    if (result == cudaSuccess) {
+      return p;
+    }
+    // clear last error so a failed Alloc does not poison later CUDA calls
+    cudaGetLastError();
+    return nullptr;
+  }
+
+  void Free(void* p, size_t size) {
+    // Purposefully allow cudaErrorCudartUnloading, because
+    // that is returned if you ever call cudaFree after the
+    // driver has already shutdown. This happens only if the
+    // process is terminating, in which case we don't care if
+    // cudaFree succeeds.
+    auto err = cudaFree(p);
+    if (err != cudaErrorCudartUnloading) {
+      throw_on_error(err, "cudaFree failed");
+    }
+  }
+};
+
+// Staging allocator: pinned (page-locked) host memory for fast
+// host<->device transfers.
+template <>
+class GPUAllocator<true> {
+ public:
+  // Returns size bytes of pinned host memory, or nullptr on failure.
+  void* Alloc(size_t size) {
+    void* p = 0;
+    cudaError_t result = cudaMallocHost(&p, size);
+    if (result == cudaSuccess) {
+      return p;
+    }
+    // clear last error
+    cudaGetLastError();
+    return nullptr;
+  }
+
+  void Free(void* p, size_t size) {
+    throw_on_error(cudaFreeHost(p), "cudaFreeHost failed");
+  }
+};
+
+}  // namespace detail
+}  // namespace memory
+}  // namespace paddle
diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/gpu_allocator_test.cc
new file mode 100644
index 0000000000..18c1c9ab43
--- /dev/null
+++ b/paddle/memory/detail/gpu_allocator_test.cc
@@ -0,0 +1,30 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/memory/detail/gpu_allocator.h"
+#include "gtest/gtest.h"
+
+// Device-memory path: GPUAllocator<false> wraps cudaMalloc/cudaFree.
+TEST(GPUAllocator, NonStaging) {
+  paddle::memory::detail::GPUAllocator<false> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
+
+// Pinned-host path: GPUAllocator<true> wraps cudaMallocHost/cudaFreeHost.
+TEST(GPUAllocator, Staging) {
+  paddle::memory::detail::GPUAllocator<true> a;
+  void* p = a.Alloc(4096);
+  EXPECT_NE(p, nullptr);
+  a.Free(p, 4096);
+}
-- 
GitLab