From e02859c0f53dfe4616976b015d4fefd8aaa6eb39 Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 26 Jun 2017 15:27:01 -0700 Subject: [PATCH] Replace {cpu,gpu}_allocator.h and {cpu,gpu}_allocator_test.cc by system_allocator{.h,_test.cc} --- paddle/memory/CMakeLists.txt | 6 ++ paddle/memory/detail/CMakeLists.txt | 3 +- paddle/memory/detail/cpu_allocator.h | 71 ----------------- paddle/memory/detail/cpu_allocator_test.cc | 30 ------- .../{gpu_allocator.h => system_allocator.h} | 79 +++++++++++-------- ...cator_test.cc => system_allocator_test.cc} | 20 ++++- paddle/memory/memory.cc | 67 +++++++--------- paddle/memory/memory.h | 16 +--- 8 files changed, 106 insertions(+), 186 deletions(-) delete mode 100644 paddle/memory/detail/cpu_allocator.h delete mode 100644 paddle/memory/detail/cpu_allocator_test.cc rename paddle/memory/detail/{gpu_allocator.h => system_allocator.h} (58%) rename paddle/memory/detail/{gpu_allocator_test.cc => system_allocator_test.cc} (69%) diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 3943c3cfad3..86625124967 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1 +1,7 @@ add_subdirectory(detail) + +if(${WITH_GPU}) + nv_library(memory SRCS memory.cc) +else(${WITH_GPU}) + cc_library(memory SRCS memroy.cc) +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 81ca8a0bbf0..3b5bbd7a12f 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,2 +1 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) -nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) +cc_test(system_allocator_test SRCS system_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h deleted file mode 100644 index 17753ccef71..00000000000 --- a/paddle/memory/detail/cpu_allocator.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include // for size_t -#include // for malloc and free - -#ifndef _WIN32 -#include // for mlock and munlock -#endif - -namespace paddle { -namespace memory { -namespace detail { - -// CPUAllocator calls mlock, which returns -// pinned and locked memory as staging areas for data exchange -// between host and device. Allocates too much would reduce the -// amount of memory available to the system for paging. So, by -// default, we should use CPUAllocator. -template -class CPUAllocator { - public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { return std::malloc(size); } - void Free(void* p, size_t size) { std::free(p); } -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { - void* p = std::malloc(size); - if (p == nullptr) { - return p; - } -#ifndef _WIN32 - mlock(p, size); -#endif - return p; - } - - void Free(void* p, size_t size) { -#ifndef _WIN32 - munlock(p, size); -#endif - std::free(p); - } -}; - -} // namespace detail -} // namespace memory -} // namespace paddle diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc deleted file mode 100644 index 4e45266cd8a..00000000000 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/memory/detail/cpu_allocator.h" -#include "gtest/gtest.h" - -TEST(CPUAllocator, NonStaging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} - -TEST(CPUAllocator, Staging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/system_allocator.h similarity index 58% rename from paddle/memory/detail/gpu_allocator.h rename to paddle/memory/detail/system_allocator.h index 9452c41fb89..0a645531889 100644 --- a/paddle/memory/detail/gpu_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -14,20 +14,58 @@ limitations under the License. */ #pragma once -#include // for size_t +#include // for size_t +#include // for mlock and munlock +#include // for malloc and free -#include +#ifndef PADDLE_ONLY_CPU #include +#include +#endif // PADDLE_ONLY_CPU namespace paddle { namespace memory { namespace detail { +class SystemAllocator { + public: + virtual void* Alloc(size_t size) = 0; + virtual void* Free(void* p) = 0; +}; + +// CPUAllocator calls mlock, which returns pinned +// and locked memory as staging areas for data exchange between host +// and device. Allocates too much would reduce the amount of memory +// available to the system for paging. So, by default, we should use +// CPUAllocator. +template +class CPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size) { + void* p = std::malloc(size); + if (p != nullptr && lock_memory) { + mlock(p, size); + } + return p; + } + + virtual void Free(void* p, size_t size) { + if (p != nullptr && lock_memory) { + munlock(p, size); + } + std::free(p); + } +}; + +#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. + +namespace { inline void throw_on_error(cudaError_t e, const char* message) { if (e) { throw thrust::system_error(e, thrust::cuda_category(), message); } } +} // namespace // GPUAllocator calls cudaHostMalloc, which returns // pinned and locked memory as staging areas for data exchange @@ -36,17 +74,11 @@ inline void throw_on_error(cudaError_t e, const char* message) { // default, we should use GPUAllocator. template class GPUAllocator { -public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class GPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = 0; - cudaError_t result = cudaMalloc(&p, size); + cudaError_t result = + staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); if (result == cudaSuccess) { return p; } @@ -60,32 +92,15 @@ public: // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if - // cudaFree succeeds. - auto err = cudaFree(p); + // cudaFree succeeds. + auto err = staging ? cudaFreeHost(p) : cudaFree(p); if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree failed"); + throw_on_error(err, "cudaFree failed"); } } }; -template <> -class GPUAllocator { -public: - void* Alloc(size_t size) { - void* p = 0; - cudaError_t result = cudaMallocHost(&p, size); - if (result == cudaSuccess) { - return p; - } - // clear last error - cudaGetLastError(); - return nullptr; - } - - void Free(void* p, size_t size) { - throw_on_error(cudaFreeHost(p), "cudaFreeHost failed"); - } -}; +#endif // PADDLE_ONLY_CPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc similarity index 69% rename from paddle/memory/detail/gpu_allocator_test.cc rename to paddle/memory/detail/system_allocator_test.cc index 18c1c9ab430..4e7b8018b6a 100644 --- a/paddle/memory/detail/gpu_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -12,9 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/memory/detail/gpu_allocator.h" +#include "paddle/memory/detail/system_allocator.h" #include "gtest/gtest.h" +TEST(CPUAllocator, NoLockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +TEST(CPUAllocator, LockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +#ifndef PADDLE_ONLY_CPU + TEST(GPUAllocator, NonStaging) { paddle::memory::detail::GPUAllocator a; void* p = a.Alloc(4096); @@ -28,3 +44,5 @@ TEST(GPUAllocator, Staging) { EXPECT_NE(p, nullptr); a.Free(p, 4096); } + +#endif // PADDLE_ONLY_CPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index b617923731a..ca3c01ebdb0 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -14,48 +14,41 @@ limitations under the License. */ #include "paddle/memory/memory.h" +#include "paddle/memory/detail/cpu_allocator.h" +#include "paddle/memory/detail/gpu_allocator.h" + namespace paddle { namespace memory { -template <> -void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); -} - -void* AllocStaging(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); -} - -template <> -void* Alloc(GPUPlace pl, size_t size) { - return GetGPUBuddyAllocator(pl.device)->Alloc(size); -} - -template <> -void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -void FreeStaging(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -#ifdef PADDLE_WITH_GPU -template <> -void* Alloc(GPUPlace pl, void* p) { - return GetGPUBuddyAllocator(pl.device)->Free(p); -} - -template <> -size_t Used(CPUPlace) { +void Alloc(paddle::platform::Place pl, size_t size) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Alloc(size); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + return GetCPUBuddyAllocator()->Alloc(size); +} + +void Free(paddle::platform::Place pl, void* p) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + GetGPUBuddyAllocator(pl.device)->Free(p); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + GetCPUBuddyAllocator()->Free(p); +} + +size_t Used(paddle::platform::Place pl) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Used(); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); return GetCPUBuddyAllocator()->Used(); } -template <> -size_t Alloc(GPUPlace pl) { - return GetGPUBuddyAllocator(pl.device)->Used(); -} -#endif // PADDLE_WITH_GPU - } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 8c15a133bb4..0bc609205ec 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,19 +19,9 @@ limitations under the License. */ namespace paddle { namespace memory { -template -void* Alloc(Place, size_t); -template -void Free(Place, void*); -template -size_t Used(Place); - -// Staging memory means "pinned" host memory that can be mapped into -// the CUDA memory space and accessed by the device rapidly. Don't -// allocate too much staging memory; otherwise system performance will -// degrade because the OS cannot find enough swap memory space. -void* AllocStaging(CPUPlace, size_t); -void* FreeStaging(CPUPlace, size_t); +void* Alloc(paddle::framework::Place, size_t); +void Free(paddle::framework::Place, void*); +size_t Used(paddle::framework::Place); } // namespace memory } // namespace paddle -- GitLab