diff --git a/paddle/memory/CMakeLists.txt b/paddle/memory/CMakeLists.txt index 3943c3cfad31d13a00645aba6fc153d3d13da987..86625124967d7dfb392f5a8e74e591cb2955385f 100644 --- a/paddle/memory/CMakeLists.txt +++ b/paddle/memory/CMakeLists.txt @@ -1 +1,7 @@ add_subdirectory(detail) + +if(${WITH_GPU}) + nv_library(memory SRCS memory.cc) +else(${WITH_GPU}) + cc_library(memory SRCS memroy.cc) +endif(${WITH_GPU}) diff --git a/paddle/memory/detail/CMakeLists.txt b/paddle/memory/detail/CMakeLists.txt index 81ca8a0bbf0ef2162a6efc63037a18b10bbcd563..3b5bbd7a12fab043c7e6cc66943715a40371c8a8 100644 --- a/paddle/memory/detail/CMakeLists.txt +++ b/paddle/memory/detail/CMakeLists.txt @@ -1,2 +1 @@ -cc_test(cpu_allocator_test SRCS cpu_allocator_test.cc) -nv_test(gpu_allocator_test SRCS gpu_allocator_test.cc) +cc_test(system_allocator_test SRCS system_allocator_test.cc) diff --git a/paddle/memory/detail/cpu_allocator.h b/paddle/memory/detail/cpu_allocator.h deleted file mode 100644 index 17753ccef718f8b258e962384c3273317fc72bec..0000000000000000000000000000000000000000 --- a/paddle/memory/detail/cpu_allocator.h +++ /dev/null @@ -1,71 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include // for size_t -#include // for malloc and free - -#ifndef _WIN32 -#include // for mlock and munlock -#endif - -namespace paddle { -namespace memory { -namespace detail { - -// CPUAllocator calls mlock, which returns -// pinned and locked memory as staging areas for data exchange -// between host and device. Allocates too much would reduce the -// amount of memory available to the system for paging. So, by -// default, we should use CPUAllocator. -template -class CPUAllocator { - public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { return std::malloc(size); } - void Free(void* p, size_t size) { std::free(p); } -}; - -template <> -class CPUAllocator { - public: - void* Alloc(size_t size) { - void* p = std::malloc(size); - if (p == nullptr) { - return p; - } -#ifndef _WIN32 - mlock(p, size); -#endif - return p; - } - - void Free(void* p, size_t size) { -#ifndef _WIN32 - munlock(p, size); -#endif - std::free(p); - } -}; - -} // namespace detail -} // namespace memory -} // namespace paddle diff --git a/paddle/memory/detail/cpu_allocator_test.cc b/paddle/memory/detail/cpu_allocator_test.cc deleted file mode 100644 index 4e45266cd8ad8a2bfd8f2135d259691559bbcbf4..0000000000000000000000000000000000000000 --- a/paddle/memory/detail/cpu_allocator_test.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/memory/detail/cpu_allocator.h" -#include "gtest/gtest.h" - -TEST(CPUAllocator, NonStaging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} - -TEST(CPUAllocator, Staging) { - paddle::memory::detail::CPUAllocator a; - void* p = a.Alloc(4096); - EXPECT_NE(p, nullptr); - a.Free(p, 4096); -} diff --git a/paddle/memory/detail/gpu_allocator.h b/paddle/memory/detail/system_allocator.h similarity index 58% rename from paddle/memory/detail/gpu_allocator.h rename to paddle/memory/detail/system_allocator.h index 9452c41fb897554c731ffd00a4fb70b7d4abf0b6..0a6455318899564dbd19b6b8dc8e7b61d59e7832 100644 --- a/paddle/memory/detail/gpu_allocator.h +++ b/paddle/memory/detail/system_allocator.h @@ -14,20 +14,58 @@ limitations under the License. */ #pragma once -#include // for size_t +#include // for size_t +#include // for mlock and munlock +#include // for malloc and free -#include +#ifndef PADDLE_ONLY_CPU #include +#include +#endif // PADDLE_ONLY_CPU namespace paddle { namespace memory { namespace detail { +class SystemAllocator { + public: + virtual void* Alloc(size_t size) = 0; + virtual void* Free(void* p) = 0; +}; + +// CPUAllocator calls mlock, which returns pinned +// and locked memory as staging areas for data exchange between host +// and device. Allocates too much would reduce the amount of memory +// available to the system for paging. So, by default, we should use +// CPUAllocator. +template +class CPUAllocator : public SystemAllocator { + public: + virtual void* Alloc(size_t size) { + void* p = std::malloc(size); + if (p != nullptr && lock_memory) { + mlock(p, size); + } + return p; + } + + virtual void Free(void* p, size_t size) { + if (p != nullptr && lock_memory) { + munlock(p, size); + } + std::free(p); + } +}; + +#ifndef PADDLE_ONLY_CPU // The following code are for CUDA. + +namespace { inline void throw_on_error(cudaError_t e, const char* message) { if (e) { throw thrust::system_error(e, thrust::cuda_category(), message); } } +} // namespace // GPUAllocator calls cudaHostMalloc, which returns // pinned and locked memory as staging areas for data exchange @@ -36,17 +74,11 @@ inline void throw_on_error(cudaError_t e, const char* message) { // default, we should use GPUAllocator. template class GPUAllocator { -public: - void* Alloc(size_t size); - void Free(void* p, size_t size); -}; - -template <> -class GPUAllocator { -public: + public: void* Alloc(size_t size) { void* p = 0; - cudaError_t result = cudaMalloc(&p, size); + cudaError_t result = + staging ? cudaMallocHost(&p, size) : cudaMalloc(&p, size); if (result == cudaSuccess) { return p; } @@ -60,32 +92,15 @@ public: // that is returned if you ever call cudaFree after the // driver has already shutdown. This happens only if the // process is terminating, in which case we don't care if - // cudaFree succeeds. - auto err = cudaFree(p); + // cudaFree succeeds. + auto err = staging ? cudaFreeHost(p) : cudaFree(p); if (err != cudaErrorCudartUnloading) { - throw_on_error(err, "cudaFree failed"); + throw_on_error(err, "cudaFree failed"); } } }; -template <> -class GPUAllocator { -public: - void* Alloc(size_t size) { - void* p = 0; - cudaError_t result = cudaMallocHost(&p, size); - if (result == cudaSuccess) { - return p; - } - // clear last error - cudaGetLastError(); - return nullptr; - } - - void Free(void* p, size_t size) { - throw_on_error(cudaFreeHost(p), "cudaFreeHost failed"); - } -}; +#endif // PADDLE_ONLY_CPU } // namespace detail } // namespace memory diff --git a/paddle/memory/detail/gpu_allocator_test.cc b/paddle/memory/detail/system_allocator_test.cc similarity index 69% rename from paddle/memory/detail/gpu_allocator_test.cc rename to paddle/memory/detail/system_allocator_test.cc index 18c1c9ab43084ae72185565cb7c04cf6d5c9937c..4e7b8018b6a072bcac0f4dc636df440cbc350303 100644 --- a/paddle/memory/detail/gpu_allocator_test.cc +++ b/paddle/memory/detail/system_allocator_test.cc @@ -12,9 +12,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/memory/detail/gpu_allocator.h" +#include "paddle/memory/detail/system_allocator.h" #include "gtest/gtest.h" +TEST(CPUAllocator, NoLockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +TEST(CPUAllocator, LockMem) { + paddle::memory::detail::CPUAllocator a; + void* p = a.Alloc(4096); + EXPECT_NE(p, nullptr); + a.Free(p, 4096); +} + +#ifndef PADDLE_ONLY_CPU + TEST(GPUAllocator, NonStaging) { paddle::memory::detail::GPUAllocator a; void* p = a.Alloc(4096); @@ -28,3 +44,5 @@ TEST(GPUAllocator, Staging) { EXPECT_NE(p, nullptr); a.Free(p, 4096); } + +#endif // PADDLE_ONLY_CPU diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index b617923731a4d92e9765e2b73c55984a70a59264..ca3c01ebdb03598f760d19475b42930a137b3bba 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -14,48 +14,41 @@ limitations under the License. */ #include "paddle/memory/memory.h" +#include "paddle/memory/detail/cpu_allocator.h" +#include "paddle/memory/detail/gpu_allocator.h" + namespace paddle { namespace memory { -template <> -void* Alloc(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Alloc(size); -} - -void* AllocStaging(CPUPlace, size_t size) { - return GetCPUBuddyAllocator(true /*staging*/)->Alloc(size); -} - -template <> -void* Alloc(GPUPlace pl, size_t size) { - return GetGPUBuddyAllocator(pl.device)->Alloc(size); -} - -template <> -void Free(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -void FreeStaging(CPUPlace, void* p) { - return GetCPUBuddyAllocator(false /*non-staging*/)->Free(p); -} - -#ifdef PADDLE_WITH_GPU -template <> -void* Alloc(GPUPlace pl, void* p) { - return GetGPUBuddyAllocator(pl.device)->Free(p); -} - -template <> -size_t Used(CPUPlace) { +void Alloc(paddle::platform::Place pl, size_t size) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Alloc(size); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + return GetCPUBuddyAllocator()->Alloc(size); +} + +void Free(paddle::platform::Place pl, void* p) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + GetGPUBuddyAllocator(pl.device)->Free(p); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); + GetCPUBuddyAllocator()->Free(p); +} + +size_t Used(paddle::platform::Place pl) { +#ifndef PADDLE_ONLY_CPU + if (paddle::platform::is_gpu_place(pl)) { + return GetGPUBuddyAllocator(pl.device)->Used(); + } +#endif // PADDLE_ONLY_CPU + PADDLE_ASSERT(paddle::platform::is_cpu_place(pl)); return GetCPUBuddyAllocator()->Used(); } -template <> -size_t Alloc(GPUPlace pl) { - return GetGPUBuddyAllocator(pl.device)->Used(); -} -#endif // PADDLE_WITH_GPU - } // namespace memory } // namespace paddle diff --git a/paddle/memory/memory.h b/paddle/memory/memory.h index 8c15a133bb4e9762d4264ee0d02ad96a3ed33e30..0bc609205eca2c53d85b1a2533f8f36d5b19595e 100644 --- a/paddle/memory/memory.h +++ b/paddle/memory/memory.h @@ -19,19 +19,9 @@ limitations under the License. */ namespace paddle { namespace memory { -template -void* Alloc(Place, size_t); -template -void Free(Place, void*); -template -size_t Used(Place); - -// Staging memory means "pinned" host memory that can be mapped into -// the CUDA memory space and accessed by the device rapidly. Don't -// allocate too much staging memory; otherwise system performance will -// degrade because the OS cannot find enough swap memory space. -void* AllocStaging(CPUPlace, size_t); -void* FreeStaging(CPUPlace, size_t); +void* Alloc(paddle::framework::Place, size_t); +void Free(paddle::framework::Place, void*); +size_t Used(paddle::framework::Place); } // namespace memory } // namespace paddle