diff --git a/paddle/fluid/memory/CMakeLists.txt b/paddle/fluid/memory/CMakeLists.txt index 827b039a109cf753170bb738cf08016abd9840f8..e7268077643c3988c59a52bf54873f1e8db4619b 100644 --- a/paddle/fluid/memory/CMakeLists.txt +++ b/paddle/fluid/memory/CMakeLists.txt @@ -1,6 +1,6 @@ add_subdirectory(detail) add_subdirectory(allocation) -cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce allocator_facade) +cc_library(malloc SRCS malloc.cc DEPS place enforce allocator_facade) cc_library(memcpy SRCS memcpy.cc DEPS place) cc_library(memory diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index f3666438b60cd71b2b13dd54075803ef4f61b555..4b7b9064dcde9b5209264257d51bbd976ba8eb85 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -3,6 +3,7 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator) cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator) cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator) cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator) +cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator) cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator) if (WITH_GPU) @@ -53,6 +54,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS retry_allocator buffered_allocator allocator_strategy + legacy_allocator ) nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade) diff --git a/paddle/fluid/memory/allocation/allocator.cc b/paddle/fluid/memory/allocation/allocator.cc index 51982ad97da5c7a592835c8567869143325a2cd1..8fb8a5fb897a736d7515951ba08c633da9a7706c 100644 --- a/paddle/fluid/memory/allocation/allocator.cc +++ b/paddle/fluid/memory/allocation/allocator.cc @@ -37,11 +37,7 @@ const char* BadAlloc::what() const noexcept { return msg_.c_str(); } void AllocationDeleter::operator()(Allocation* allocation) const { auto* allocator = allocation->allocator(); - if (allocator) { - allocator->Free(allocation); - } else { - delete allocation; // Compatible for legacy allocation. - } + allocator->Free(allocation); } } // namespace allocation diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index ec8a64a1d1fee9714da52e07cc6634377435226e..b06ff1b485108fdc22ea8dc763fde18f7f955c3f 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -19,10 +19,12 @@ #include #include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" +#include "paddle/fluid/memory/allocation/allocator_strategy.h" #include "paddle/fluid/memory/allocation/auto_increment_allocator.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/conditional_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" +#include "paddle/fluid/memory/allocation/legacy_allocator.h" #include "paddle/fluid/memory/allocation/locked_allocator.h" #include "paddle/fluid/memory/allocation/retry_allocator.h" #include "paddle/fluid/memory/allocation/zero_size_allocator.h" @@ -190,13 +192,29 @@ class AllocatorFacadePrivate { ~AllocatorFacadePrivate() = default; AllocatorFacadePrivate() { - InitCPUAllocator(); - InitCUDAAllocator(); - InitCUDAPinnedAllocator(); - WrapZeroSizeAllocator(); + if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) { + InitLegacyAllocator(); + } else { + InitCPUAllocator(); + InitCUDAAllocator(); + InitCUDAPinnedAllocator(); + WrapZeroSizeAllocator(); + } } private: + void InitLegacyAllocator() { + std::vector places{platform::CPUPlace()}; +#ifdef PADDLE_WITH_CUDA + for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) { + places.emplace_back(platform::CUDAPlace(dev_id)); + } +#endif + for (auto& p : places) { + allocators_[p] = std::make_shared(p); + } + } + void InitCPUAllocator() { allocators_[platform::CPUPlace()] = std::make_shared(); } diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index 54b0dd244a660a507c8b35714973e2cdc8523d7a..d44a3f85beba712b1e735ba14008689bce7d0d64 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -35,12 +35,6 @@ class BufferedAllocator : public Allocator { ~BufferedAllocator(); - // std::unique_ptr Allocate( - // size_t size, Allocator::Attr attr = Allocator::Attr::kDefault) - // override; - // - // void FreeUniquePtr(std::unique_ptr allocation) override; - bool IsAllocThreadSafe() const override; // only used in unittest diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc new file mode 100644 index 0000000000000000000000000000000000000000..e66537272340e89fe1075325323909213bbe97b8 --- /dev/null +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -0,0 +1,307 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/memory/allocation/legacy_allocator.h" +#include +#include "glog/logging.h" +#include "paddle/fluid/memory/detail/buddy_allocator.h" +#include "paddle/fluid/memory/detail/system_allocator.h" +#include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/string/printf.h" + +DEFINE_bool(init_allocated_mem, false, + "It is a mistake that the values of the memory allocated by " + "BuddyAllocator are always zeroed in some op's implementation. " + "To find this error in time, we use init_allocated_mem to indicate " + "that initializing the allocated memory with a small value " + "during unit testing."); +DECLARE_double(fraction_of_gpu_memory_to_use); + +namespace paddle { +namespace memory { +namespace legacy { +template +void *Alloc(const Place &place, size_t size); + +template +void Free(const Place &place, void *p); + +template +size_t Used(const Place &place); + +struct Usage : public boost::static_visitor { + size_t operator()(const platform::CPUPlace &cpu) const; + size_t operator()(const platform::CUDAPlace &gpu) const; + size_t operator()(const platform::CUDAPinnedPlace &cuda_pinned) const; +}; + +size_t memory_usage(const platform::Place &p); + +using BuddyAllocator = detail::BuddyAllocator; + +BuddyAllocator *GetCPUBuddyAllocator() { + // We tried thread_local for inference::RNN1 model, but that not works much + // for multi-thread test. + static std::once_flag init_flag; + static detail::BuddyAllocator *a = nullptr; + + std::call_once(init_flag, []() { + a = new detail::BuddyAllocator( + std::unique_ptr(new detail::CPUAllocator), + platform::CpuMinChunkSize(), platform::CpuMaxChunkSize()); + }); + + return a; +} + +// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation, +// seems they are almost the same overhead. +struct NaiveAllocator { + void *Alloc(size_t size) { return malloc(size); } + + void Free(void *p) { + PADDLE_ENFORCE(p); + free(p); + } + + static NaiveAllocator *Instance() { + static NaiveAllocator x; + return &x; + } + + private: + std::mutex lock_; +}; + +template <> +void *Alloc(const platform::CPUPlace &place, size_t size) { + VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); + void *p = GetCPUBuddyAllocator()->Alloc(size); + if (FLAGS_init_allocated_mem) { + memset(p, 0xEF, size); + } + VLOG(100) << " pointer=" << p; + return p; +} + +template <> +void Free(const platform::CPUPlace &place, void *p) { + VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); + GetCPUBuddyAllocator()->Free(p); +} + +template <> +size_t Used(const platform::CPUPlace &place) { + return GetCPUBuddyAllocator()->Used(); +} + +#ifdef PADDLE_WITH_CUDA +BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { + static std::once_flag init_flag; + static detail::BuddyAllocator **a_arr = nullptr; + + std::call_once(init_flag, [gpu_id]() { + int gpu_num = platform::GetCUDADeviceCount(); + PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id, + gpu_num); + + a_arr = new BuddyAllocator *[gpu_num]; + for (int i = 0; i < gpu_num; i++) { + a_arr[i] = nullptr; + platform::SetDeviceId(i); + a_arr[i] = new BuddyAllocator( + std::unique_ptr(new detail::GPUAllocator(i)), + platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); + + VLOG(100) << "\n\nNOTE: each GPU device use " + << FLAGS_fraction_of_gpu_memory_to_use * 100 + << "% of GPU memory.\n" + << "You can set GFlags environment variable '" + << "FLAGS_fraction_of_gpu_memory_to_use" + << "' to change the fraction of GPU usage.\n\n"; + } + }); + + platform::SetDeviceId(gpu_id); + return a_arr[gpu_id]; +} +#endif + +template <> +size_t Used(const platform::CUDAPlace &place) { +#ifdef PADDLE_WITH_CUDA + return GetGPUBuddyAllocator(place.device)->Used(); +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif +} + +template <> +void *Alloc(const platform::CUDAPlace &place, + size_t size) { +#ifdef PADDLE_WITH_CUDA + auto *buddy_allocator = GetGPUBuddyAllocator(place.device); + auto *ptr = buddy_allocator->Alloc(size); + if (ptr == nullptr) { + int cur_dev = platform::GetCurrentDeviceId(); + platform::SetDeviceId(place.device); + size_t avail, total; + platform::GpuMemoryUsage(&avail, &total); + LOG(WARNING) << "Cannot allocate " << string::HumanReadableSize(size) + << " in GPU " << place.device << ", available " + << string::HumanReadableSize(avail); + LOG(WARNING) << "total " << total; + LOG(WARNING) << "GpuMinChunkSize " + << string::HumanReadableSize( + buddy_allocator->GetMinChunkSize()); + LOG(WARNING) << "GpuMaxChunkSize " + << string::HumanReadableSize( + buddy_allocator->GetMaxChunkSize()); + LOG(WARNING) << "GPU memory used: " + << string::HumanReadableSize(Used(place)); + platform::SetDeviceId(cur_dev); + } + if (FLAGS_init_allocated_mem) { + cudaMemset(ptr, 0xEF, size); + } + return ptr; +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif +} + +template <> +void Free(const platform::CUDAPlace &place, void *p) { +#ifdef PADDLE_WITH_CUDA + GetGPUBuddyAllocator(place.device)->Free(p); +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif +} + +#ifdef PADDLE_WITH_CUDA +BuddyAllocator *GetCUDAPinnedBuddyAllocator() { + static std::once_flag init_flag; + static BuddyAllocator *ba = nullptr; + + std::call_once(init_flag, []() { + ba = new BuddyAllocator(std::unique_ptr( + new detail::CUDAPinnedAllocator), + platform::CUDAPinnedMinChunkSize(), + platform::CUDAPinnedMaxChunkSize()); + }); + + return ba; +} +#endif + +template <> +size_t Used(const platform::CUDAPinnedPlace &place) { +#ifdef PADDLE_WITH_CUDA + return GetCUDAPinnedBuddyAllocator()->Used(); +#else + PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); +#endif +} + +template <> +void *Alloc(const platform::CUDAPinnedPlace &place, + size_t size) { +#ifdef PADDLE_WITH_CUDA + auto *buddy_allocator = GetCUDAPinnedBuddyAllocator(); + void *ptr = buddy_allocator->Alloc(size); + + if (ptr == nullptr) { + LOG(WARNING) << "cudaMallocHost Cannot allocate " << size + << " bytes in CUDAPinnedPlace"; + } + if (FLAGS_init_allocated_mem) { + memset(ptr, 0xEF, size); + } + return ptr; +#else + PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); +#endif +} + +template <> +void Free(const platform::CUDAPinnedPlace &place, + void *p) { +#ifdef PADDLE_WITH_CUDA + GetCUDAPinnedBuddyAllocator()->Free(p); +#else + PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); +#endif +} + +struct AllocVisitor : public boost::static_visitor { + inline explicit AllocVisitor(size_t size) : size_(size) {} + + template + inline void *operator()(const Place &place) const { + return Alloc(place, size_); + } + + private: + size_t size_; +}; + +struct FreeVisitor : public boost::static_visitor { + inline explicit FreeVisitor(void *ptr) : ptr_(ptr) {} + + template + inline void operator()(const Place &place) const { + Free(place, ptr_); + } + + private: + void *ptr_; +}; + +size_t Usage::operator()(const platform::CPUPlace &cpu) const { + return Used(cpu); +} + +size_t Usage::operator()(const platform::CUDAPlace &gpu) const { +#ifdef PADDLE_WITH_CUDA + return Used(gpu); +#else + PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); +#endif +} + +size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const { +#ifdef PADDLE_WITH_CUDA + return Used(cuda_pinned); +#else + PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); +#endif +} +} // namespace legacy + +namespace allocation { + +Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) { + void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_); + return new Allocation(ptr, size, place_); +} + +void LegacyAllocator::Free(Allocation *allocation) { + boost::apply_visitor(legacy::FreeVisitor(allocation->ptr()), + allocation->place()); + delete allocation; +} +} // namespace allocation +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/memory/allocation/legacy_allocator.h b/paddle/fluid/memory/allocation/legacy_allocator.h new file mode 100644 index 0000000000000000000000000000000000000000..503a7a685cb9d8dbbbbd6c23b5b82c383893e3d8 --- /dev/null +++ b/paddle/fluid/memory/allocation/legacy_allocator.h @@ -0,0 +1,37 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "paddle/fluid/memory/allocation/allocator.h" +#include "paddle/fluid/platform/place.h" +namespace paddle { +namespace memory { +namespace allocation { + +class LegacyAllocatorPrivate; +class LegacyAllocator : public Allocator { + public: + explicit LegacyAllocator(const platform::Place &p) : place_(p) {} + + protected: + Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override; + void Free(Allocation *allocation) override; + + private: + platform::Place place_; +}; + +} // namespace allocation +} // namespace memory +} // namespace paddle diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 5c06cad64eca296b0a9b57bd22d2a94cd7d00838..e414ad657a9447142d6e3a42fc7efc86f01e9c9f 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -12,305 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/memory/malloc.h" #include #include - -#include "glog/logging.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" -#include "paddle/fluid/memory/detail/buddy_allocator.h" -#include "paddle/fluid/memory/detail/system_allocator.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/gpu_info.h" -#include "paddle/fluid/string/printf.h" - -DEFINE_bool(init_allocated_mem, false, - "It is a mistake that the values of the memory allocated by " - "BuddyAllocator are always zeroed in some op's implementation. " - "To find this error in time, we use init_allocated_mem to indicate " - "that initializing the allocated memory with a small value " - "during unit testing."); -DECLARE_double(fraction_of_gpu_memory_to_use); - +#include "paddle/fluid/platform/place.h" namespace paddle { namespace memory { - -namespace legacy { - -using BuddyAllocator = detail::BuddyAllocator; - -BuddyAllocator* GetCPUBuddyAllocator() { - // We tried thread_local for inference::RNN1 model, but that not works much - // for multi-thread test. - static std::once_flag init_flag; - static detail::BuddyAllocator* a = nullptr; - - std::call_once(init_flag, []() { - a = new detail::BuddyAllocator( - std::unique_ptr(new detail::CPUAllocator), - platform::CpuMinChunkSize(), platform::CpuMaxChunkSize()); - }); - - return a; -} - -// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation, -// seems they are almost the same overhead. -struct NaiveAllocator { - void* Alloc(size_t size) { return malloc(size); } - - void Free(void* p) { - PADDLE_ENFORCE(p); - free(p); - } - - static NaiveAllocator* Instance() { - static NaiveAllocator x; - return &x; - } - - private: - std::mutex lock_; -}; - -template <> -void* Alloc(const platform::CPUPlace& place, size_t size) { - VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); - void* p = GetCPUBuddyAllocator()->Alloc(size); - if (FLAGS_init_allocated_mem) { - memset(p, 0xEF, size); - } - VLOG(100) << " pointer=" << p; - return p; -} - -template <> -void Free(const platform::CPUPlace& place, void* p) { - VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); - GetCPUBuddyAllocator()->Free(p); -} - -template <> -size_t Used(const platform::CPUPlace& place) { - return GetCPUBuddyAllocator()->Used(); -} - -#ifdef PADDLE_WITH_CUDA -BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static std::once_flag init_flag; - static detail::BuddyAllocator** a_arr = nullptr; - - std::call_once(init_flag, [gpu_id]() { - int gpu_num = platform::GetCUDADeviceCount(); - PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id, - gpu_num); - - a_arr = new BuddyAllocator*[gpu_num]; - for (int i = 0; i < gpu_num; i++) { - a_arr[i] = nullptr; - platform::SetDeviceId(i); - a_arr[i] = new BuddyAllocator( - std::unique_ptr(new detail::GPUAllocator(i)), - platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); - - VLOG(100) << "\n\nNOTE: each GPU device use " - << FLAGS_fraction_of_gpu_memory_to_use * 100 - << "% of GPU memory.\n" - << "You can set GFlags environment variable '" - << "FLAGS_fraction_of_gpu_memory_to_use" - << "' to change the fraction of GPU usage.\n\n"; - } - }); - - platform::SetDeviceId(gpu_id); - return a_arr[gpu_id]; -} -#endif - -template <> -size_t Used(const platform::CUDAPlace& place) { -#ifdef PADDLE_WITH_CUDA - return GetGPUBuddyAllocator(place.device)->Used(); -#else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); -#endif -} - -template <> -void* Alloc(const platform::CUDAPlace& place, - size_t size) { -#ifdef PADDLE_WITH_CUDA - auto* buddy_allocator = GetGPUBuddyAllocator(place.device); - auto* ptr = buddy_allocator->Alloc(size); - if (ptr == nullptr) { - int cur_dev = platform::GetCurrentDeviceId(); - platform::SetDeviceId(place.device); - size_t avail, total; - platform::GpuMemoryUsage(&avail, &total); - LOG(WARNING) << "Cannot allocate " << string::HumanReadableSize(size) - << " in GPU " << place.device << ", available " - << string::HumanReadableSize(avail); - LOG(WARNING) << "total " << total; - LOG(WARNING) << "GpuMinChunkSize " - << string::HumanReadableSize( - buddy_allocator->GetMinChunkSize()); - LOG(WARNING) << "GpuMaxChunkSize " - << string::HumanReadableSize( - buddy_allocator->GetMaxChunkSize()); - LOG(WARNING) << "GPU memory used: " - << string::HumanReadableSize(Used(place)); - platform::SetDeviceId(cur_dev); - } - if (FLAGS_init_allocated_mem) { - cudaMemset(ptr, 0xEF, size); - } - return ptr; -#else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); -#endif -} - -template <> -void Free(const platform::CUDAPlace& place, void* p) { -#ifdef PADDLE_WITH_CUDA - GetGPUBuddyAllocator(place.device)->Free(p); -#else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); -#endif -} - -#ifdef PADDLE_WITH_CUDA -BuddyAllocator* GetCUDAPinnedBuddyAllocator() { - static std::once_flag init_flag; - static BuddyAllocator* ba = nullptr; - - std::call_once(init_flag, []() { - ba = new BuddyAllocator(std::unique_ptr( - new detail::CUDAPinnedAllocator), - platform::CUDAPinnedMinChunkSize(), - platform::CUDAPinnedMaxChunkSize()); - }); - - return ba; -} -#endif - -template <> -size_t Used(const platform::CUDAPinnedPlace& place) { -#ifdef PADDLE_WITH_CUDA - return GetCUDAPinnedBuddyAllocator()->Used(); -#else - PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); -#endif -} - -template <> -void* Alloc(const platform::CUDAPinnedPlace& place, - size_t size) { -#ifdef PADDLE_WITH_CUDA - auto* buddy_allocator = GetCUDAPinnedBuddyAllocator(); - void* ptr = buddy_allocator->Alloc(size); - - if (ptr == nullptr) { - LOG(WARNING) << "cudaMallocHost Cannot allocate " << size - << " bytes in CUDAPinnedPlace"; - } - if (FLAGS_init_allocated_mem) { - memset(ptr, 0xEF, size); - } - return ptr; -#else - PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); -#endif -} - -template <> -void Free(const platform::CUDAPinnedPlace& place, - void* p) { -#ifdef PADDLE_WITH_CUDA - GetCUDAPinnedBuddyAllocator()->Free(p); -#else - PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); -#endif -} - -struct AllocVisitor : public boost::static_visitor { - inline explicit AllocVisitor(size_t size) : size_(size) {} - - template - inline void* operator()(const Place& place) const { - return Alloc(place, size_); - } - - private: - size_t size_; -}; - -struct FreeVisitor : public boost::static_visitor { - inline explicit FreeVisitor(void* ptr) : ptr_(ptr) {} - - template - inline void operator()(const Place& place) const { - Free(place, ptr_); - } - - private: - void* ptr_; -}; - -size_t Usage::operator()(const platform::CPUPlace& cpu) const { - return Used(cpu); -} - -size_t Usage::operator()(const platform::CUDAPlace& gpu) const { -#ifdef PADDLE_WITH_CUDA - return Used(gpu); -#else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); -#endif -} - -size_t Usage::operator()(const platform::CUDAPinnedPlace& cuda_pinned) const { -#ifdef PADDLE_WITH_CUDA - return Used(cuda_pinned); -#else - PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device."); -#endif -} - -class LegacyAllocation : public Allocation { - public: - using Allocation::Allocation; - - ~LegacyAllocation() final { - boost::apply_visitor(FreeVisitor(this->ptr()), this->place()); - } -}; - -} // namespace legacy - std::shared_ptr AllocShared(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (allocation::GetAllocatorStrategy() == - allocation::AllocatorStrategy::kLegacy) { - void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); - return std::shared_ptr( - new legacy::LegacyAllocation(p, size, place)); - } else { - return allocation::AllocatorFacade::Instance().AllocShared(place, size, - attr); - } + return allocation::AllocatorFacade::Instance().AllocShared(place, size, attr); } AllocationPtr Alloc(const platform::Place& place, size_t size, Allocator::Attr attr) { - if (allocation::GetAllocatorStrategy() == - allocation::AllocatorStrategy::kLegacy) { - void* p = boost::apply_visitor(legacy::AllocVisitor(size), place); - return AllocationPtr(new legacy::LegacyAllocation(p, size, place)); - } else { - return allocation::AllocatorFacade::Instance().Alloc(place, size, attr); - } + return allocation::AllocatorFacade::Instance().Alloc(place, size, attr); } } // namespace memory diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 253a0bc5ccac3a029e5b335f8b044dca6cce68fe..916538b2a659d7d9503fdc337a4ba84fa21f77f9 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -30,26 +30,5 @@ extern std::shared_ptr AllocShared( extern AllocationPtr Alloc(const platform::Place& place, size_t size, Allocator::Attr attr = Allocator::kDefault); -namespace legacy { - -template -void* Alloc(const Place& place, size_t size); - -template -void Free(const Place& place, void* p); - -template -size_t Used(const Place& place); - -struct Usage : public boost::static_visitor { - size_t operator()(const platform::CPUPlace& cpu) const; - size_t operator()(const platform::CUDAPlace& gpu) const; - size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const; -}; - -size_t memory_usage(const platform::Place& p); - -} // namespace legacy - } // namespace memory } // namespace paddle