diff --git a/paddle/memory/detail/system_allocator.cc b/paddle/memory/detail/system_allocator.cc index f61e67a32906083881dd7f47433521876be9b355..a270bd59581520859d43cddd2fc0cfa72080f46d 100644 --- a/paddle/memory/detail/system_allocator.cc +++ b/paddle/memory/detail/system_allocator.cc @@ -27,7 +27,7 @@ limitations under the License. */ // between host and device. Allocates too much would reduce the amount // of memory available to the system for paging. So, by default, we // should set false to use_pinned_memory. -DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory."); +DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory."); namespace paddle { namespace memory { diff --git a/paddle/memory/memory.cc b/paddle/memory/memory.cc index 207025f9b1c64f0f8943f9fae5edefc9328a1d26..c99cc541566d233579982dba6a8a985ff46860d9 100644 --- a/paddle/memory/memory.cc +++ b/paddle/memory/memory.cc @@ -13,22 +13,32 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/memory/memory.h" + +#include <algorithm> // for transform +#include <cstring> // for memcpy +#include <mutex> // for call_once + #include "paddle/memory/detail/buddy_allocator.h" #include "paddle/memory/detail/system_allocator.h" -#include <cstring> // for memcpy - namespace paddle { namespace memory { -detail::BuddyAllocator* GetCPUBuddyAllocator() { - static detail::BuddyAllocator* a = nullptr; - if (a == nullptr) { - a = new detail::BuddyAllocator(new detail::CPUAllocator, - platform::CpuMinChunkSize(), - platform::CpuMaxChunkSize()); - } - return a; +using BuddyAllocator = detail::BuddyAllocator; + +std::once_flag cpu_allocator_flag; +std::once_flag gpu_allocator_flag; + +BuddyAllocator* GetCPUBuddyAllocator() { + static std::unique_ptr<BuddyAllocator> a{nullptr}; + + std::call_once(cpu_allocator_flag, [&]() { + a.reset(new BuddyAllocator(new detail::CPUAllocator, + platform::CpuMinChunkSize(), + platform::CpuMaxChunkSize())); + }); + + return a.get(); } template <> @@ -48,20 +58,31 @@ size_t Used(platform::CPUPlace place) { #ifndef PADDLE_ONLY_CPU -detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static detail::BuddyAllocator** as = NULL; - if (as == NULL) { +BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { + using BuddyAllocVec = std::vector<BuddyAllocator*>; + static std::unique_ptr<BuddyAllocVec, void (*)(BuddyAllocVec * p)> as{ + new BuddyAllocVec, [](BuddyAllocVec* p) { + std::for_each(p->begin(), p->end(), + [](BuddyAllocator* p) { delete p; }); + }}; + + // GPU buddy allocators + auto& allocators = *as.get(); + + // GPU buddy allocator initialization + std::call_once(gpu_allocator_flag, [&]() { int gpu_num = platform::GetDeviceCount(); - as = new detail::BuddyAllocator*[gpu_num]; + allocators.reserve(gpu_num); for (int gpu = 0; gpu < gpu_num; gpu++) { platform::SetDeviceId(gpu); - as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator, - platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize()); + allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator, + platform::GpuMinChunkSize(), +
platform::GpuMaxChunkSize())); } - } + }); + platform::SetDeviceId(gpu_id); - return as[gpu_id]; + return allocators[gpu_id]; } template <> diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index d24d83f299fdb071e60fa3cc7b223c0228cb29af..0ae1e99452973feb6d085dd6ef51e2afca988f59 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -45,4 +45,8 @@ TEST(Gather, GatherData) { for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); + + delete src; + delete index; + delete output; } diff --git a/paddle/operators/scatter_test.cc b/paddle/operators/scatter_test.cc index 4449ce6564396f1971506efb7458c00c834db19f..26fdaff1460a297fa638181641991f732533fe52 100644 --- a/paddle/operators/scatter_test.cc +++ b/paddle/operators/scatter_test.cc @@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) { EXPECT_EQ(output->data<float>()[i], float(i - 4)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0)); + + delete src; + delete index; + delete output; }