Commit 812a64c0 authored by gangliao, committed by GitHub

Merge pull request #3534 from gangliao/mem_release

FIX: Release CPU/GPU memory at the end of the Program 
@@ -27,7 +27,7 @@ limitations under the License. */
 // between host and device. Allocates too much would reduce the amount
 // of memory available to the system for paging. So, by default, we
 // should set false to use_pinned_memory.
-DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory.");
+DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
 
 namespace paddle {
 namespace memory {
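The hunk above flips the default for `use_pinned_memory` from `false` to `true` (note that the surrounding comment still argues for `false`; the commit leaves it unchanged). Pinned, i.e. page-locked, host memory speeds up host/device transfers but is never paged out, so over-allocating it starves the OS. Below is a minimal sketch of how such a gflags boolean typically gates the allocation path; `AllocHost`/`FreeHost` are hypothetical helpers, not Paddle's actual system-allocator API:

```cpp
#include <cstdlib>

#include <cuda_runtime.h>
#include <gflags/gflags.h>

// Same flag as in the hunk; FLAGS_use_pinned_memory is generated by gflags.
DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");

// Hypothetical helper: pick pinned or pageable host memory per the flag.
void* AllocHost(size_t size) {
  void* p = nullptr;
  if (FLAGS_use_pinned_memory) {
    // Page-locked memory: faster host<->device copies, but never paged out.
    if (cudaMallocHost(&p, size) != cudaSuccess) return nullptr;
  } else {
    p = std::malloc(size);  // ordinary pageable memory
  }
  return p;
}

// Hypothetical helper: free with the deallocator matching the allocation.
void FreeHost(void* p, bool pinned) {
  if (pinned) {
    cudaFreeHost(p);
  } else {
    std::free(p);
  }
}
```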
@@ -13,22 +13,32 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/memory/memory.h"
+
+#include <algorithm>  // for transform
+#include <cstring>    // for memcpy
+#include <memory>     // for unique_ptr
+#include <mutex>      // for call_once
+#include <vector>     // for vector
+
 #include "paddle/memory/detail/buddy_allocator.h"
 #include "paddle/memory/detail/system_allocator.h"
 
-#include <cstring>  // for memcpy
-
 namespace paddle {
 namespace memory {
 
-detail::BuddyAllocator* GetCPUBuddyAllocator() {
-  static detail::BuddyAllocator* a = nullptr;
-  if (a == nullptr) {
-    a = new detail::BuddyAllocator(new detail::CPUAllocator,
-                                   platform::CpuMinChunkSize(),
-                                   platform::CpuMaxChunkSize());
-  }
-  return a;
+using BuddyAllocator = detail::BuddyAllocator;
+
+std::once_flag cpu_allocator_flag;
+std::once_flag gpu_allocator_flag;
+
+BuddyAllocator* GetCPUBuddyAllocator() {
+  static std::unique_ptr<BuddyAllocator> a{nullptr};
+
+  std::call_once(cpu_allocator_flag, [&]() {
+    a.reset(new BuddyAllocator(new detail::CPUAllocator,
+                               platform::CpuMinChunkSize(),
+                               platform::CpuMaxChunkSize()));
+  });
+
+  return a.get();
 }
 
 template <>
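The rewritten `GetCPUBuddyAllocator` is the heart of the fix: the old raw pointer was allocated once and never freed, while a function-local `static std::unique_ptr` is destroyed during normal program termination, releasing the allocator (this is also why `<memory>` and `<mutex>` appear among the added includes). A standalone sketch of the pattern, with illustrative names only:

```cpp
#include <iostream>
#include <memory>
#include <mutex>

struct Allocator {
  Allocator() { std::cout << "allocator created\n"; }
  ~Allocator() { std::cout << "allocator destroyed\n"; }  // runs at exit
};

std::once_flag allocator_flag;

Allocator* GetAllocator() {
  // The unique_ptr starts empty; call_once fills it exactly once even if
  // several threads make the first call concurrently.
  static std::unique_ptr<Allocator> a{nullptr};
  std::call_once(allocator_flag, [&] { a.reset(new Allocator); });
  return a.get();
}

int main() {
  Allocator* p1 = GetAllocator();
  Allocator* p2 = GetAllocator();   // same instance, no second construction
  std::cout << (p1 == p2) << "\n";  // prints 1
}  // "allocator destroyed" is printed after main returns
```

Since C++11, a plain `static Allocator a;` inside the function (a Meyers singleton) would be thread-safe and destroyed at exit as well; the `call_once` form used in the commit makes the one-time initialization explicit and keeps the pointer return type.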
@@ -48,20 +58,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
 
 #ifndef PADDLE_ONLY_CPU
 
-detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
-  static detail::BuddyAllocator** as = NULL;
-  if (as == NULL) {
+BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
+  using BuddyAllocVec = std::vector<BuddyAllocator*>;
+  static std::unique_ptr<BuddyAllocVec, void (*)(BuddyAllocVec*)> as{
+      new BuddyAllocVec, [](BuddyAllocVec* p) {
+        std::for_each(p->begin(), p->end(),
+                      [](BuddyAllocator* p) { delete p; });
+      }};
+
+  // GPU buddy allocators
+  auto& allocators = *as.get();
+
+  // GPU buddy allocator initialization
+  std::call_once(gpu_allocator_flag, [&]() {
     int gpu_num = platform::GetDeviceCount();
-    as = new detail::BuddyAllocator*[gpu_num];
+    allocators.reserve(gpu_num);
     for (int gpu = 0; gpu < gpu_num; gpu++) {
       platform::SetDeviceId(gpu);
-      as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator,
-                                           platform::GpuMinChunkSize(),
-                                           platform::GpuMaxChunkSize());
+      allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator,
+                                                 platform::GpuMinChunkSize(),
+                                                 platform::GpuMaxChunkSize()));
     }
-  }
+  });
+
   platform::SetDeviceId(gpu_id);
-  return as[gpu_id];
+  return allocators[gpu_id];
 }
 
 template <>
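`GetGPUBuddyAllocator` gets the same treatment, but with one allocator per device: a `static std::unique_ptr` owns the vector of raw `BuddyAllocator*`, and a custom deleter walks the vector at program exit. One caveat worth noting: the deleter lambda in the hunk deletes each element but never deletes the vector `p` itself. The standalone sketch below (illustrative names only) shows the same pattern with that final `delete` added:

```cpp
#include <algorithm>
#include <iostream>
#include <memory>
#include <vector>

struct DeviceAllocator {
  explicit DeviceAllocator(int id) : id(id) {}
  ~DeviceAllocator() { std::cout << "freed allocator " << id << "\n"; }
  int id;
};

using AllocVec = std::vector<DeviceAllocator*>;

// The deleter type is part of the unique_ptr's type; a non-capturing
// lambda converts to the plain function pointer used here.
static std::unique_ptr<AllocVec, void (*)(AllocVec*)> allocators{
    new AllocVec, [](AllocVec* v) {
      std::for_each(v->begin(), v->end(),
                    [](DeviceAllocator* a) { delete a; });
      delete v;  // also reclaim the vector itself (missing in the hunk)
    }};

int main() {
  for (int i = 0; i < 2; ++i) {
    allocators->emplace_back(new DeviceAllocator(i));
  }
}  // at exit the deleter prints "freed allocator 0", "freed allocator 1"
```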
@@ -45,4 +45,8 @@ TEST(Gather, GatherData) {
 
   for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
   for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
+
+  delete src;
+  delete index;
+  delete output;
 }
@@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) {
     EXPECT_EQ(output->data<float>()[i], float(i - 4));
   for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0));
   for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0));
+
+  delete src;
+  delete index;
+  delete output;
 }
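Both test hunks plug leaks by deleting the tensors the tests allocate with `new`. An alternative, sketched below with a stand-in `Tensor` type rather than `paddle::framework::Tensor`, is RAII ownership, which also releases the objects if the test returns early (e.g. on a failed `ASSERT_*`):

```cpp
#include <memory>

struct Tensor { /* stand-in for the framework tensor */ };

void GatherDataLikeTest() {
  std::unique_ptr<Tensor> src(new Tensor);    // C++11-compatible form
  std::unique_ptr<Tensor> index(new Tensor);
  std::unique_ptr<Tensor> output(new Tensor);
  // ... run gather/scatter against src.get(), index.get(), output.get() ...
}  // all three tensors are released when the scope unwinds
```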