提交 812a64c0 编写于 作者: G gangliao 提交者: GitHub

Merge pull request #3534 from gangliao/mem_release

FIX: Release CPU/GPU memory at the end of the Program 
...@@ -27,7 +27,7 @@ limitations under the License. */ ...@@ -27,7 +27,7 @@ limitations under the License. */
// between host and device. Allocates too much would reduce the amount // between host and device. Allocates too much would reduce the amount
// of memory available to the system for paging. So, by default, we // of memory available to the system for paging. So, by default, we
// should set false to use_pinned_memory. // should set false to use_pinned_memory.
DEFINE_bool(use_pinned_memory, false, "If set, allocate cpu pinned memory."); DEFINE_bool(use_pinned_memory, true, "If set, allocate cpu pinned memory.");
namespace paddle { namespace paddle {
namespace memory { namespace memory {
......
...@@ -13,22 +13,32 @@ See the License for the specific language governing permissions and ...@@ -13,22 +13,32 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/memory/memory.h" #include "paddle/memory/memory.h"
#include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h"
#include <algorithm> // for transform
#include <cstring> // for memcpy #include <cstring> // for memcpy
#include <mutex> // for call_once
#include "paddle/memory/detail/buddy_allocator.h"
#include "paddle/memory/detail/system_allocator.h"
namespace paddle { namespace paddle {
namespace memory { namespace memory {
detail::BuddyAllocator* GetCPUBuddyAllocator() { using BuddyAllocator = detail::BuddyAllocator;
static detail::BuddyAllocator* a = nullptr;
if (a == nullptr) { std::once_flag cpu_allocator_flag;
a = new detail::BuddyAllocator(new detail::CPUAllocator, std::once_flag gpu_allocator_flag;
BuddyAllocator* GetCPUBuddyAllocator() {
static std::unique_ptr<BuddyAllocator> a{nullptr};
std::call_once(cpu_allocator_flag, [&]() {
a.reset(new BuddyAllocator(new detail::CPUAllocator,
platform::CpuMinChunkSize(), platform::CpuMinChunkSize(),
platform::CpuMaxChunkSize()); platform::CpuMaxChunkSize()));
} });
return a;
return a.get();
} }
template <> template <>
...@@ -48,20 +58,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) { ...@@ -48,20 +58,31 @@ size_t Used<platform::CPUPlace>(platform::CPUPlace place) {
#ifndef PADDLE_ONLY_CPU #ifndef PADDLE_ONLY_CPU
detail::BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
static detail::BuddyAllocator** as = NULL; using BuddyAllocVec = std::vector<BuddyAllocator*>;
if (as == NULL) { static std::unique_ptr<BuddyAllocVec, void (*)(BuddyAllocVec * p)> as{
new BuddyAllocVec, [](BuddyAllocVec* p) {
std::for_each(p->begin(), p->end(),
[](BuddyAllocator* p) { delete p; });
}};
// GPU buddy allocators
auto& allocators = *as.get();
// GPU buddy allocator initialization
std::call_once(gpu_allocator_flag, [&]() {
int gpu_num = platform::GetDeviceCount(); int gpu_num = platform::GetDeviceCount();
as = new detail::BuddyAllocator*[gpu_num]; allocators.reserve(gpu_num);
for (int gpu = 0; gpu < gpu_num; gpu++) { for (int gpu = 0; gpu < gpu_num; gpu++) {
platform::SetDeviceId(gpu); platform::SetDeviceId(gpu);
as[gpu] = new detail::BuddyAllocator(new detail::GPUAllocator, allocators.emplace_back(new BuddyAllocator(new detail::GPUAllocator,
platform::GpuMinChunkSize(), platform::GpuMinChunkSize(),
platform::GpuMaxChunkSize()); platform::GpuMaxChunkSize()));
}
} }
});
platform::SetDeviceId(gpu_id); platform::SetDeviceId(gpu_id);
return as[gpu_id]; return allocators[gpu_id];
} }
template <> template <>
......
...@@ -45,4 +45,8 @@ TEST(Gather, GatherData) { ...@@ -45,4 +45,8 @@ TEST(Gather, GatherData) {
for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
delete src;
delete index;
delete output;
} }
...@@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) { ...@@ -49,4 +49,8 @@ TEST(scatter, ScatterUpdate) {
EXPECT_EQ(output->data<float>()[i], float(i - 4)); EXPECT_EQ(output->data<float>()[i], float(i - 4));
for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(p_output[i], float(0));
for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0)); for (size_t i = 8; i < 16; ++i) EXPECT_EQ(output->data<float>()[i], float(0));
delete src;
delete index;
delete output;
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册