Commit 19e669a9
Authored Nov 16, 2018 by Yu Yang

Add legacy_allocator

test=develop

Parent: 1cb7e7dd
Showing 9 changed files with 374 additions and 324 deletions (+374 −324).
paddle/fluid/memory/CMakeLists.txt                     +1    −1
paddle/fluid/memory/allocation/CMakeLists.txt          +2    −0
paddle/fluid/memory/allocation/allocator.cc            +1    −5
paddle/fluid/memory/allocation/allocator_facade.cc     +22   −4
paddle/fluid/memory/allocation/buffered_allocator.h    +0    −6
paddle/fluid/memory/allocation/legacy_allocator.cc     +307  −0
paddle/fluid/memory/allocation/legacy_allocator.h      +37   −0
paddle/fluid/memory/malloc.cc                          +4    −287
paddle/fluid/memory/malloc.h                           +0    −21
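Taken together, the change moves the pre-existing BuddyAllocator-backed code out of malloc.cc into a dedicated LegacyAllocator behind the unified Allocator interface, so the legacy-vs-new decision is made once inside AllocatorFacade instead of on every allocation call. Call sites are untouched; a minimal caller-side sketch (it compiles only inside the Paddle tree, and the 1024-byte size is illustrative):

```cpp
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/place.h"

void Demo() {
  paddle::platform::CPUPlace cpu;
  // Under AllocatorStrategy::kLegacy this now routes through
  // LegacyAllocator -> BuddyAllocator; under the new strategy it goes
  // through the facade's CPU/CUDA allocator chain. Either way the call
  // site is identical.
  auto allocation = paddle::memory::Alloc(cpu, 1024);
  // The buffer lives at allocation->ptr() and is released when the
  // returned AllocationPtr is destroyed (via AllocationDeleter).
}
```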
paddle/fluid/memory/CMakeLists.txt

```diff
 add_subdirectory(detail)
 add_subdirectory(allocation)
-cc_library(malloc SRCS malloc.cc DEPS buddy_allocator place enforce allocator_facade)
+cc_library(malloc SRCS malloc.cc DEPS place enforce allocator_facade)
 cc_library(memcpy SRCS memcpy.cc DEPS place)
 cc_library(memory ...
```
paddle/fluid/memory/allocation/CMakeLists.txt

```diff
@@ -3,6 +3,7 @@ cc_library(cpu_allocator SRCS cpu_allocator.cc DEPS allocator)
 cc_library(best_fit_allocator SRCS best_fit_allocator.cc DEPS allocator)
 cc_library(locked_allocator SRCS locked_allocator.cc DEPS allocator)
 cc_library(buffered_allocator SRCS buffered_allocator.cc DEPS allocator)
+cc_library(legacy_allocator SRCS legacy_allocator.cc DEPS allocator buddy_allocator)
 cc_test(buffered_allocator_test SRCS buffered_allocator_test.cc DEPS best_fit_allocator locked_allocator buffered_allocator cpu_allocator)
 if (WITH_GPU)
 ...
@@ -53,6 +54,7 @@ cc_library(allocator_facade SRCS allocator_facade.cc DEPS
     retry_allocator
     buffered_allocator
     allocator_strategy
+    legacy_allocator
     )
 nv_test(allocation_and_eigen_test SRCS allocation_and_eigen_test.cu DEPS allocator_facade)
```
paddle/fluid/memory/allocation/allocator.cc

```diff
@@ -37,11 +37,7 @@ const char* BadAlloc::what() const noexcept { return msg_.c_str(); }
 void AllocationDeleter::operator()(Allocation* allocation) const {
   auto* allocator = allocation->allocator();
-  if (allocator) {
-    allocator->Free(allocation);
-  } else {
-    delete allocation;  // Compatible for legacy allocation.
-  }
+  allocator->Free(allocation);
 }
 }  // namespace allocation
```
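With LegacyAllocator in place every Allocation is produced by some Allocator, so the deleter no longer needs the null-allocator fallback; LegacyAllocator::Free (see legacy_allocator.cc below) is what performs the delete for the legacy path. A self-contained sketch of the deleter-on-a-unique_ptr arrangement; the AllocationPtr alias is written here as an assumption about how Paddle wires it up, not quoted from this diff:

```cpp
#include <memory>

namespace sketch {

struct Allocation;

struct Allocator {
  virtual ~Allocator() = default;
  virtual void Free(Allocation* allocation) = 0;
};

struct Allocation {
  Allocator* allocator = nullptr;  // back-pointer set at allocation time
};

// Mirrors the post-commit deleter: no null check, because the allocator
// always owns deallocation (for legacy allocations, Free() also deletes
// the Allocation object itself).
struct AllocationDeleter {
  void operator()(Allocation* allocation) const {
    allocation->allocator->Free(allocation);
  }
};

using AllocationPtr = std::unique_ptr<Allocation, AllocationDeleter>;

}  // namespace sketch

int main() {}
```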
paddle/fluid/memory/allocation/allocator_facade.cc

```diff
@@ -19,10 +19,12 @@
 #include <vector>
 #include "paddle/fluid/memory/allocation/aligned_allocator.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
 #include "paddle/fluid/memory/allocation/auto_increment_allocator.h"
 #include "paddle/fluid/memory/allocation/best_fit_allocator.h"
 #include "paddle/fluid/memory/allocation/conditional_allocator.h"
 #include "paddle/fluid/memory/allocation/cpu_allocator.h"
+#include "paddle/fluid/memory/allocation/legacy_allocator.h"
 #include "paddle/fluid/memory/allocation/locked_allocator.h"
 #include "paddle/fluid/memory/allocation/retry_allocator.h"
 #include "paddle/fluid/memory/allocation/zero_size_allocator.h"
@@ -190,13 +192,29 @@ class AllocatorFacadePrivate {
   ~AllocatorFacadePrivate() = default;

   AllocatorFacadePrivate() {
-    InitCPUAllocator();
-    InitCUDAAllocator();
-    InitCUDAPinnedAllocator();
-    WrapZeroSizeAllocator();
+    if (GetAllocatorStrategy() == AllocatorStrategy::kLegacy) {
+      InitLegacyAllocator();
+    } else {
+      InitCPUAllocator();
+      InitCUDAAllocator();
+      InitCUDAPinnedAllocator();
+      WrapZeroSizeAllocator();
+    }
   }

  private:
+  void InitLegacyAllocator() {
+    std::vector<platform::Place> places{platform::CPUPlace()};
+#ifdef PADDLE_WITH_CUDA
+    for (int dev_id = 0; dev_id < platform::GetCUDADeviceCount(); ++dev_id) {
+      places.emplace_back(platform::CUDAPlace(dev_id));
+    }
+#endif
+    for (auto& p : places) {
+      allocators_[p] = std::make_shared<LegacyAllocator>(p);
+    }
+  }
+
   void InitCPUAllocator() {
     allocators_[platform::CPUPlace()] = std::make_shared<CPUManagedAllocator>();
   }
 ...
```
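Since the facade is a process-wide singleton, the strategy branch above runs once, at first use, rather than per allocation. A stand-alone sketch of that construction-time dispatch; all names are illustrative stand-ins, and Paddle actually resolves the strategy from the allocator_strategy gflag rather than the environment variable assumed here:

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

enum class AllocatorStrategy { kLegacy, kNaiveBestFit };

// Stand-in for GetAllocatorStrategy(); the env-var lookup is an assumption
// made so the sketch runs without gflags.
AllocatorStrategy GetStrategy() {
  const char* v = std::getenv("ALLOCATOR_STRATEGY");
  return (v && std::string(v) == "naive_best_fit")
             ? AllocatorStrategy::kNaiveBestFit
             : AllocatorStrategy::kLegacy;
}

struct Facade {
  Facade() {
    if (GetStrategy() == AllocatorStrategy::kLegacy) {
      std::cout << "InitLegacyAllocator()\n";
    } else {
      std::cout << "InitCPUAllocator() / InitCUDAAllocator() / ...\n";
    }
  }
  static Facade& Instance() {
    static Facade f;  // constructed once; the strategy branch runs once
    return f;
  }
};

int main() { Facade::Instance(); }
```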
paddle/fluid/memory/allocation/buffered_allocator.h

```diff
@@ -35,12 +35,6 @@ class BufferedAllocator : public Allocator {
   ~BufferedAllocator();

-  // std::unique_ptr<Allocation> Allocate(
-  //     size_t size, Allocator::Attr attr = Allocator::Attr::kDefault)
-  //     override;
-  //
-  // void FreeUniquePtr(std::unique_ptr<Allocation> allocation) override;
-
   bool IsAllocThreadSafe() const override;

   // only used in unittest
```
paddle/fluid/memory/allocation/legacy_allocator.cc (new file, mode 100644)

```cpp
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include <string>
#include "glog/logging.h"
#include "paddle/fluid/memory/detail/buddy_allocator.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/string/printf.h"

DEFINE_bool(init_allocated_mem, false,
            "It is a mistake that the values of the memory allocated by "
            "BuddyAllocator are always zeroed in some op's implementation. "
            "To find this error in time, we use init_allocated_mem to indicate "
            "that initializing the allocated memory with a small value "
            "during unit testing.");
DECLARE_double(fraction_of_gpu_memory_to_use);

namespace paddle {
namespace memory {
namespace legacy {

template <typename Place>
void *Alloc(const Place &place, size_t size);

template <typename Place>
void Free(const Place &place, void *p);

template <typename Place>
size_t Used(const Place &place);

struct Usage : public boost::static_visitor<size_t> {
  size_t operator()(const platform::CPUPlace &cpu) const;
  size_t operator()(const platform::CUDAPlace &gpu) const;
  size_t operator()(const platform::CUDAPinnedPlace &cuda_pinned) const;
};

size_t memory_usage(const platform::Place &p);

using BuddyAllocator = detail::BuddyAllocator;

BuddyAllocator *GetCPUBuddyAllocator() {
  // We tried thread_local for inference::RNN1 model, but that not works much
  // for multi-thread test.
  static std::once_flag init_flag;
  static detail::BuddyAllocator *a = nullptr;

  std::call_once(init_flag, []() {
    a = new detail::BuddyAllocator(
        std::unique_ptr<detail::SystemAllocator>(new detail::CPUAllocator),
        platform::CpuMinChunkSize(), platform::CpuMaxChunkSize());
  });

  return a;
}

// We compared the NaiveAllocator with BuddyAllocator in CPU memory allocation,
// seems they are almost the same overhead.
struct NaiveAllocator {
  void *Alloc(size_t size) { return malloc(size); }

  void Free(void *p) {
    PADDLE_ENFORCE(p);
    free(p);
  }

  static NaiveAllocator *Instance() {
    static NaiveAllocator x;
    return &x;
  }

 private:
  std::mutex lock_;
};

template <>
void *Alloc<platform::CPUPlace>(const platform::CPUPlace &place, size_t size) {
  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
  void *p = GetCPUBuddyAllocator()->Alloc(size);
  if (FLAGS_init_allocated_mem) {
    memset(p, 0xEF, size);
  }
  VLOG(100) << " pointer=" << p;
  return p;
}

template <>
void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p) {
  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
  GetCPUBuddyAllocator()->Free(p);
}

template <>
size_t Used<platform::CPUPlace>(const platform::CPUPlace &place) {
  return GetCPUBuddyAllocator()->Used();
}

#ifdef PADDLE_WITH_CUDA
BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
  static std::once_flag init_flag;
  static detail::BuddyAllocator **a_arr = nullptr;

  std::call_once(init_flag, [gpu_id]() {
    int gpu_num = platform::GetCUDADeviceCount();
    PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id,
                   gpu_num);

    a_arr = new BuddyAllocator *[gpu_num];
    for (int i = 0; i < gpu_num; i++) {
      a_arr[i] = nullptr;
      platform::SetDeviceId(i);
      a_arr[i] = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
                                        new detail::GPUAllocator(i)),
                                    platform::GpuMinChunkSize(),
                                    platform::GpuMaxChunkSize());

      VLOG(100) << "\n\nNOTE: each GPU device use "
                << FLAGS_fraction_of_gpu_memory_to_use * 100
                << "% of GPU memory.\n"
                << "You can set GFlags environment variable '"
                << "FLAGS_fraction_of_gpu_memory_to_use"
                << "' to change the fraction of GPU usage.\n\n";
    }
  });

  platform::SetDeviceId(gpu_id);
  return a_arr[gpu_id];
}
#endif

template <>
size_t Used<platform::CUDAPlace>(const platform::CUDAPlace &place) {
#ifdef PADDLE_WITH_CUDA
  return GetGPUBuddyAllocator(place.device)->Used();
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}

template <>
void *Alloc<platform::CUDAPlace>(const platform::CUDAPlace &place,
                                 size_t size) {
#ifdef PADDLE_WITH_CUDA
  auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
  auto *ptr = buddy_allocator->Alloc(size);
  if (ptr == nullptr) {
    int cur_dev = platform::GetCurrentDeviceId();
    platform::SetDeviceId(place.device);
    size_t avail, total;
    platform::GpuMemoryUsage(&avail, &total);
    LOG(WARNING) << "Cannot allocate " << string::HumanReadableSize(size)
                 << " in GPU " << place.device << ", available "
                 << string::HumanReadableSize(avail);
    LOG(WARNING) << "total " << total;
    LOG(WARNING) << "GpuMinChunkSize "
                 << string::HumanReadableSize(
                        buddy_allocator->GetMinChunkSize());
    LOG(WARNING) << "GpuMaxChunkSize "
                 << string::HumanReadableSize(
                        buddy_allocator->GetMaxChunkSize());
    LOG(WARNING) << "GPU memory used: "
                 << string::HumanReadableSize(Used<platform::CUDAPlace>(place));
    platform::SetDeviceId(cur_dev);
  }
  if (FLAGS_init_allocated_mem) {
    cudaMemset(ptr, 0xEF, size);
  }
  return ptr;
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}

template <>
void Free<platform::CUDAPlace>(const platform::CUDAPlace &place, void *p) {
#ifdef PADDLE_WITH_CUDA
  GetGPUBuddyAllocator(place.device)->Free(p);
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}

#ifdef PADDLE_WITH_CUDA
BuddyAllocator *GetCUDAPinnedBuddyAllocator() {
  static std::once_flag init_flag;
  static BuddyAllocator *ba = nullptr;

  std::call_once(init_flag, []() {
    ba = new BuddyAllocator(std::unique_ptr<detail::SystemAllocator>(
                                new detail::CUDAPinnedAllocator),
                            platform::CUDAPinnedMinChunkSize(),
                            platform::CUDAPinnedMaxChunkSize());
  });

  return ba;
}
#endif

template <>
size_t Used<platform::CUDAPinnedPlace>(
    const platform::CUDAPinnedPlace &place) {
#ifdef PADDLE_WITH_CUDA
  return GetCUDAPinnedBuddyAllocator()->Used();
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}

template <>
void *Alloc<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
                                       size_t size) {
#ifdef PADDLE_WITH_CUDA
  auto *buddy_allocator = GetCUDAPinnedBuddyAllocator();
  void *ptr = buddy_allocator->Alloc(size);

  if (ptr == nullptr) {
    LOG(WARNING) << "cudaMallocHost Cannot allocate " << size
                 << " bytes in CUDAPinnedPlace";
  }
  if (FLAGS_init_allocated_mem) {
    memset(ptr, 0xEF, size);
  }
  return ptr;
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}

template <>
void Free<platform::CUDAPinnedPlace>(const platform::CUDAPinnedPlace &place,
                                     void *p) {
#ifdef PADDLE_WITH_CUDA
  GetCUDAPinnedBuddyAllocator()->Free(p);
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}

struct AllocVisitor : public boost::static_visitor<void *> {
  inline explicit AllocVisitor(size_t size) : size_(size) {}

  template <typename Place>
  inline void *operator()(const Place &place) const {
    return Alloc<Place>(place, size_);
  }

 private:
  size_t size_;
};

struct FreeVisitor : public boost::static_visitor<void> {
  inline explicit FreeVisitor(void *ptr) : ptr_(ptr) {}

  template <typename Place>
  inline void operator()(const Place &place) const {
    Free<Place>(place, ptr_);
  }

 private:
  void *ptr_;
};

size_t Usage::operator()(const platform::CPUPlace &cpu) const {
  return Used(cpu);
}

size_t Usage::operator()(const platform::CUDAPlace &gpu) const {
#ifdef PADDLE_WITH_CUDA
  return Used(gpu);
#else
  PADDLE_THROW("'CUDAPlace' is not supported in CPU only device.");
#endif
}

size_t Usage::operator()(const platform::CUDAPinnedPlace &cuda_pinned) const {
#ifdef PADDLE_WITH_CUDA
  return Used(cuda_pinned);
#else
  PADDLE_THROW("'CUDAPinnedPlace' is not supported in CPU only device.");
#endif
}
}  // namespace legacy

namespace allocation {

Allocation *LegacyAllocator::AllocateImpl(size_t size, Allocator::Attr attr) {
  void *ptr = boost::apply_visitor(legacy::AllocVisitor(size), place_);
  return new Allocation(ptr, size, place_);
}

void LegacyAllocator::Free(Allocation *allocation) {
  boost::apply_visitor(legacy::FreeVisitor(allocation->ptr()),
                       allocation->place());
  delete allocation;
}
}  // namespace allocation
}  // namespace memory
}  // namespace paddle
```
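LegacyAllocator stores a type-erased platform::Place (a boost::variant) and recovers the concrete place type at call time with boost::apply_visitor, which is how AllocateImpl reaches the right Alloc&lt;Place&gt; specialization. A self-contained toy illustrating that dispatch idiom; CPUPlace/GPUPlace and the Alloc overloads below are stand-ins, not Paddle's types:

```cpp
#include <boost/variant.hpp>
#include <cstdio>
#include <cstdlib>

struct CPUPlace {};
struct GPUPlace { int device = 0; };
using Place = boost::variant<CPUPlace, GPUPlace>;

// Toy stand-ins for the per-place Alloc<Place> specializations above.
void* Alloc(const CPUPlace&, std::size_t size) { return std::malloc(size); }
void* Alloc(const GPUPlace& p, std::size_t size) {
  std::printf("would cudaMalloc %zu bytes on device %d\n", size, p.device);
  return nullptr;
}

// Same shape as legacy::AllocVisitor: capture the size, let apply_visitor
// pick the overload matching the place actually stored in the variant.
struct AllocVisitor : public boost::static_visitor<void*> {
  explicit AllocVisitor(std::size_t size) : size_(size) {}

  template <typename P>
  void* operator()(const P& place) const {
    return Alloc(place, size_);
  }

 private:
  std::size_t size_;
};

int main() {
  Place place = CPUPlace{};
  void* p = boost::apply_visitor(AllocVisitor(256), place);
  std::free(p);
}
```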
paddle/fluid/memory/allocation/legacy_allocator.h (new file, mode 100644)

```cpp
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace memory {
namespace allocation {

class LegacyAllocatorPrivate;

class LegacyAllocator : public Allocator {
 public:
  explicit LegacyAllocator(const platform::Place &p) : place_(p) {}

 protected:
  Allocation *AllocateImpl(size_t size, Allocator::Attr attr) override;
  void Free(Allocation *allocation) override;

 private:
  platform::Place place_;
};
}  // namespace allocation
}  // namespace memory
}  // namespace paddle
```
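The header is the entire integration surface: a constructor capturing the place plus two protected overrides. The protected AllocateImpl suggests the base class drives it through a template-method wrapper; a hedged, self-contained sketch of that pattern, which mirrors but does not quote Paddle's actual Allocator base class:

```cpp
#include <cstddef>

struct Allocation {};

// Assumed shape of the base class: the public entry point is non-virtual
// and delegates to a protected hook that subclasses customize.
class Allocator {
 public:
  virtual ~Allocator() = default;
  Allocation* Allocate(std::size_t size) { return AllocateImpl(size); }

 protected:
  virtual Allocation* AllocateImpl(std::size_t size) = 0;
};

class LegacyLikeAllocator : public Allocator {
 protected:
  Allocation* AllocateImpl(std::size_t size) override {
    (void)size;
    return new Allocation;  // real code would call the buddy allocator
  }
};

int main() {
  LegacyLikeAllocator a;
  delete a.Allocate(64);
}
```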
paddle/fluid/memory/malloc.cc

malloc.cc shrinks to a thin forwarding layer (+4 −287). The bulk of the removed lines — the whole legacy namespace with GetCPUBuddyAllocator, NaiveAllocator, the Alloc/Free/Used specializations for CPUPlace, CUDAPlace, and CUDAPinnedPlace, GetGPUBuddyAllocator, GetCUDAPinnedBuddyAllocator, AllocVisitor, FreeVisitor, and the Usage visitor — is identical to the code added in legacy_allocator.cc above, so it is collapsed to `...` here; only the parts unique to malloc.cc are shown.

```diff
@@ -12,305 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/memory/malloc.h"
-#include <string>
-#include <vector>
-#include "glog/logging.h"
 #include "paddle/fluid/memory/allocation/allocator_facade.h"
 #include "paddle/fluid/memory/allocation/allocator_strategy.h"
-#include "paddle/fluid/memory/detail/buddy_allocator.h"
-#include "paddle/fluid/memory/detail/system_allocator.h"
-#include "paddle/fluid/platform/gpu_info.h"
-#include "paddle/fluid/string/printf.h"
-
-DEFINE_bool(init_allocated_mem, false,
-            "It is a mistake that the values of the memory allocated by "
-            "BuddyAllocator are always zeroed in some op's implementation. "
-            "To find this error in time, we use init_allocated_mem to indicate "
-            "that initializing the allocated memory with a small value "
-            "during unit testing.");
-DECLARE_double(fraction_of_gpu_memory_to_use);
+#include "paddle/fluid/platform/place.h"

 namespace paddle {
 namespace memory {
-namespace legacy {
-
-...
-
-class LegacyAllocation : public Allocation {
- public:
-  using Allocation::Allocation;
-
-  ~LegacyAllocation() final {
-    boost::apply_visitor(FreeVisitor(this->ptr()), this->place());
-  }
-};
-}  // namespace legacy
-
 std::shared_ptr<Allocation> AllocShared(const platform::Place& place,
                                         size_t size, Allocator::Attr attr) {
-  if (allocation::GetAllocatorStrategy() ==
-      allocation::AllocatorStrategy::kLegacy) {
-    void* p = boost::apply_visitor(legacy::AllocVisitor(size), place);
-    return std::shared_ptr<Allocation>(
-        new legacy::LegacyAllocation(p, size, place));
-  } else {
-    return allocation::AllocatorFacade::Instance().AllocShared(place, size,
-                                                               attr);
-  }
+  return allocation::AllocatorFacade::Instance().AllocShared(place, size, attr);
 }

 AllocationPtr Alloc(const platform::Place& place, size_t size,
                     Allocator::Attr attr) {
-  if (allocation::GetAllocatorStrategy() ==
-      allocation::AllocatorStrategy::kLegacy) {
-    void* p = boost::apply_visitor(legacy::AllocVisitor(size), place);
-    return AllocationPtr(new legacy::LegacyAllocation(p, size, place));
-  } else {
-    return allocation::AllocatorFacade::Instance().Alloc(place, size, attr);
-  }
+  return allocation::AllocatorFacade::Instance().Alloc(place, size, attr);
 }
 }  // namespace memory
 ...
```
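One behavioral nuance of the move: the deleted LegacyAllocation freed its buffer in its own destructor, whereas the new LegacyAllocator::Free frees the buffer and then deletes the Allocation, keeping all ownership logic in the allocator. A toy contrast of the old destructor-based scheme; all types here are stand-ins:

```cpp
#include <cstdlib>

struct Allocation {
  Allocation(void* p, std::size_t s) : ptr_(p), size_(s) {}
  virtual ~Allocation() = default;
  void* ptr() const { return ptr_; }

 protected:
  void* ptr_;
  std::size_t size_;
};

// The removed LegacyAllocation followed this shape: cleanup lives in the
// allocation object itself rather than in an Allocator::Free override.
struct LegacyLikeAllocation : Allocation {
  using Allocation::Allocation;
  ~LegacyLikeAllocation() final { std::free(ptr_); }
};

int main() {
  LegacyLikeAllocation a(std::malloc(64), 64);  // buffer freed when a dies
  (void)a.ptr();
}
```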
paddle/fluid/memory/malloc.h

```diff
@@ -30,26 +30,5 @@ extern std::shared_ptr<Allocation> AllocShared(
 extern AllocationPtr Alloc(const platform::Place& place, size_t size,
                            Allocator::Attr attr = Allocator::kDefault);

-namespace legacy {
-
-template <typename Place>
-void* Alloc(const Place& place, size_t size);
-
-template <typename Place>
-void Free(const Place& place, void* p);
-
-template <typename Place>
-size_t Used(const Place& place);
-
-struct Usage : public boost::static_visitor<size_t> {
-  size_t operator()(const platform::CPUPlace& cpu) const;
-  size_t operator()(const platform::CUDAPlace& gpu) const;
-  size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const;
-};
-
-size_t memory_usage(const platform::Place& p);
-
-}  // namespace legacy
-
 }  // namespace memory
 }  // namespace paddle
```