Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2bc5b08d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2bc5b08d
编写于
4月 08, 2018
作者:
D
Darcy
提交者:
GitHub
4月 08, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #9719 from wangkuiyi/cpplint-memory-detail
Rewrite memroy/detail
上级
c1b6692f
f31a0da3
变更
25
显示空白变更内容
内联
并排
Showing
25 changed file
with
359 addition
and
429 deletion
+359
-429
cmake/generic.cmake
cmake/generic.cmake
+6
-6
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+4
-4
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+1
-1
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+6
-11
paddle/fluid/memory/detail/CMakeLists.txt
paddle/fluid/memory/detail/CMakeLists.txt
+3
-7
paddle/fluid/memory/detail/buddy_allocator.cc
paddle/fluid/memory/detail/buddy_allocator.cc
+11
-10
paddle/fluid/memory/detail/buddy_allocator.h
paddle/fluid/memory/detail/buddy_allocator.h
+7
-7
paddle/fluid/memory/detail/memory_block.cc
paddle/fluid/memory/detail/memory_block.cc
+57
-58
paddle/fluid/memory/detail/memory_block.h
paddle/fluid/memory/detail/memory_block.h
+85
-46
paddle/fluid/memory/detail/memory_block_desc.cc
paddle/fluid/memory/detail/memory_block_desc.cc
+23
-19
paddle/fluid/memory/detail/meta_cache.cc
paddle/fluid/memory/detail/meta_cache.cc
+14
-16
paddle/fluid/memory/detail/meta_cache.h
paddle/fluid/memory/detail/meta_cache.h
+0
-64
paddle/fluid/memory/detail/meta_data.h
paddle/fluid/memory/detail/meta_data.h
+0
-54
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+11
-11
paddle/fluid/memory/detail/system_allocator.h
paddle/fluid/memory/detail/system_allocator.h
+4
-4
paddle/fluid/memory/detail/system_allocator_test.cc
paddle/fluid/memory/detail/system_allocator_test.cc
+9
-9
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+1
-1
paddle/fluid/memory/malloc.h
paddle/fluid/memory/malloc.h
+104
-0
paddle/fluid/memory/malloc_test.cc
paddle/fluid/memory/malloc_test.cc
+4
-5
paddle/fluid/memory/memory.h
paddle/fluid/memory/memory.h
+2
-88
paddle/fluid/memory/pinned_memory_test.cu
paddle/fluid/memory/pinned_memory_test.cu
+0
-1
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-1
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+3
-3
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+2
-2
paddle/testing/CMakeLists.txt
paddle/testing/CMakeLists.txt
+1
-1
未找到文件。
cmake/generic.cmake
浏览文件 @
2bc5b08d
...
...
@@ -244,11 +244,11 @@ function(cc_test TARGET_NAME)
cmake_parse_arguments
(
cc_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
add_executable
(
${
TARGET_NAME
}
${
cc_test_SRCS
}
)
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
target_circle_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags glog
)
target_circle_link_libraries
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main memory gtest gflags glog
)
if
(
"
${
cc_test_DEPS
}
"
MATCHES
"ARCHIVE_START"
)
list
(
REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END
)
endif
()
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
cc_test_DEPS
}
paddle_gtest_main memory gtest gflags glog
)
add_test
(
NAME
${
TARGET_NAME
}
COMMAND
${
TARGET_NAME
}
${
cc_test_ARGS
}
WORKING_DIRECTORY
${
CMAKE_CURRENT_BINARY_DIR
}
)
...
...
@@ -311,8 +311,8 @@ function(nv_test TARGET_NAME)
set
(
multiValueArgs SRCS DEPS
)
cmake_parse_arguments
(
nv_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cuda_add_executable
(
${
TARGET_NAME
}
${
nv_test_SRCS
}
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags glog
)
target_link_libraries
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main memory gtest gflags glog
)
add_dependencies
(
${
TARGET_NAME
}
${
nv_test_DEPS
}
paddle_gtest_main memory gtest gflags glog
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
endif
()
endfunction
(
nv_test
)
...
...
@@ -387,8 +387,8 @@ function(hip_test TARGET_NAME)
endif
()
add_executable
(
${
TARGET_NAME
}
${
_cmake_options
}
${
_generated_files
}
${
_sources
}
)
set_target_properties
(
${
TARGET_NAME
}
PROPERTIES LINKER_LANGUAGE HIP
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main
paddle_
memory gtest gflags
)
target_link_libraries
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
add_dependencies
(
${
TARGET_NAME
}
${
hip_test_DEPS
}
paddle_gtest_main memory gtest gflags
)
add_test
(
${
TARGET_NAME
}
${
TARGET_NAME
}
)
endif
()
endfunction
(
hip_test
)
...
...
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
2bc5b08d
...
...
@@ -7,9 +7,9 @@ cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
nv_test
(
dim_test SRCS dim_test.cu DEPS ddim
)
if
(
WITH_GPU
)
nv_library
(
tensor SRCS tensor.cc tensor_util.cu DEPS ddim place
paddle_
memory device_context framework_proto
)
nv_library
(
tensor SRCS tensor.cc tensor_util.cu DEPS ddim place memory device_context framework_proto
)
else
()
cc_library
(
tensor SRCS tensor.cc tensor_util.cc DEPS ddim place
paddle_
memory device_context framework_proto
)
cc_library
(
tensor SRCS tensor.cc tensor_util.cc DEPS ddim place memory device_context framework_proto
)
endif
()
cc_test
(
tensor_test SRCS tensor_test.cc DEPS tensor
)
...
...
@@ -21,9 +21,9 @@ endif()
cc_test
(
eigen_test SRCS eigen_test.cc DEPS tensor
)
nv_test
(
mixed_vector_test SRCS mixed_vector_test.cu DEPS place
paddle_
memory device_context init
)
nv_test
(
mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context init
)
cc_library
(
lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor
paddle_
memory
)
cc_test
(
lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory
)
nv_test
(
lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init
)
cc_library
(
reader SRCS reader.cc DEPS lod_tensor ddim
)
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
2bc5b08d
set
(
FLUID_CORE_MODULES proto_desc
paddle_
memory lod_tensor executor prune init
)
set
(
FLUID_CORE_MODULES proto_desc memory lod_tensor executor prune init
)
cc_library
(
paddle_fluid_api
SRCS io.cc
...
...
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
2bc5b08d
add_subdirectory
(
detail
)
cc_library
(
m
emory SRCS memory.cc DEPS
place enforce
)
cc_library
(
m
alloc SRCS malloc.cc DEPS buddy_allocator
place enforce
)
cc_library
(
memcpy SRCS memcpy.cc DEPS place
)
cc_library
(
paddle_
memory
cc_library
(
memory
DEPS
memory
memcpy
meta_data
meta_cache
memory_block
buddy_allocator
system_allocator
)
malloc
memcpy
)
cc_test
(
m
emory_test SRCS memory_test.cc DEPS place paddle_memory
)
cc_test
(
m
alloc_test SRCS malloc_test.cc DEPS malloc
)
#if (WITH_GPU)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place
paddle_
memory)
# nv_test(pinned_memory_test SRCS pinned_memory_test.cu DEPS place memory)
#endif()
paddle/fluid/memory/detail/CMakeLists.txt
浏览文件 @
2bc5b08d
cc_library
(
memory_block SRCS memory_block.cc memory_block_desc.cc meta_cache.cc
)
if
(
${
WITH_GPU
}
)
nv_library
(
system_allocator SRCS system_allocator.cc DEPS gflags cpu_info gpu_info
)
else
(
${
WITH_GPU
}
)
...
...
@@ -6,10 +8,4 @@ endif(${WITH_GPU})
cc_test
(
system_allocator_test SRCS system_allocator_test.cc DEPS system_allocator
)
cc_library
(
meta_data SRCS meta_data.cc
)
cc_library
(
meta_cache SRCS meta_cache.cc
)
cc_library
(
memory_block SRCS memory_block.cc
)
cc_library
(
buddy_allocator SRCS buddy_allocator.cc DEPS glog
)
cc_library
(
buddy_allocator SRCS buddy_allocator.cc DEPS memory_block system_allocator glog
)
paddle/fluid/memory/detail/buddy_allocator.cc
浏览文件 @
2bc5b08d
...
...
@@ -46,7 +46,8 @@ inline size_t align(size_t size, size_t alignment) {
void
*
BuddyAllocator
::
Alloc
(
size_t
unaligned_size
)
{
// adjust allocation alignment
size_t
size
=
align
(
unaligned_size
+
sizeof
(
Metadata
),
min_chunk_size_
);
size_t
size
=
align
(
unaligned_size
+
sizeof
(
MemoryBlock
::
Desc
),
min_chunk_size_
);
// acquire the allocator lock
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
...
...
@@ -103,7 +104,7 @@ void BuddyAllocator::Free(void* p) {
return
;
}
block
->
mark_as_free
(
cache_
);
block
->
mark_as_free
(
&
cache_
);
total_used_
-=
block
->
total_size
(
cache_
);
total_free_
+=
block
->
total_size
(
cache_
);
...
...
@@ -122,7 +123,7 @@ void BuddyAllocator::Free(void* p) {
right_buddy
));
// merge its right buddy to the block
block
->
merge
(
cache_
,
right_buddy
);
block
->
merge
(
&
cache_
,
right_buddy
);
}
}
...
...
@@ -139,7 +140,7 @@ void BuddyAllocator::Free(void* p) {
left_buddy
->
total_size
(
cache_
),
left_buddy
));
// merge the block to its left buddy
left_buddy
->
merge
(
cache_
,
block
);
left_buddy
->
merge
(
&
cache_
,
block
);
block
=
left_buddy
;
}
}
...
...
@@ -163,13 +164,13 @@ size_t BuddyAllocator::Used() { return total_used_; }
void
*
BuddyAllocator
::
SystemAlloc
(
size_t
size
)
{
size_t
index
=
0
;
void
*
p
=
system_allocator_
->
Alloc
(
index
,
size
);
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
size
);
VLOG
(
10
)
<<
"Allocated "
<<
p
<<
" from system allocator."
;
if
(
p
==
nullptr
)
return
nullptr
;
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
cache_
,
MemoryBlock
::
HUGE_CHUNK
,
index
,
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
HUGE_CHUNK
,
index
,
size
,
nullptr
,
nullptr
);
return
static_cast
<
MemoryBlock
*>
(
p
)
->
data
();
...
...
@@ -187,14 +188,14 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
// Allocate a new maximum sized block
size_t
index
=
0
;
void
*
p
=
system_allocator_
->
Alloc
(
index
,
max_chunk_size_
);
void
*
p
=
system_allocator_
->
Alloc
(
&
index
,
max_chunk_size_
);
if
(
p
==
nullptr
)
return
pool_
.
end
();
VLOG
(
10
)
<<
"Creating and inserting new block "
<<
p
<<
" from system allocator"
;
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
cache_
,
MemoryBlock
::
FREE_CHUNK
,
index
,
static_cast
<
MemoryBlock
*>
(
p
)
->
init
(
&
cache_
,
MemoryBlock
::
FREE_CHUNK
,
index
,
max_chunk_size_
,
nullptr
,
nullptr
);
// gpu fallback allocation
...
...
@@ -238,11 +239,11 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
VLOG
(
10
)
<<
"Split block ("
<<
block
<<
", "
<<
block
->
total_size
(
cache_
)
<<
") into"
;
block
->
split
(
cache_
,
size
);
block
->
split
(
&
cache_
,
size
);
VLOG
(
10
)
<<
"Left block ("
<<
block
<<
", "
<<
block
->
total_size
(
cache_
)
<<
")"
;
block
->
set_type
(
cache_
,
MemoryBlock
::
ARENA_CHUNK
);
block
->
set_type
(
&
cache_
,
MemoryBlock
::
ARENA_CHUNK
);
// the rest of memory if exist
if
(
block
->
has_right_buddy
(
cache_
))
{
...
...
paddle/fluid/memory/detail/buddy_allocator.h
浏览文件 @
2bc5b08d
...
...
@@ -14,18 +14,18 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/memory/detail/meta_cache.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include <mutex> // NOLINT
#include <set>
#include <tuple>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include <mutex>
#include <set>
#include <unordered_map>
#include <vector>
namespace
paddle
{
namespace
memory
{
namespace
detail
{
...
...
paddle/fluid/memory/detail/memory_block.cc
浏览文件 @
2bc5b08d
...
...
@@ -13,143 +13,142 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/meta_cache.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include "paddle/fluid/platform/assert.h"
namespace
paddle
{
namespace
memory
{
namespace
detail
{
void
MemoryBlock
::
init
(
MetadataCache
&
cache
,
Type
t
,
size_t
index
,
size_t
size
,
void
MemoryBlock
::
init
(
MetadataCache
*
cache
,
Type
t
,
size_t
index
,
size_t
size
,
void
*
left_buddy
,
void
*
right_buddy
)
{
cache
.
store
(
this
,
Metadata
(
t
,
index
,
size
-
sizeof
(
Metadata
),
size
,
cache
->
save
(
this
,
MemoryBlock
::
Desc
(
t
,
index
,
size
-
sizeof
(
MemoryBlock
::
Desc
),
size
,
static_cast
<
MemoryBlock
*>
(
left_buddy
),
static_cast
<
MemoryBlock
*>
(
right_buddy
)));
}
MemoryBlock
::
Type
MemoryBlock
::
type
(
MetadataCache
&
cache
)
const
{
MemoryBlock
::
Type
MemoryBlock
::
type
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
type
;
}
size_t
MemoryBlock
::
size
(
MetadataCache
&
cache
)
const
{
size_t
MemoryBlock
::
size
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
size
;
}
size_t
MemoryBlock
::
total_size
(
MetadataCache
&
cache
)
const
{
size_t
MemoryBlock
::
index
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
index
;
}
size_t
MemoryBlock
::
total_size
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
total_size
;
}
MemoryBlock
*
MemoryBlock
::
left_buddy
(
MetadataCache
&
cache
)
const
{
bool
MemoryBlock
::
has_left_buddy
(
const
MetadataCache
&
cache
)
const
{
return
left_buddy
(
cache
)
!=
nullptr
;
}
bool
MemoryBlock
::
has_right_buddy
(
const
MetadataCache
&
cache
)
const
{
return
right_buddy
(
cache
)
!=
nullptr
;
}
MemoryBlock
*
MemoryBlock
::
left_buddy
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
left_buddy
;
}
MemoryBlock
*
MemoryBlock
::
right_buddy
(
MetadataCache
&
cache
)
const
{
MemoryBlock
*
MemoryBlock
::
right_buddy
(
const
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
right_buddy
;
}
void
MemoryBlock
::
split
(
MetadataCache
&
cache
,
size_t
size
)
{
void
MemoryBlock
::
split
(
MetadataCache
*
cache
,
size_t
size
)
{
// make sure the split fits
PADDLE_ASSERT
(
total_size
(
cache
)
>=
size
);
PADDLE_ASSERT
(
total_size
(
*
cache
)
>=
size
);
// bail out if there is no room for another partition
if
(
total_size
(
cache
)
-
size
<=
sizeof
(
Metadata
))
{
if
(
total_size
(
*
cache
)
-
size
<=
sizeof
(
MemoryBlock
::
Desc
))
{
return
;
}
// find the position of the split
void
*
right_partition
=
reinterpret_cast
<
uint8_t
*>
(
this
)
+
size
;
size_t
remaining_size
=
total_size
(
cache
)
-
size
;
size_t
remaining_size
=
total_size
(
*
cache
)
-
size
;
// Add the new block as a buddy
auto
metadata
=
cache
.
load
(
this
);
auto
metadata
=
cache
->
load
(
this
);
// Write the metadata for the new block
auto
new_block_right_buddy
=
metadata
.
right_buddy
;
cache
.
store
(
static_cast
<
MemoryBlock
*>
(
right_partition
),
Metadata
(
FREE_CHUNK
,
index
(
cache
),
remaining_size
-
sizeof
(
Metadata
),
cache
->
save
(
static_cast
<
MemoryBlock
*>
(
right_partition
),
MemoryBlock
::
Desc
(
FREE_CHUNK
,
index
(
*
cache
),
remaining_size
-
sizeof
(
MemoryBlock
::
Desc
),
remaining_size
,
this
,
new_block_right_buddy
));
metadata
.
right_buddy
=
static_cast
<
MemoryBlock
*>
(
right_partition
);
metadata
.
size
=
size
-
sizeof
(
Me
tadata
);
metadata
.
size
=
size
-
sizeof
(
Me
moryBlock
::
Desc
);
metadata
.
total_size
=
size
;
cache
.
stor
e
(
this
,
metadata
);
cache
->
sav
e
(
this
,
metadata
);
// Write metadata for the new block's right buddy
if
(
new_block_right_buddy
!=
nullptr
)
{
auto
buddy_metadata
=
cache
.
load
(
new_block_right_buddy
);
auto
buddy_metadata
=
cache
->
load
(
new_block_right_buddy
);
buddy_metadata
.
left_buddy
=
static_cast
<
MemoryBlock
*>
(
right_partition
);
cache
.
stor
e
(
new_block_right_buddy
,
buddy_metadata
);
cache
->
sav
e
(
new_block_right_buddy
,
buddy_metadata
);
}
}
void
MemoryBlock
::
merge
(
MetadataCache
&
cache
,
MemoryBlock
*
right_buddy
)
{
void
MemoryBlock
::
merge
(
MetadataCache
*
cache
,
MemoryBlock
*
right_buddy
)
{
// only free blocks can be merged
PADDLE_ASSERT
(
type
(
cache
)
==
FREE_CHUNK
);
PADDLE_ASSERT
(
right_buddy
->
type
(
cache
)
==
FREE_CHUNK
);
PADDLE_ASSERT
(
type
(
*
cache
)
==
FREE_CHUNK
);
PADDLE_ASSERT
(
right_buddy
->
type
(
*
cache
)
==
FREE_CHUNK
);
auto
metadata
=
cache
.
load
(
this
);
auto
metadata
=
cache
->
load
(
this
);
// link this->buddy's buddy
metadata
.
right_buddy
=
right_buddy
->
right_buddy
(
cache
);
metadata
.
right_buddy
=
right_buddy
->
right_buddy
(
*
cache
);
// link buddy's buddy -> this
if
(
metadata
.
right_buddy
!=
nullptr
)
{
auto
buddy_metadata
=
cache
.
load
(
metadata
.
right_buddy
);
auto
buddy_metadata
=
cache
->
load
(
metadata
.
right_buddy
);
buddy_metadata
.
left_buddy
=
this
;
cache
.
stor
e
(
metadata
.
right_buddy
,
buddy_metadata
);
cache
->
sav
e
(
metadata
.
right_buddy
,
buddy_metadata
);
}
metadata
.
size
+=
right_buddy
->
total_size
(
cache
);
metadata
.
total_size
+=
right_buddy
->
total_size
(
cache
);
metadata
.
size
+=
right_buddy
->
total_size
(
*
cache
);
metadata
.
total_size
+=
right_buddy
->
total_size
(
*
cache
);
cache
.
store
(
this
,
metadata
);
cache
.
store
(
right_buddy
,
Metadata
(
INVALID_CHUNK
,
0
,
0
,
0
,
nullptr
,
nullptr
));
cache
->
save
(
this
,
metadata
);
cache
->
save
(
right_buddy
,
MemoryBlock
::
Desc
(
INVALID_CHUNK
,
0
,
0
,
0
,
nullptr
,
nullptr
));
}
void
MemoryBlock
::
mark_as_free
(
MetadataCache
&
cache
)
{
void
MemoryBlock
::
mark_as_free
(
MetadataCache
*
cache
)
{
// check for double free or corruption
PADDLE_ASSERT
(
type
(
cache
)
!=
FREE_CHUNK
);
PADDLE_ASSERT
(
type
(
cache
)
!=
INVALID_CHUNK
);
PADDLE_ASSERT
(
type
(
*
cache
)
!=
FREE_CHUNK
);
PADDLE_ASSERT
(
type
(
*
cache
)
!=
INVALID_CHUNK
);
set_type
(
cache
,
FREE_CHUNK
);
}
void
MemoryBlock
::
set_type
(
MetadataCache
&
cache
,
Type
t
)
{
auto
metadata
=
cache
.
load
(
this
);
void
MemoryBlock
::
set_type
(
MetadataCache
*
cache
,
Type
t
)
{
auto
metadata
=
cache
->
load
(
this
);
metadata
.
type
=
t
;
cache
.
store
(
this
,
metadata
);
}
bool
MemoryBlock
::
has_left_buddy
(
MetadataCache
&
cache
)
const
{
return
left_buddy
(
cache
)
!=
nullptr
;
}
bool
MemoryBlock
::
has_right_buddy
(
MetadataCache
&
cache
)
const
{
return
right_buddy
(
cache
)
!=
nullptr
;
}
size_t
MemoryBlock
::
index
(
MetadataCache
&
cache
)
const
{
return
cache
.
load
(
this
).
index
;
cache
->
save
(
this
,
metadata
);
}
void
*
MemoryBlock
::
data
()
const
{
return
const_cast
<
Metadata
*>
(
reinterpret_cast
<
const
Metadata
*>
(
this
))
+
1
;
return
const_cast
<
MemoryBlock
::
Desc
*>
(
reinterpret_cast
<
const
MemoryBlock
::
Desc
*>
(
this
))
+
1
;
}
MemoryBlock
*
MemoryBlock
::
metadata
()
const
{
return
const_cast
<
MemoryBlock
*>
(
reinterpret_cast
<
const
MemoryBlock
*>
(
reinterpret_cast
<
const
Me
tadata
*>
(
this
)
-
1
));
reinterpret_cast
<
const
Me
moryBlock
::
Desc
*>
(
this
)
-
1
));
}
}
// namespace detail
...
...
paddle/fluid/memory/detail/memory_block.h
浏览文件 @
2bc5b08d
...
...
@@ -11,21 +11,21 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstddef>
#include <cstdint>
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
detail
{
// Forward
Declarations
// Forward
declaration.
class
MetadataCache
;
/
*! \brief A class used to interpret the contents of a memory block */
class
MemoryBlock
{
public:
/
/ MemoryBlock represents Each allocated memory block, which contains
// MemoryBlock::Desc and the payload.
struct
MemoryBlock
{
enum
Type
{
FREE_CHUNK
,
// memory is free and idle
ARENA_CHUNK
,
// memory is being occupied
...
...
@@ -33,57 +33,96 @@ class MemoryBlock {
INVALID_CHUNK
// memory is invalid
};
public:
void
init
(
MetadataCache
&
cache
,
Type
t
,
size_t
index
,
size_t
size
,
// init saves the MemoryBlock::Desc of the memory block in a MetadataCache.
// If it is a CPU memory block, the MetadataCache writes the
// MemoryBlock::Desc to the beginning of the block; or, if it is a GPU memory
// block, the MetadataCache writes the Meatadata to a std::map in
// the CPU.
void
init
(
MetadataCache
*
cache
,
Type
t
,
size_t
index
,
size_t
size
,
void
*
left_buddy
,
void
*
right_buddy
);
public:
/*! \brief The type of the allocation */
Type
type
(
MetadataCache
&
cache
)
const
;
/*! \brief The size of the data region */
size_t
size
(
MetadataCache
&
cache
)
const
;
// All these accessors returns fields in the MemoryBlock::Desc of the memory
// block. They all need a MetadataCache instance as their first
// parameter because they read the MemoryBlock::Desc from the cache.
Type
type
(
const
MetadataCache
&
cache
)
const
;
size_t
size
(
const
MetadataCache
&
cache
)
const
;
size_t
index
(
const
MetadataCache
&
cache
)
const
;
size_t
total_size
(
const
MetadataCache
&
cache
)
const
;
bool
has_left_buddy
(
const
MetadataCache
&
cache
)
const
;
bool
has_right_buddy
(
const
MetadataCache
&
cache
)
const
;
MemoryBlock
*
left_buddy
(
const
MetadataCache
&
cache
)
const
;
MemoryBlock
*
right_buddy
(
const
MetadataCache
&
cache
)
const
;
/
*! \brief An index to track the allocator */
size_t
index
(
MetadataCache
&
cache
)
const
;
/
/ Split the allocation into left/right blocks.
void
split
(
MetadataCache
*
cache
,
size_t
size
)
;
/
*! \brief The total size of the block */
size_t
total_size
(
MetadataCache
&
cache
)
const
;
/
/ Merge left and right blocks together.
void
merge
(
MetadataCache
*
cache
,
MemoryBlock
*
right_buddy
)
;
/
*! \brief Check the left buddy of the block */
bool
has_left_buddy
(
MetadataCache
&
cache
)
const
;
/
/ Mark the allocation as free.
void
mark_as_free
(
MetadataCache
*
cache
)
;
/*! \brief Check the right buddy of the block */
bool
has_right_buddy
(
MetadataCache
&
cache
)
const
;
/*! \brief Get the left buddy */
MemoryBlock
*
left_buddy
(
MetadataCache
&
cache
)
const
;
/*! \brief Get the right buddy */
MemoryBlock
*
right_buddy
(
MetadataCache
&
cache
)
const
;
public:
/*! \brief Split the allocation into left/right blocks */
void
split
(
MetadataCache
&
cache
,
size_t
size
);
// Change the type of the allocation.
void
set_type
(
MetadataCache
*
cache
,
Type
t
);
/*! \brief Merge left and right blocks together */
void
merge
(
MetadataCache
&
cache
,
MemoryBlock
*
right_buddy
);
/*! \brief Mark the allocation as free */
void
mark_as_free
(
MetadataCache
&
cache
);
/*! \brief Change the type of the allocation */
void
set_type
(
MetadataCache
&
cache
,
Type
t
);
public:
/*! \brief Get a pointer to the memory block's data */
void
*
data
()
const
;
/*! \brief Get a pointer to the memory block's metadata */
MemoryBlock
*
metadata
()
const
;
// MemoryBlock::Desc describes a MemoryBlock.
struct
Desc
{
Desc
(
MemoryBlock
::
Type
t
,
size_t
i
,
size_t
s
,
size_t
ts
,
MemoryBlock
*
l
,
MemoryBlock
*
r
);
Desc
();
// Updates guard_begin and guard_end by hashes of the Metadata object.
void
update_guards
();
// Checks that guard_begin and guard_end are hashes of the Metadata object.
bool
check_guards
()
const
;
// TODO(gangliao): compress this
size_t
guard_begin
=
0
;
MemoryBlock
::
Type
type
=
MemoryBlock
::
INVALID_CHUNK
;
size_t
index
=
0
;
size_t
size
=
0
;
size_t
total_size
=
0
;
MemoryBlock
*
left_buddy
=
nullptr
;
MemoryBlock
*
right_buddy
=
nullptr
;
size_t
guard_end
=
0
;
};
};
// A cache for accessing memory block meta-data that may be expensive
// to access directly. This class exists to unify the
// MemoryBlock::Desc format between GPU and CPU allocations. It should
// be removed when the CPU can access all GPU allocations directly via
// UVM.
class
MetadataCache
{
public:
static
size_t
overhead
();
explicit
MetadataCache
(
bool
uses_gpu
);
// Disable copying and assignment.
MetadataCache
(
const
MetadataCache
&
)
=
delete
;
MetadataCache
&
operator
=
(
const
MetadataCache
&
)
=
delete
;
// Returns the MemoryBlock::Desc for a memory block. When MetadataCache is
// used to manage CPU memory, the MemoryBlock::Desc resides at the beginning
// of the memory block; when used to manage GPU memory, the
// Meatadata resides in CPU memory indexed by cache_.
MemoryBlock
::
Desc
load
(
const
MemoryBlock
*
memory_block
)
const
;
// Saves the MemoryBlock::Desc of a memory block into the cache. For CPU
// memory block, writes the MemoryBlock::Desc to the beginning of the memory
// block; whereas for GPU memory, writes it to cache_.
void
save
(
MemoryBlock
*
memory_block
,
const
MemoryBlock
::
Desc
&
meta_data
);
// For GPU memory block, erases its MemoryBlock::Desc from cache_.
void
invalidate
(
MemoryBlock
*
memory_block
);
private:
typedef
std
::
unordered_map
<
const
MemoryBlock
*
,
MemoryBlock
::
Desc
>
MetadataMap
;
MetadataMap
cache_
;
bool
uses_gpu_
;
};
}
// namespace detail
...
...
paddle/fluid/memory/detail/me
ta_data
.cc
→
paddle/fluid/memory/detail/me
mory_block_desc
.cc
浏览文件 @
2bc5b08d
...
...
@@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/meta_data.h"
#include <functional>
#include "paddle/fluid/memory/detail/memory_block.h"
namespace
paddle
{
namespace
memory
{
namespace
detail
{
Me
tadata
::
Metadata
(
MemoryBlock
::
Type
t
,
size_t
i
,
size_t
s
,
size_t
ts
,
Me
moryBlock
::
Desc
::
Desc
(
MemoryBlock
::
Type
t
,
size_t
i
,
size_t
s
,
size_t
ts
,
MemoryBlock
*
l
,
MemoryBlock
*
r
)
:
type
(
t
),
index
(
i
),
...
...
@@ -29,7 +29,7 @@ Metadata::Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts,
left_buddy
(
l
),
right_buddy
(
r
)
{}
Me
tadata
::
Metadata
()
Me
moryBlock
::
Desc
::
Desc
()
:
type
(
MemoryBlock
::
INVALID_CHUNK
),
index
(
0
),
size
(
0
),
...
...
@@ -37,32 +37,36 @@ Metadata::Metadata()
left_buddy
(
nullptr
),
right_buddy
(
nullptr
)
{}
namespace
{
template
<
class
T
>
inline
void
hash_combine
(
std
::
size_t
&
seed
,
const
T
&
v
)
{
inline
void
hash_combine
(
std
::
size_t
*
seed
,
const
T
&
v
)
{
std
::
hash
<
T
>
hasher
;
seed
^=
hasher
(
v
)
+
0x9e3779b9
+
(
seed
<<
6
)
+
(
seed
>>
2
);
(
*
seed
)
^=
hasher
(
v
)
+
0x9e3779b9
+
((
*
seed
)
<<
6
)
+
((
*
seed
)
>>
2
);
}
inline
size_t
hash
(
const
Me
tadata
*
metadata
,
size_t
initial_seed
)
{
inline
size_t
hash
(
const
Me
moryBlock
::
Desc
&
metadata
,
size_t
initial_seed
)
{
size_t
seed
=
initial_seed
;
hash_combine
(
seed
,
(
size_t
)
metadata
->
type
);
hash_combine
(
seed
,
metadata
->
index
);
hash_combine
(
seed
,
metadata
->
size
);
hash_combine
(
seed
,
metadata
->
total_size
);
hash_combine
(
seed
,
metadata
->
left_buddy
);
hash_combine
(
seed
,
metadata
->
right_buddy
);
hash_combine
(
&
seed
,
static_cast
<
size_t
>
(
metadata
.
type
)
);
hash_combine
(
&
seed
,
metadata
.
index
);
hash_combine
(
&
seed
,
metadata
.
size
);
hash_combine
(
&
seed
,
metadata
.
total_size
);
hash_combine
(
&
seed
,
metadata
.
left_buddy
);
hash_combine
(
&
seed
,
metadata
.
right_buddy
);
return
seed
;
}
void
Metadata
::
update_guards
()
{
guard_begin
=
hash
(
this
,
1
);
guard_end
=
hash
(
this
,
2
);
}
// namespace
void
MemoryBlock
::
Desc
::
update_guards
()
{
guard_begin
=
hash
(
*
this
,
1
);
guard_end
=
hash
(
*
this
,
2
);
}
bool
Me
tadata
::
check_guards
()
const
{
return
guard_begin
==
hash
(
this
,
1
)
&&
guard_end
==
hash
(
this
,
2
);
bool
Me
moryBlock
::
Desc
::
check_guards
()
const
{
return
guard_begin
==
hash
(
*
this
,
1
)
&&
guard_end
==
hash
(
*
this
,
2
);
}
}
// namespace detail
...
...
paddle/fluid/memory/detail/meta_cache.cc
浏览文件 @
2bc5b08d
...
...
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/meta_cache.h"
#include "glog/logging.h"
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/platform/assert.h"
...
...
@@ -23,29 +22,28 @@ namespace detail {
MetadataCache
::
MetadataCache
(
bool
uses_gpu
)
:
uses_gpu_
(
uses_gpu
)
{}
Me
tadata
MetadataCache
::
load
(
const
MemoryBlock
*
block
)
{
Me
moryBlock
::
Desc
MetadataCache
::
load
(
const
MemoryBlock
*
block
)
const
{
if
(
uses_gpu_
)
{
auto
existing_
metadata
=
cache_
.
find
(
block
);
PADDLE_ASSERT
(
existing_
metadata
->
second
.
check_guards
());
return
existing_
metadata
->
second
;
auto
existing_
desc
=
cache_
.
find
(
block
);
PADDLE_ASSERT
(
existing_
desc
->
second
.
check_guards
());
return
existing_
desc
->
second
;
}
else
{
auto
*
meta
=
reinterpret_cast
<
const
Metadata
*>
(
block
);
VLOG
(
10
)
<<
"Load Me
taData type="
<<
meta
->
type
;
PADDLE_ASSERT
(
meta
->
check_guards
());
return
*
reinterpret_cast
<
const
Me
tadata
*>
(
block
);
auto
*
desc
=
reinterpret_cast
<
const
MemoryBlock
::
Desc
*>
(
block
);
VLOG
(
10
)
<<
"Load Me
moryBlock::Desc type="
<<
desc
->
type
;
PADDLE_ASSERT
(
desc
->
check_guards
());
return
*
reinterpret_cast
<
const
Me
moryBlock
::
Desc
*>
(
block
);
}
}
void
MetadataCache
::
store
(
MemoryBlock
*
block
,
const
Metadata
&
original_metadata
)
{
auto
metadata
=
original_metadata
;
metadata
.
update_guards
();
void
MetadataCache
::
save
(
MemoryBlock
*
block
,
const
MemoryBlock
::
Desc
&
original_desc
)
{
auto
desc
=
original_desc
;
desc
.
update_guards
();
if
(
uses_gpu_
)
{
cache_
[
block
]
=
metadata
;
cache_
[
block
]
=
desc
;
}
else
{
*
reinterpret_cast
<
Me
tadata
*>
(
block
)
=
metadata
;
*
reinterpret_cast
<
Me
moryBlock
::
Desc
*>
(
block
)
=
desc
;
}
}
...
...
paddle/fluid/memory/detail/meta_cache.h
已删除
100644 → 0
浏览文件 @
c1b6692f
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include <unordered_map>
namespace
paddle
{
namespace
memory
{
namespace
detail
{
/**
* \brief A cache for accessing memory block meta-data that may be expensive
* to access directly.
*
* \note This class exists to unify the metadata format between GPU and CPU
* allocations. It should be removed when the CPU can access all GPU
* allocations directly via UVM.
*/
class
MetadataCache
{
public:
explicit
MetadataCache
(
bool
uses_gpu
);
public:
/*! \brief Load the associated metadata for the specified memory block. */
Metadata
load
(
const
MemoryBlock
*
memory_block
);
/*! \brief Store the associated metadata for the specified memory block. */
void
store
(
MemoryBlock
*
memory_block
,
const
Metadata
&
meta_data
);
/*! \brief Indicate that the specified metadata will no longer be used. */
void
invalidate
(
MemoryBlock
*
memory_block
);
public:
MetadataCache
(
const
MetadataCache
&
)
=
delete
;
MetadataCache
&
operator
=
(
const
MetadataCache
&
)
=
delete
;
private:
bool
uses_gpu_
;
private:
typedef
std
::
unordered_map
<
const
MemoryBlock
*
,
Metadata
>
MetadataMap
;
private:
MetadataMap
cache_
;
};
}
// namespace detail
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/detail/meta_data.h
已删除
100644 → 0
浏览文件 @
c1b6692f
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/memory/detail/memory_block.h"
#include <stddef.h>
namespace
paddle
{
namespace
memory
{
namespace
detail
{
/**
 * \brief Plain record of the bookkeeping data kept for one memory block.
 *
 * All fields are public and carry default initializers. The two guard
 * fields are declared first and last, so they bracket every other field;
 * update_guards() and check_guards() are only declared here.
 */
class Metadata {
 public:
  Metadata();

  // NOTE(review): the short parameter names presumably correspond to type,
  // index, size, total_size, left_buddy and right_buddy in that order --
  // confirm against the constructor definition.
  Metadata(MemoryBlock::Type t, size_t i, size_t s, size_t ts, MemoryBlock* l,
           MemoryBlock* r);

  /*! \brief Update the guards when metadata is changed */
  void update_guards();

  /*! \brief Check consistency to previous modification */
  bool check_guards() const;

  // TODO(gangliao): compress this
  // Field order is kept exactly as declared; do not reorder.
  // clang-format off
  size_t            guard_begin{0};
  MemoryBlock::Type type{MemoryBlock::INVALID_CHUNK};
  size_t            index{0};
  size_t            size{0};
  size_t            total_size{0};
  MemoryBlock*      left_buddy{nullptr};
  MemoryBlock*      right_buddy{nullptr};
  size_t            guard_end{0};
  // clang-format on
};
}
// namespace detail
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
2bc5b08d
...
...
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/detail/system_allocator.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
#include <stdlib.h> // for malloc and free
#include <sys/mman.h> // for mlock and munlock
#include <algorithm> // for std::max
#include "gflags/gflags.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
// If use_pinned_memory is true, CPUAllocator calls mlock, which
// returns pinned and locked memory as staging areas for data exchange
...
...
@@ -35,13 +35,13 @@ namespace paddle {
namespace
memory
{
namespace
detail
{
void
*
CPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
void
*
CPUAllocator
::
Alloc
(
size_t
*
index
,
size_t
size
)
{
// According to http://www.cplusplus.com/reference/cstdlib/malloc/,
// malloc might not return nullptr if size is zero, but the returned
// pointer shall not be dereferenced -- so we make it nullptr.
if
(
size
<=
0
)
return
nullptr
;
index
=
0
;
// unlock memory
*
index
=
0
;
// unlock memory
void
*
p
;
...
...
@@ -56,7 +56,7 @@ void* CPUAllocator::Alloc(size_t& index, size_t size) {
if
(
p
!=
nullptr
)
{
if
(
FLAGS_use_pinned_memory
)
{
index
=
1
;
*
index
=
1
;
mlock
(
p
,
size
);
// lock memory
}
}
...
...
@@ -75,7 +75,7 @@ bool CPUAllocator::UseGpu() const { return false; }
#ifdef PADDLE_WITH_CUDA
void
*
GPUAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
void
*
GPUAllocator
::
Alloc
(
size_t
*
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does.
if
(
size
<=
0
)
return
nullptr
;
...
...
@@ -93,7 +93,7 @@ void* GPUAllocator::Alloc(size_t& index, size_t size) {
}
if
(
result
==
cudaSuccess
)
{
index
=
0
;
*
index
=
0
;
gpu_alloc_size_
+=
size
;
return
p
;
}
else
{
...
...
@@ -133,7 +133,7 @@ bool GPUAllocator::UseGpu() const { return true; }
// PINNED memory allows direct DMA transfers by the GPU to and from system
// memory. It’s locked to a physical address.
void
*
CUDAPinnedAllocator
::
Alloc
(
size_t
&
index
,
size_t
size
)
{
void
*
CUDAPinnedAllocator
::
Alloc
(
size_t
*
index
,
size_t
size
)
{
if
(
size
<=
0
)
return
nullptr
;
// NOTE: here, we use CUDAPinnedMaxAllocSize as the maximum memory size
...
...
@@ -154,7 +154,7 @@ void* CUDAPinnedAllocator::Alloc(size_t& index, size_t size) {
cudaError_t
result
=
cudaMallocHost
(
&
p
,
size
);
if
(
result
==
cudaSuccess
)
{
index
=
1
;
// PINNED memory
*
index
=
1
;
// PINNED memory
cuda_pinnd_alloc_size_
+=
size
;
return
p
;
}
else
{
...
...
paddle/fluid/memory/detail/system_allocator.h
浏览文件 @
2bc5b08d
...
...
@@ -29,14 +29,14 @@ namespace detail {
class
SystemAllocator
{
public:
virtual
~
SystemAllocator
()
{}
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
)
=
0
;
virtual
void
*
Alloc
(
size_t
*
index
,
size_t
size
)
=
0
;
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
)
=
0
;
virtual
bool
UseGpu
()
const
=
0
;
};
class
CPUAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
*
Alloc
(
size_t
*
index
,
size_t
size
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
bool
UseGpu
()
const
;
};
...
...
@@ -46,7 +46,7 @@ class GPUAllocator : public SystemAllocator {
public:
explicit
GPUAllocator
(
int
gpu_id
)
:
gpu_id_
(
gpu_id
)
{}
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
*
Alloc
(
size_t
*
index
,
size_t
size
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
bool
UseGpu
()
const
;
...
...
@@ -58,7 +58,7 @@ class GPUAllocator : public SystemAllocator {
class
CUDAPinnedAllocator
:
public
SystemAllocator
{
public:
virtual
void
*
Alloc
(
size_t
&
index
,
size_t
size
);
virtual
void
*
Alloc
(
size_t
*
index
,
size_t
size
);
virtual
void
Free
(
void
*
p
,
size_t
size
,
size_t
index
);
virtual
bool
UseGpu
()
const
;
...
...
paddle/fluid/memory/detail/system_allocator_test.cc
浏览文件 @
2bc5b08d
...
...
@@ -22,11 +22,11 @@ limitations under the License. */
DECLARE_bool
(
use_pinned_memory
);
void
TestAllocator
(
paddle
::
memory
::
detail
::
SystemAllocator
&
a
,
size_t
size
)
{
void
TestAllocator
(
paddle
::
memory
::
detail
::
SystemAllocator
*
a
,
size_t
size
)
{
bool
freed
=
false
;
{
size_t
index
;
void
*
p
=
a
.
Alloc
(
index
,
size
);
void
*
p
=
a
->
Alloc
(
&
index
,
size
);
if
(
size
>
0
)
{
EXPECT_NE
(
p
,
nullptr
);
}
else
{
...
...
@@ -36,7 +36,7 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) {
int
*
i
=
static_cast
<
int
*>
(
p
);
std
::
shared_ptr
<
int
>
ptr
(
i
,
[
&
](
void
*
p
)
{
freed
=
true
;
a
.
Free
(
p
,
size
,
index
);
a
->
Free
(
p
,
size
,
index
);
});
}
EXPECT_TRUE
(
freed
);
...
...
@@ -45,21 +45,21 @@ void TestAllocator(paddle::memory::detail::SystemAllocator& a, size_t size) {
TEST
(
CPUAllocator
,
NoLockMem
)
{
FLAGS_use_pinned_memory
=
false
;
paddle
::
memory
::
detail
::
CPUAllocator
a
;
TestAllocator
(
a
,
2048
);
TestAllocator
(
a
,
0
);
TestAllocator
(
&
a
,
2048
);
TestAllocator
(
&
a
,
0
);
}
TEST
(
CPUAllocator
,
LockMem
)
{
FLAGS_use_pinned_memory
=
true
;
paddle
::
memory
::
detail
::
CPUAllocator
a
;
TestAllocator
(
a
,
2048
);
TestAllocator
(
a
,
0
);
TestAllocator
(
&
a
,
2048
);
TestAllocator
(
&
a
,
0
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
GPUAllocator
,
Alloc
)
{
paddle
::
memory
::
detail
::
GPUAllocator
a
(
0
);
TestAllocator
(
a
,
2048
);
TestAllocator
(
a
,
0
);
TestAllocator
(
&
a
,
2048
);
TestAllocator
(
&
a
,
0
);
}
#endif
paddle/fluid/memory/m
emory
.cc
→
paddle/fluid/memory/m
alloc
.cc
浏览文件 @
2bc5b08d
...
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/m
emory
.h"
#include "paddle/fluid/memory/m
alloc
.h"
#include "glog/logging.h"
...
...
paddle/fluid/memory/malloc.h
0 → 100644
浏览文件 @
2bc5b08d
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
memory
{
/**
 * \brief Allocate a memory block in one place.
 *
 * \param[in]  place  Allocation place (CPU or GPU).
 * \param[in]  size   Allocation size.
 *
 * \return Address of the allocated memory block.
 *
 * \note A nullptr result indicates that the allocation failed because the
 *       current system has insufficient memory. Callers must check that
 *       the returned address is valid before using it.
 */
template <class Place>
void* Alloc(Place place, size_t size);
/**
 * \brief Free a memory block in one place.
 *
 * \param[in]  place  Allocation place (CPU or GPU).
 * \param[in]  ptr    Address of the memory block to free.
 */
template <class Place>
void Free(Place place, void* ptr);
/**
 * \brief Total size of the memory in use in one place.
 *
 * \param[in]  place  Allocation place (CPU or GPU).
 */
template <class Place>
size_t Used(Place place);
// Visitor with a size_t result and one overload per concrete place type
// (CPU, CUDA, CUDA-pinned). The overloads are only declared here.
struct Usage : boost::static_visitor<size_t> {
  size_t operator()(const platform::CPUPlace& cpu) const;
  size_t operator()(const platform::CUDAPlace& gpu) const;
  size_t operator()(const platform::CUDAPinnedPlace& cuda_pinned) const;
};
size_t
memory_usage
(
const
platform
::
Place
&
p
);
/**
 * \brief Deleter that frees a memory block in one place; T must be POD.
 *
 * \note In some cases a custom deleter is used so that the memory held by
 *       a std::unique_ptr<T> in tensor.h is deallocated automatically.
 */
template <class T, class Place>
class PODDeleter {
  static_assert(std::is_pod<T>::value, "T must be POD");

 public:
  explicit PODDeleter(Place place) : place_(place) {}

  // Passes the pointer, converted to void*, to Free() together with the
  // place captured at construction.
  void operator()(T* ptr) {
    void* raw = static_cast<void*>(ptr);
    Free(place_, raw);
  }

 private:
  Place place_;  // place given to the constructor
};
/**
 * \brief Deleter that frees a memory block in one place for types that do
 *        not meet the POD requirement.
 *
 * \note In some cases a custom deleter is used so that the memory held by
 *       a std::unique_ptr<T> in tensor.h is deallocated automatically.
 */
template <class T, class Place>
class PlainDeleter {
 public:
  explicit PlainDeleter(Place place) : place_(place) {}

  // Passes the pointer, reinterpreted as void*, to Free() together with
  // the place captured at construction.
  void operator()(T* ptr) {
    void* raw = reinterpret_cast<void*>(ptr);
    Free(place_, raw);
  }

 private:
  Place place_;  // place given to the constructor
};
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/m
emory
_test.cc
→
paddle/fluid/memory/m
alloc
_test.cc
浏览文件 @
2bc5b08d
...
...
@@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/m
emory
.h"
#include "paddle/fluid/memory/m
alloc
.h"
#include <unordered_map>
#include "gtest/gtest.h"
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -28,7 +27,7 @@ inline bool is_aligned(void const *p) {
}
size_t
align
(
size_t
size
,
paddle
::
platform
::
CPUPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
tadata
);
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
moryBlock
::
Desc
);
size_t
alignment
=
paddle
::
platform
::
CpuMinChunkSize
();
size_t
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
(
alignment
-
remaining
);
...
...
@@ -86,7 +85,7 @@ TEST(BuddyAllocator, CPUMultAlloc) {
#ifdef PADDLE_WITH_CUDA
size_t
align
(
size_t
size
,
paddle
::
platform
::
CUDAPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
tadata
);
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
moryBlock
::
Desc
);
size_t
alignment
=
paddle
::
platform
::
GpuMinChunkSize
();
size_t
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
(
alignment
-
remaining
);
...
...
@@ -142,7 +141,7 @@ TEST(BuddyAllocator, GPUMultAlloc) {
}
size_t
align
(
size_t
size
,
paddle
::
platform
::
CUDAPinnedPlace
place
)
{
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
tadata
);
size
+=
sizeof
(
paddle
::
memory
::
detail
::
Me
moryBlock
::
Desc
);
size_t
alignment
=
paddle
::
platform
::
CUDAPinnedMinChunkSize
();
size_t
remaining
=
size
%
alignment
;
return
remaining
==
0
?
size
:
size
+
(
alignment
-
remaining
);
...
...
paddle/fluid/memory/memory.h
浏览文件 @
2bc5b08d
...
...
@@ -14,91 +14,5 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
memory
{
/**
* \brief Allocate memory block in one place.
*
* \param[in] place Allocation place (CPU or GPU).
* \param[in] size Allocation size.
*
* \return Allocated memory block address.
*
* \note If return nullptr, it indicates memory allocation failed
* because insufficient memory in current system. When Alloc
* function is invoked, you must check the returned memory
* address is valid or not.
*/
template
<
typename
Place
>
void
*
Alloc
(
Place
place
,
size_t
size
);
/**
* \brief Free memory block in one place.
*
* \param[in] place Allocation place (CPU or GPU).
* \param[in] ptr Memory block address to free.
*
*/
template
<
typename
Place
>
void
Free
(
Place
place
,
void
*
ptr
);
/**
* \brief Total size of used memory in one place.
*
* \param[in] place Allocation place (CPU or GPU).
*
*/
template
<
typename
Place
>
size_t
Used
(
Place
place
);
struct
Usage
:
public
boost
::
static_visitor
<
size_t
>
{
size_t
operator
()(
const
platform
::
CPUPlace
&
cpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPlace
&
gpu
)
const
;
size_t
operator
()(
const
platform
::
CUDAPinnedPlace
&
cuda_pinned
)
const
;
};
size_t
memory_usage
(
const
platform
::
Place
&
p
);
/**
* \brief Free memory block in one place.
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
*
*/
template
<
typename
T
,
typename
Place
>
class
PODDeleter
{
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
public:
explicit
PODDeleter
(
Place
place
)
:
place_
(
place
)
{}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
static_cast
<
void
*>
(
ptr
));
}
private:
Place
place_
;
};
/**
* \brief Free memory block in one place does not meet POD
*
* \note In some cases, custom deleter is used to
* deallocate the memory automatically for
* std::unique_ptr<T> in tensor.h.
*
*/
template
<
typename
T
,
typename
Place
>
class
PlainDeleter
{
public:
explicit
PlainDeleter
(
Place
place
)
:
place_
(
place
)
{}
void
operator
()(
T
*
ptr
)
{
Free
(
place_
,
reinterpret_cast
<
void
*>
(
ptr
));
}
private:
Place
place_
;
};
}
// namespace memory
}
// namespace paddle
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/memory/memcpy.h"
paddle/fluid/memory/pinned_memory_test.cu
浏览文件 @
2bc5b08d
...
...
@@ -15,7 +15,6 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/fluid/memory/detail/memory_block.h"
#include "paddle/fluid/memory/detail/meta_data.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
2bc5b08d
...
...
@@ -263,7 +263,7 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test
(
scatter_test SRCS scatter_test.cc DEPS tensor
)
cc_test
(
beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor
)
cc_test
(
beam_search_op_test SRCS beam_search_op_test.cc DEPS lod_tensor beam_search_op
)
cc_test
(
strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor
paddle_
memory
)
cc_test
(
strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor memory
)
cc_test
(
save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op
)
cc_test
(
save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op
)
nv_test
(
nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context
)
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
2bc5b08d
...
...
@@ -42,12 +42,12 @@ ENDIF()
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library
(
device_context SRCS device_context.cc DEPS m
emory buddy_allocator
system_allocator memory_block meta_data meta_cache
place eigen3
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
)
cc_library
(
device_context SRCS device_context.cc DEPS m
alloc
place eigen3
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
)
nv_test
(
device_context_test SRCS device_context_test.cu DEPS device_context gpu_info
)
nv_test
(
cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda
)
nv_test
(
transform_test SRCS transform_test.cu DEPS
paddle_
memory place device_context
)
nv_test
(
transform_test SRCS transform_test.cu DEPS memory place device_context
)
cc_library
(
device_tracer SRCS device_tracer.cc DEPS boost profiler_proto
${
GPU_CTX_DEPS
}
)
cc_library
(
profiler SRCS profiler.cc DEPS device_context device_tracer
)
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
2bc5b08d
...
...
@@ -2,13 +2,13 @@ if(WITH_PYTHON)
if
(
WITH_AMD_GPU
)
hip_library
(
paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc
paddle_
memory executor prune init profiler feed_fetch_method
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor
${
GLOB_OP_LIB
}
)
else
()
cc_library
(
paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
DEPS pybind python backward proto_desc
paddle_
memory executor prune init profiler feed_fetch_method
DEPS pybind python backward proto_desc memory executor prune init profiler feed_fetch_method
parallel_executor
${
GLOB_OP_LIB
}
)
if
(
NOT APPLE AND NOT ANDROID
)
...
...
paddle/testing/CMakeLists.txt
浏览文件 @
2bc5b08d
...
...
@@ -6,6 +6,6 @@ if(WITH_TESTING)
add_library
(
paddle_test_util STATIC TestUtil.cpp
)
add_dependencies
(
paddle_test_util paddle_proto
${
external_project_dependencies
}
)
if
(
NOT MOBILE_INFERENCE
)
cc_library
(
paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init
paddle_
memory gtest gflags
)
cc_library
(
paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init memory gtest gflags
)
endif
()
endif
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录