Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
15076c32
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
15076c32
编写于
10月 02, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add comments and polish code style
上级
b4f54d33
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
194 addition
and
28 deletion
+194
-28
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+3
-2
paddle/fluid/memory/allocation/aligned_allocator.cc
paddle/fluid/memory/allocation/aligned_allocator.cc
+5
-0
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+36
-7
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
+3
-0
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+76
-9
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+2
-2
paddle/fluid/memory/allocation/allocator_facade.h
paddle/fluid/memory/allocation/allocator_facade.h
+7
-0
paddle/fluid/memory/allocation/auto_increment_allocator.h
paddle/fluid/memory/allocation/auto_increment_allocator.h
+22
-2
paddle/fluid/memory/allocation/conditional_allocator.h
paddle/fluid/memory/allocation/conditional_allocator.h
+16
-0
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+7
-1
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+1
-0
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+1
-0
paddle/fluid/memory/allocation/naive_managed_allocator.h
paddle/fluid/memory/allocation/naive_managed_allocator.h
+5
-0
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+1
-1
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+1
-0
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+3
-0
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+2
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+2
-2
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+1
-1
未找到文件。
paddle/fluid/framework/tensor_util.cc
浏览文件 @
15076c32
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include <algorithm>
#include <algorithm>
#include <limits>
#include <limits>
#include <vector>
#include <vector>
#include "../memory/allocation/allocator.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -111,8 +112,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
...
@@ -111,8 +112,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
dst
->
set_layout
(
src
.
layout
());
dst
->
set_layout
(
src
.
layout
());
auto
src_place
=
src
.
place
();
auto
src_place
=
src
.
place
();
auto
src_ptr
=
src
.
data
<
void
>
();
auto
src_ptr
=
src
.
data
<
void
>
();
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
auto
dst_ptr
=
memory
::
Allocator
::
kCommunication
);
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
memory
::
Allocator
::
kCrossDevice
);
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/fluid/memory/allocation/aligned_allocator.cc
浏览文件 @
15076c32
...
@@ -21,6 +21,11 @@ namespace allocation {
...
@@ -21,6 +21,11 @@ namespace allocation {
ThinAlignedAllocator
::
ThinAlignedAllocator
(
ThinAlignedAllocator
::
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
)
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlyning_allocator
))
{}
:
underlying_allocator_
(
std
::
move
(
underlyning_allocator
))
{}
std
::
shared_ptr
<
Allocation
>
ThinAlignedAllocator
::
AllocateShared
(
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
}
// namespace allocation
}
// namespace allocation
}
// namespace memory
}
// namespace memory
}
// namespace paddle
}
// namespace paddle
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
15076c32
...
@@ -20,34 +20,66 @@ namespace paddle {
...
@@ -20,34 +20,66 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// The aligned allocation and allocator will wrap a managed allocator,
// and returns the aligned pointer.
//
// NOTE(yy): For speed reasons, a template parameter is used to get the
// alignment; however, it can be a private member if necessary.
//
// NOTE(yy): kAlignment must be 2^N. A `static_assert` should be added.
template
<
size_t
kAlignment
>
template
<
size_t
kAlignment
>
class
AlignedAllocation
:
public
Allocation
{
class
AlignedAllocation
:
public
Allocation
{
public:
public:
AlignedAllocation
(
std
::
unique_ptr
<
Allocation
>&&
underlying_allocation
,
AlignedAllocation
(
std
::
unique_ptr
<
Allocation
>&&
underlying_allocation
,
size_t
size
)
size_t
size
)
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
,
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
+
kAlignment
-
Offset
(
underlying_allocation
->
ptr
()),
underlying_allocation
->
place
()),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
private:
private:
static
void
*
AlignedPtr
(
void
*
ptr
)
{
static
void
*
AlignedPtr
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
uintptr_t
>
(
ptr
);
return
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
ptr
)
+
ptr_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
))
+
kAlignment
;
Offset
(
ptr
));
return
reinterpret_cast
<
void
*>
(
ptr_addr
);
}
// Offset to aligned pointer.
// if ptr is already aligned, returns 0.
static
size_t
Offset
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
intptr_t
>
(
ptr
);
intptr_t
aligned_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
));
intptr_t
diff
=
aligned_addr
-
ptr_addr
;
if
(
diff
==
0
)
{
return
0
;
}
else
{
return
kAlignment
+
diff
;
}
}
}
std
::
unique_ptr
<
Allocation
>
underlying_allocation_
;
std
::
unique_ptr
<
Allocation
>
underlying_allocation_
;
};
};
// Thin aligned allocator is trivial and used to generate a small size binary.
//
// NOTE(yy): This is a trick for writing a template class. This class extracts
// the common code into a `thin` class, so if there are multiple specializations
// of the template class, the binary size will not grow too much.
//
// NOTE(yy): This could be over-design. If it harms the readability of the code,
// it could be removed later.
class
ThinAlignedAllocator
:
public
ManagedAllocator
{
class
ThinAlignedAllocator
:
public
ManagedAllocator
{
public:
public:
explicit
ThinAlignedAllocator
(
explicit
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
);
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
);
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
protected:
protected:
std
::
shared_ptr
<
ManagedAllocator
>
underlying_allocator_
;
std
::
shared_ptr
<
ManagedAllocator
>
underlying_allocator_
;
};
};
// An aligned allocator will allocate `size+kAlignment` allocation and adjust
// the pointer offset.
template
<
size_t
kAlignment
>
template
<
size_t
kAlignment
>
class
AlignedAllocator
:
public
ThinAlignedAllocator
{
class
AlignedAllocator
:
public
ThinAlignedAllocator
{
public:
public:
...
@@ -58,9 +90,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
...
@@ -58,9 +90,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
return
std
::
unique_ptr
<
Allocation
>
(
return
std
::
unique_ptr
<
Allocation
>
(
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
));
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
));
}
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
};
};
}
// namespace allocation
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
浏览文件 @
15076c32
...
@@ -18,6 +18,9 @@
...
@@ -18,6 +18,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
#include "unsupported/Eigen/CXX11/Tensor"
// NOTE(yy): This unit test is not important. It is just used for debugging.
// It can be removed later.
struct
FillZero
{
struct
FillZero
{
public:
public:
float
*
ptr_
;
float
*
ptr_
;
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
15076c32
...
@@ -12,6 +12,22 @@
...
@@ -12,6 +12,22 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include <utility>
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#pragma once
#include <memory>
#include <memory>
#include <string>
#include <string>
...
@@ -21,15 +37,22 @@ namespace paddle {
...
@@ -21,15 +37,22 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// Exception when `Alloc`/`AllocShared` failed
class
BadAlloc
:
public
std
::
exception
{
class
BadAlloc
:
public
std
::
exception
{
public:
public:
explicit
BadAlloc
(
const
std
::
string
&
msg
)
:
msg_
(
msg
)
{}
explicit
BadAlloc
(
std
::
string
msg
)
:
msg_
(
std
::
move
(
msg
)
)
{}
const
char
*
what
()
const
noexcept
override
;
const
char
*
what
()
const
noexcept
override
;
private:
private:
std
::
string
msg_
;
std
::
string
msg_
;
};
};
// Allocation is the object holding the actual pointer.
// `Allocation::ptr()` returns the pointer that was allocated.
//
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
class
Allocation
{
class
Allocation
{
public:
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
...
@@ -38,8 +61,22 @@ class Allocation {
...
@@ -38,8 +61,22 @@ class Allocation {
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void
*
ptr
()
const
{
return
ptr_
;
}
void
*
ptr
()
const
{
return
ptr_
;
}
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested, so the
// size could be larger than the request. For example,
// the AlignedAllocator will always allocate memory as size + kAlignment.
// The raw pointer might not be aligned, so an offset might be added to the
// raw pointer. The size of this allocation will be
// `size + kAlignment - offset`.
size_t
size
()
const
{
return
size_
;
}
size_t
size
()
const
{
return
size_
;
}
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
...
@@ -52,22 +89,51 @@ class Allocation {
...
@@ -52,22 +89,51 @@ class Allocation {
platform
::
Place
place_
;
platform
::
Place
place_
;
};
};
// Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters:
// 1. size of bytes.
// 2. Attribute of memory.
// NOTE: the attribute of memory might be ignored if the allocator does not
// care about it.
class
Allocator
{
class
Allocator
{
public:
public:
enum
Attr
{
enum
Attr
{
kDefault
=
0
,
kDefault
=
0
,
// Default attribute. Uses the fastest or most stable allocation
kTiny
=
1
,
// algorithm.
kFixedHuge
=
2
,
kFluxHuge
=
3
,
kFixedHuge
=
1
,
// The allocation may not be freed until the program
kTmp
=
4
,
// ends. e.g., `Parameters` and `Momentum`.
kCommunication
=
5
,
NumOfAttrs
=
6
kFluxHuge
=
2
,
// The allocation may be created and freed frequently, and the
// allocation is considerably huge, e.g., `activations`
// and gradients.
kScratchpad
=
3
,
// The `Scratchpad` memory is allocated and freed very soon,
// usually within an operator or aux memory.
// Like CUDNN workspace, AUX memory in batch norm, etc.
//
// https://en.wikipedia.org/wiki/Scratchpad_memory
kCrossDevice
=
4
,
// The memory used cross-device memory copy/communication.
// For example:
// 1. it can use `pinned` memory for CPU-GPU
// communication.
// 2. it can use `registered` memory for RDMA
// communication.
NumOfAttrs
=
5
// The number of all attributes. It is used internally.
};
};
virtual
~
Allocator
();
virtual
~
Allocator
();
// Allocate an allocation. Note that the returned allocation might need to be
// freed manually if the Allocator is an `UnmanagedAllocator`.
virtual
std
::
unique_ptr
<
Allocation
>
Allocate
(
virtual
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
=
kDefault
)
=
0
;
size_t
size
,
Allocator
::
Attr
attr
=
kDefault
)
=
0
;
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
virtual
bool
IsAllocThreadSafe
()
const
;
};
};
...
@@ -82,7 +148,8 @@ class UnmanagedAllocator : public Allocator {
...
@@ -82,7 +148,8 @@ class UnmanagedAllocator : public Allocator {
}
}
};
};
// The allocation will be managed by smart pointers
// The allocation will be managed by smart pointers. i.e., users do not need
// to free allocation manually.
class
ManagedAllocator
:
public
Allocator
{
class
ManagedAllocator
:
public
Allocator
{
public:
public:
virtual
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
virtual
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
15076c32
...
@@ -46,7 +46,7 @@ class CPUManagedAllocator : public ManagedAllocator {
...
@@ -46,7 +46,7 @@ class CPUManagedAllocator : public ManagedAllocator {
std
::
unique_ptr
<
Allocator
>
(
new
CPUPinnedAllocator
())))
{}
std
::
unique_ptr
<
Allocator
>
(
new
CPUPinnedAllocator
())))
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
{
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
Allocate
(
size
,
attr
);
return
communication_allocator_
->
Allocate
(
size
,
attr
);
}
else
{
}
else
{
return
normal_allocator_
->
Allocate
(
size
,
attr
);
return
normal_allocator_
->
Allocate
(
size
,
attr
);
...
@@ -54,7 +54,7 @@ class CPUManagedAllocator : public ManagedAllocator {
...
@@ -54,7 +54,7 @@ class CPUManagedAllocator : public ManagedAllocator {
}
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
AllocateShared
(
size
,
attr
);
return
communication_allocator_
->
AllocateShared
(
size
,
attr
);
}
else
{
}
else
{
return
normal_allocator_
->
AllocateShared
(
size
,
attr
);
return
normal_allocator_
->
AllocateShared
(
size
,
attr
);
...
...
paddle/fluid/memory/allocation/allocator_facade.h
浏览文件 @
15076c32
...
@@ -24,6 +24,10 @@ namespace allocation {
...
@@ -24,6 +24,10 @@ namespace allocation {
// Allocator Facade is the interface exposed to other modules.
// Allocator Facade is the interface exposed to other modules.
// All the configuration or dirty code under development should
// All the configuration or dirty code under development should
// be hidden behind this facade.
// be hidden behind this facade.
//
// NOTE(yy): This class is a singleton class.
// NOTE(yy): To create a stable ABI and make compilation faster, we use
// a Pimpl trick here.
class
AllocatorFacadePrivate
;
class
AllocatorFacadePrivate
;
class
AllocatorFacade
{
class
AllocatorFacade
{
public:
public:
...
@@ -33,13 +37,16 @@ class AllocatorFacade {
...
@@ -33,13 +37,16 @@ class AllocatorFacade {
static
AllocatorFacade
&
Instance
();
static
AllocatorFacade
&
Instance
();
// Allocate a shared allocation.
std
::
shared_ptr
<
Allocation
>
AllocShared
(
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// Allocate a unique allocation.
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// TODO(yy): Allocate a Copy-On-Write allocation?
private:
private:
AllocatorFacade
();
AllocatorFacade
();
AllocatorFacadePrivate
*
m_
;
AllocatorFacadePrivate
*
m_
;
...
...
paddle/fluid/memory/allocation/auto_increment_allocator.h
浏览文件 @
15076c32
...
@@ -24,12 +24,27 @@ namespace paddle {
...
@@ -24,12 +24,27 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// The AutoIncrementAllocator manages many underlying allocators. If none of
// them can allocate the requested memory, a new allocator will be created and
// its `allocate` method will be invoked.
//
// NOTE(yy): The AutoIncrementAllocator will prefer to allocate memory from
// the latest successful allocator.
//
// NOTE(yy): We may need to release an underlying allocator if it allocates
// nothing. However, it is generally not useful, since it will make performance
// unpredictable.
//
// NOTE(yy): This allocator is only locked when creating new underlying
// allocator. The allocation requests from many threads may be dispatched
// to the same underlying allocator. So the underlying allocator must be
// thread safe.
class
AutoIncrementAllocator
:
public
ManagedAllocator
{
class
AutoIncrementAllocator
:
public
ManagedAllocator
{
public:
public:
// Creator is the method to create ManagedAllocator
using
AllocatorCreator
=
std
::
function
<
std
::
shared_ptr
<
ManagedAllocator
>
()
>
;
using
AllocatorCreator
=
std
::
function
<
std
::
shared_ptr
<
ManagedAllocator
>
()
>
;
template
<
typename
Creator
>
explicit
AutoIncrementAllocator
(
AllocatorCreator
&&
creator
)
explicit
AutoIncrementAllocator
(
Creator
&&
creator
)
:
creator_
(
std
::
move
(
creator
)),
prev_success_allocator_
{
0
}
{}
:
creator_
(
std
::
move
(
creator
)),
prev_success_allocator_
{
0
}
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
;
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
;
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
...
@@ -65,6 +80,11 @@ class AutoIncrementAllocator : public ManagedAllocator {
...
@@ -65,6 +80,11 @@ class AutoIncrementAllocator : public ManagedAllocator {
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
underlying_allocators_
.
emplace_back
(
creator_
());
underlying_allocators_
.
emplace_back
(
creator_
());
prev_success_allocator_
=
underlying_allocators_
.
size
()
-
1
;
prev_success_allocator_
=
underlying_allocators_
.
size
()
-
1
;
PADDLE_ENFORCE
(
underlying_allocators_
[
prev_success_allocator_
]
->
IsAllocThreadSafe
(),
"the underlying allocator must be thread safe. This is a program "
"bug."
);
return
callback
(
*
underlying_allocators_
[
prev_success_allocator_
]);
return
callback
(
*
underlying_allocators_
[
prev_success_allocator_
]);
}
}
}
}
...
...
paddle/fluid/memory/allocation/conditional_allocator.h
浏览文件 @
15076c32
...
@@ -22,6 +22,22 @@ namespace paddle {
...
@@ -22,6 +22,22 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// A composite allocator that dispatches allocation requests according to
// registered conditions.
//
// For example:
//
// auto* cond_allocator = new ConditionalAllocator();
// cond_allocator->AddAllocator([](size_t size, Attr attr){
// // if size > 10
// return size > 10;
// }, allocator_a).AddAllocator([](size_t size, Attr attr){
// // elif attr is kDefault
// return attr == kDefault;
// }, allocator_b).AddAllocator([](size_t size, Attr attr){
// // else
// return true;
// }, allocator_c);
class
ConditionalAllocator
:
public
ManagedAllocator
{
class
ConditionalAllocator
:
public
ManagedAllocator
{
public:
public:
ConditionalAllocator
()
=
default
;
ConditionalAllocator
()
=
default
;
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
15076c32
...
@@ -18,7 +18,13 @@
...
@@ -18,7 +18,13 @@
namespace
paddle
{
namespace
paddle
{
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// CPU system allocator and allocation.
//
// NOTE(yy): Should we just use `malloc` here, since there is an
// aligned_allocator?
//
// NOTE(yy): There is no need to use `BestFitAllocator` on CPU. We can import
// an open-source allocator into Paddle.
class
CPUAllocation
:
public
Allocation
{
class
CPUAllocation
:
public
Allocation
{
public:
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
)
CPUAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
15076c32
...
@@ -20,6 +20,7 @@ namespace paddle {
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
class
CUDAAllocation
:
public
Allocation
{
public:
public:
...
...
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
15076c32
...
@@ -20,6 +20,7 @@ namespace paddle {
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// An allocator that makes the underlying allocator thread safe.
class
LockedAllocator
:
public
UnmanagedAllocator
{
class
LockedAllocator
:
public
UnmanagedAllocator
{
public:
public:
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>&&
underlying_allocator
);
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>&&
underlying_allocator
);
...
...
paddle/fluid/memory/allocation/naive_managed_allocator.h
浏览文件 @
15076c32
...
@@ -20,6 +20,11 @@ namespace paddle {
...
@@ -20,6 +20,11 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// An allocator to wrap an UnmanagedAllocator and make the allocation managed
// by C++ smart ptr.
//
// NOTE: if the NaiveManagedAllocator is destroyed before
// NaiveManagedAllocations, the allocation will never be released.
class
NaiveManagedAllocator
;
class
NaiveManagedAllocator
;
class
NaiveManagedAllocation
:
public
Allocation
{
class
NaiveManagedAllocation
:
public
Allocation
{
public:
public:
...
...
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
15076c32
...
@@ -23,7 +23,7 @@ namespace allocation {
...
@@ -23,7 +23,7 @@ namespace allocation {
std
::
unique_ptr
<
Allocation
>
CPUPinnedAllocator
::
Allocate
(
size_t
size
,
std
::
unique_ptr
<
Allocation
>
CPUPinnedAllocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
Allocator
::
Attr
attr
)
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
attr
,
kC
ommunication
,
attr
,
kC
rossDevice
,
"CPUPinnedAllocator should be used for Cross-Device Communication"
);
"CPUPinnedAllocator should be used for Cross-Device Communication"
);
void
*
ptr
;
void
*
ptr
;
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
15076c32
...
@@ -19,6 +19,7 @@ namespace paddle {
...
@@ -19,6 +19,7 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// Allocator uses `cudaMallocHost`
class
CPUPinnedAllocation
:
public
Allocation
{
class
CPUPinnedAllocation
:
public
Allocation
{
public:
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
15076c32
...
@@ -22,6 +22,9 @@ namespace paddle {
...
@@ -22,6 +22,9 @@ namespace paddle {
namespace
memory
{
namespace
memory
{
namespace
allocation
{
namespace
allocation
{
// The allocator that handles requests whose size is zero. The allocator will
// always return an allocation even if the requested size is zero. However, the
// allocation.ptr() is nullptr.
class
ZeroSizeAllocation
:
public
Allocation
{
class
ZeroSizeAllocation
:
public
Allocation
{
public:
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
15076c32
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <paddle/fluid/memory/allocation/allocator.h>
#include <stdio.h>
#include <stdio.h>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -70,7 +71,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx,
...
@@ -70,7 +71,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx,
// Allocate temporary storage
// Allocate temporary storage
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
d_temp_storage
=
auto
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Tmp
);
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Scratchpad
);
// Run sorting operation
// Run sorting operation
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
15076c32
...
@@ -112,8 +112,8 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
...
@@ -112,8 +112,8 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
}
}
void
*
allocate
(
size_t
num_bytes
)
const
override
{
void
*
allocate
(
size_t
num_bytes
)
const
override
{
auto
buf
=
auto
buf
=
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
memory
::
Allocator
::
kTiny
);
memory
::
Allocator
::
kScratchpad
);
void
*
retv
=
buf
->
ptr
();
void
*
retv
=
buf
->
ptr
();
allocations_
[
buf
->
ptr
()]
=
std
::
move
(
buf
);
allocations_
[
buf
->
ptr
()]
=
std
::
move
(
buf
);
return
retv
;
return
retv
;
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
15076c32
...
@@ -64,7 +64,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
...
@@ -64,7 +64,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
auto
*
src_ptr
=
static_cast
<
const
void
*>
(
tensor
.
data
<
CUR_TYPE
>
());
auto
*
src_ptr
=
static_cast
<
const
void
*>
(
tensor
.
data
<
CUR_TYPE
>
());
auto
*
dst_ptr
=
static_cast
<
void
*>
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
auto
*
dst_ptr
=
static_cast
<
void
*>
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
tensor
.
dims
(),
platform
::
CPUPlace
(),
tensor
.
dims
(),
platform
::
CPUPlace
(),
memory
::
Allocator
::
kC
ommunication
));
memory
::
Allocator
::
kC
rossDevice
));
paddle
::
platform
::
GpuMemcpySync
(
dst_ptr
,
src_ptr
,
paddle
::
platform
::
GpuMemcpySync
(
dst_ptr
,
src_ptr
,
sizeof
(
CUR_TYPE
)
*
tensor
.
numel
(),
sizeof
(
CUR_TYPE
)
*
tensor
.
numel
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录