Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
087af6a6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
087af6a6
编写于
1月 02, 2019
作者:
X
Xin Pan
提交者:
GitHub
1月 02, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15131 from panyx0718/clean
hide temp tensor allocation
上级
adc96e06
9186451f
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
91 addition
and
85 deletion
+91
-85
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+24
-0
paddle/fluid/framework/tensor_util.h
paddle/fluid/framework/tensor_util.h
+0
-22
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+2
-9
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+2
-2
paddle/fluid/platform/temporary_allocator_test.cc
paddle/fluid/platform/temporary_allocator_test.cc
+63
-52
未找到文件。
paddle/fluid/framework/operator.h
浏览文件 @
087af6a6
...
@@ -377,6 +377,30 @@ class ExecutionContext {
...
@@ -377,6 +377,30 @@ class ExecutionContext {
return
op_
.
Outputs
(
name
);
return
op_
.
Outputs
(
name
);
}
}
template
<
typename
T
,
typename
DevContext
>
Tensor
AllocateTmpTensor
(
const
framework
::
DDim
&
dim
,
const
DevContext
&
dev_ctx
)
const
{
auto
tmp_allocation_ptr
=
platform
::
DeviceTemporaryAllocator
::
Instance
()
.
Get
<
DevContext
>
(
dev_ctx
)
.
Allocate
(
product
(
dim
)
*
sizeof
(
T
));
auto
&
deleter
=
tmp_allocation_ptr
.
get_deleter
();
auto
*
allocation_ptr
=
tmp_allocation_ptr
.
release
();
auto
shared_allocation
=
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
(
allocation_ptr
,
deleter
);
PADDLE_ENFORCE
(
dynamic_cast
<
platform
::
TemporaryAllocation
*>
(
allocation_ptr
)
!=
nullptr
,
"The AllocationPtr must be TemporaryAllocation."
);
PADDLE_ENFORCE_EQ
(
allocation_ptr
->
size
(),
framework
::
product
(
dim
)
*
sizeof
(
T
));
paddle
::
framework
::
Tensor
temp_tensor
(
framework
::
ToDataType
(
std
::
type_index
(
typeid
(
T
))));
temp_tensor
.
Resize
(
dim
);
temp_tensor
.
ResetHolder
(
std
::
move
(
shared_allocation
));
return
temp_tensor
;
}
private:
private:
const
OperatorBase
&
op_
;
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
Scope
&
scope_
;
...
...
paddle/fluid/framework/tensor_util.h
浏览文件 @
087af6a6
...
@@ -151,27 +151,5 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
...
@@ -151,27 +151,5 @@ void TensorToVector(const Tensor& src, std::vector<T>* dst) {
memory
::
Copy
(
dst_place
,
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src
.
place
()),
memory
::
Copy
(
dst_place
,
dst_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
src
.
place
()),
src_ptr
,
size
);
src_ptr
,
size
);
}
}
template
<
typename
T
>
paddle
::
framework
::
Tensor
GetTensor
(
memory
::
allocation
::
AllocationPtr
temp_allocation_ptr
,
const
framework
::
DDim
&
dim
)
{
auto
&
deleter
=
temp_allocation_ptr
.
get_deleter
();
auto
*
allocation_ptr
=
temp_allocation_ptr
.
release
();
auto
shared_allocation
=
std
::
shared_ptr
<
memory
::
allocation
::
Allocation
>
(
allocation_ptr
,
deleter
);
PADDLE_ENFORCE
(
dynamic_cast
<
platform
::
TemporaryAllocation
*>
(
allocation_ptr
)
!=
nullptr
,
"The AllocationPtr must be TemporaryAllocation."
);
PADDLE_ENFORCE_EQ
(
allocation_ptr
->
size
(),
framework
::
product
(
dim
)
*
sizeof
(
T
));
paddle
::
framework
::
Tensor
temp_tensor
(
framework
::
ToDataType
(
std
::
type_index
(
typeid
(
T
))));
temp_tensor
.
Resize
(
dim
);
temp_tensor
.
ResetHolder
(
std
::
move
(
shared_allocation
));
return
temp_tensor
;
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/conv_op.h
浏览文件 @
087af6a6
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/depthwise_conv.h"
#include "paddle/fluid/operators/math/im2col.h"
#include "paddle/fluid/operators/math/im2col.h"
...
@@ -158,10 +157,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -158,10 +157,7 @@ class GemmConvKernel : public framework::OpKernel<T> {
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
if
(
is_expand
)
{
if
(
is_expand
)
{
auto
tmp_allocation_ptr
=
col
=
context
.
AllocateTmpTensor
<
T
,
DeviceContext
>
(
col_shape
,
dev_ctx
);
platform
::
DeviceTemporaryAllocator
::
Instance
().
Get
(
dev_ctx
).
Allocate
(
framework
::
product
(
col_shape
)
*
sizeof
(
T
));
col
=
framework
::
GetTensor
<
T
>
(
std
::
move
(
tmp_allocation_ptr
),
col_shape
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
}
...
@@ -293,10 +289,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -293,10 +289,7 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
// to call the matrix multiplication interface.
// to call the matrix multiplication interface.
Tensor
col_matrix
;
Tensor
col_matrix
;
if
(
is_expand
)
{
if
(
is_expand
)
{
auto
tmp_allocation_ptr
=
col
=
context
.
AllocateTmpTensor
<
T
,
DeviceContext
>
(
col_shape
,
dev_ctx
);
platform
::
DeviceTemporaryAllocator
::
Instance
().
Get
(
dev_ctx
).
Allocate
(
framework
::
product
(
col_shape
)
*
sizeof
(
T
));
col
=
framework
::
GetTensor
<
T
>
(
std
::
move
(
tmp_allocation_ptr
),
col_shape
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
ShareDataWith
(
col
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
}
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
087af6a6
...
@@ -100,7 +100,7 @@ ENDIF()
...
@@ -100,7 +100,7 @@ ENDIF()
nv_library
(
cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info
)
nv_library
(
cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
nv_test
(
temporal_allocator_test SRCS temporary_allocator_test.cc DEPS temp_allocator tensor
)
nv_test
(
temporal_allocator_test SRCS temporary_allocator_test.cc DEPS temp_allocator tensor
operator
)
else
()
else
()
cc_test
(
temporal_allocator_test SRCS temporary_allocator_test.cc DEPS temp_allocator tensor
)
cc_test
(
temporal_allocator_test SRCS temporary_allocator_test.cc DEPS temp_allocator tensor
operator
)
endif
()
endif
()
paddle/fluid/platform/temporary_allocator_test.cc
浏览文件 @
087af6a6
...
@@ -14,12 +14,27 @@
...
@@ -14,12 +14,27 @@
#include "paddle/fluid/platform/temporary_allocator.h"
#include "paddle/fluid/platform/temporary_allocator.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include <string>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
DECLARE_double
(
limit_of_temporary_allocation
);
DECLARE_double
(
limit_of_temporary_allocation
);
namespace
paddle
{
namespace
paddle
{
namespace
platform
{
namespace
platform
{
class
DummyOp
:
public
framework
::
OperatorBase
{
public:
DummyOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
protected:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{}
};
TEST
(
temporary_allocator
,
temporary_allocator
)
{
TEST
(
temporary_allocator
,
temporary_allocator
)
{
platform
::
CPUPlace
cpu_place
;
platform
::
CPUPlace
cpu_place
;
TemporaryAllocator
alloc
(
cpu_place
);
TemporaryAllocator
alloc
(
cpu_place
);
...
@@ -68,96 +83,92 @@ TEST(temporary_allocator, add_callback) {
...
@@ -68,96 +83,92 @@ TEST(temporary_allocator, add_callback) {
}
}
TEST
(
temporary_allocator
,
create_tensor_with_allocationptr
)
{
TEST
(
temporary_allocator
,
create_tensor_with_allocationptr
)
{
platform
::
CPUPlace
cpu_place
;
framework
::
VariableNameMap
dummy_vars
;
TemporaryAllocator
cpu_alloc
(
cpu_place
);
framework
::
AttributeMap
dummy_attrs
;
DummyOp
op
(
"dummy"
,
dummy_vars
,
dummy_vars
,
dummy_attrs
);
framework
::
Scope
scope
;
framework
::
VariableValueMap
vars
;
framework
::
RuntimeContext
run_ctx
(
vars
,
vars
);
size_t
memory_size
=
300
;
{
{
size_t
memory_size
=
200
;
platform
::
CPUPlace
cpu_place
;
auto
allocation
=
cpu_alloc
.
Allocate
(
memory_size
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
void
*
address
=
allocation
->
ptr
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CPUDeviceContext
*>
(
pool
.
Get
(
cpu_place
));
framework
::
ExecutionContext
ctx
(
op
,
scope
,
*
dev_ctx
,
run_ctx
);
int
numel
=
memory_size
/
sizeof
(
float
);
int
numel
=
memory_size
/
sizeof
(
float
);
framework
::
Tensor
tensor
=
framework
::
GetTensor
<
float
>
(
framework
::
Tensor
tensor
=
std
::
move
(
allocation
),
framework
::
make_ddim
({
numel
}));
ctx
.
AllocateTmpTensor
<
float
,
platform
::
CPUDeviceContext
>
(
PADDLE_ENFORCE_EQ
(
address
,
tensor
.
data
<
float
>
()
);
framework
::
make_ddim
({
numel
}),
*
dev_ctx
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
platform
::
CUDAPlace
gpu_place
(
0
);
TemporaryAllocator
gpu_alloc
(
gpu_place
);
{
{
size_t
memory_size
=
300
;
platform
::
CUDAPlace
gpu_place
(
0
);
auto
allocation
=
gpu_alloc
.
Allocate
(
memory_size
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
void
*
address
=
allocation
->
ptr
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
framework
::
ExecutionContext
ctx
(
op
,
scope
,
*
dev_ctx
,
run_ctx
);
int
numel
=
memory_size
/
sizeof
(
float
);
int
numel
=
memory_size
/
sizeof
(
float
);
framework
::
Tensor
tensor
=
framework
::
GetTensor
<
float
>
(
framework
::
Tensor
tensor
=
std
::
move
(
allocation
),
framework
::
make_ddim
({
numel
}));
ctx
.
AllocateTmpTensor
<
float
,
platform
::
CUDADeviceContext
>
(
PADDLE_ENFORCE_EQ
(
address
,
tensor
.
data
<
float
>
()
);
framework
::
make_ddim
({
numel
}),
*
dev_ctx
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
}
}
// The allocation is not holded now, it should be placed to
// TemporaryAllocationQueue.
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
1
);
gpu_alloc
.
Release
([]()
{});
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
0
);
#endif
#endif
}
}
TEST
(
temporary_allocator
,
create_tensor_with_allocationptr2
)
{
TEST
(
temporary_allocator
,
create_tensor_with_allocationptr2
)
{
platform
::
CPUPlace
cpu_place
;
framework
::
VariableNameMap
dummy_vars
;
TemporaryAllocator
cpu_alloc
(
cpu_place
);
framework
::
AttributeMap
dummy_attrs
;
DummyOp
op
(
"dummy"
,
dummy_vars
,
dummy_vars
,
dummy_attrs
);
framework
::
Scope
scope
;
framework
::
VariableValueMap
vars
;
framework
::
RuntimeContext
run_ctx
(
vars
,
vars
);
size_t
memory_size
=
400
;
{
{
size_t
memory_size
=
400
;
platform
::
CPUPlace
cpu_place
;
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CPUDeviceContext
*>
(
pool
.
Get
(
cpu_place
));
framework
::
ExecutionContext
ctx
(
op
,
scope
,
*
dev_ctx
,
run_ctx
);
int
numel
=
memory_size
/
sizeof
(
float
);
int
numel
=
memory_size
/
sizeof
(
float
);
framework
::
Tensor
out_side_tensor
;
framework
::
Tensor
out_side_tensor
;
void
*
address
;
{
{
auto
allocation
=
cpu_alloc
.
Allocate
(
memory_size
);
framework
::
Tensor
tensor
=
address
=
allocation
->
ptr
();
ctx
.
AllocateTmpTensor
<
float
,
platform
::
CPUDeviceContext
>
(
framework
::
Tensor
tensor
=
framework
::
GetTensor
<
float
>
(
framework
::
make_ddim
({
numel
}),
*
dev_ctx
);
std
::
move
(
allocation
),
framework
::
make_ddim
({
numel
}));
PADDLE_ENFORCE_EQ
(
address
,
tensor
.
data
<
float
>
());
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
out_side_tensor
.
ShareDataWith
(
tensor
);
out_side_tensor
.
ShareDataWith
(
tensor
);
}
}
PADDLE_ENFORCE_EQ
(
address
,
out_side_tensor
.
data
<
float
>
());
PADDLE_ENFORCE_EQ
(
out_side_tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
out_side_tensor
.
numel
(),
numel
);
}
}
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
platform
::
CUDAPlace
gpu_place
(
0
);
TemporaryAllocator
gpu_alloc
(
gpu_place
);
{
{
void
*
address
;
platform
::
CUDAPlace
gpu_place
(
0
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
gpu_place
));
framework
::
ExecutionContext
ctx
(
op
,
scope
,
*
dev_ctx
,
run_ctx
);
size_t
memory_size
=
500
;
size_t
memory_size
=
500
;
int
numel
=
memory_size
/
sizeof
(
float
);
int
numel
=
memory_size
/
sizeof
(
float
);
framework
::
Tensor
out_side_tensor
;
framework
::
Tensor
out_side_tensor
;
{
{
auto
allocation
=
gpu_alloc
.
Allocate
(
memory_size
);
framework
::
Tensor
tensor
=
address
=
allocation
->
ptr
();
ctx
.
AllocateTmpTensor
<
float
,
platform
::
CUDADeviceContext
>
(
framework
::
Tensor
tensor
=
framework
::
GetTensor
<
float
>
(
framework
::
make_ddim
({
numel
}),
*
dev_ctx
);
std
::
move
(
allocation
),
framework
::
make_ddim
({
numel
}));
PADDLE_ENFORCE_EQ
(
address
,
tensor
.
data
<
float
>
());
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
tensor
.
numel
(),
numel
);
out_side_tensor
.
ShareDataWith
(
tensor
);
out_side_tensor
.
ShareDataWith
(
tensor
);
}
}
PADDLE_ENFORCE_EQ
(
address
,
out_side_tensor
.
data
<
float
>
());
PADDLE_ENFORCE_EQ
(
out_side_tensor
.
numel
(),
numel
);
PADDLE_ENFORCE_EQ
(
out_side_tensor
.
numel
(),
numel
);
// The allocation is holded by out_side_tensor.
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
0
);
gpu_alloc
.
Release
([]()
{});
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
0
);
}
}
// The allocation is not holded now, it should be placed to
// TemporaryAllocationQueue.
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
1
);
gpu_alloc
.
Release
([]()
{});
PADDLE_ENFORCE_EQ
(
gpu_alloc
.
TemporaryAllocationQueueSize
(),
0
);
#endif
#endif
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录