Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
31270e58
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
31270e58
编写于
9月 29, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add communication attr
上级
8e3fdc6e
变更
11
隐藏空白更改
内联
并排
Showing
11 changed files
with
152 additions
and
29 deletions
+152
-29
paddle/fluid/framework/tensor.cc
paddle/fluid/framework/tensor.cc
+5
-3
paddle/fluid/framework/tensor.h
paddle/fluid/framework/tensor.h
+10
-3
paddle/fluid/framework/tensor_impl.h
paddle/fluid/framework/tensor_impl.h
+7
-3
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+2
-2
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+2
-1
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+31
-4
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+43
-0
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+37
-0
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+8
-5
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+6
-7
python/paddle/fluid/tests/unittests/test_conv2d_op.py
python/paddle/fluid/tests/unittests/test_conv2d_op.py
+1
-1
未找到文件。
paddle/fluid/framework/tensor.cc
浏览文件 @
31270e58
...
...
@@ -32,6 +32,7 @@ size_t Tensor::memory_size() const {
}
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
,
memory
::
Allocator
::
Attr
attr
,
size_t
requested_size
)
{
type_
=
type
;
PADDLE_ENFORCE_GE
(
numel
(),
0
,
...
...
@@ -46,17 +47,18 @@ void* Tensor::mutable_data(platform::Place place, std::type_index type,
/* some versions of boost::variant don't have operator!= */
if
(
holder_
==
nullptr
||
!
(
holder_
->
place
()
==
place
)
||
holder_
->
size
()
<
size
+
offset_
)
{
holder_
=
memory
::
AllocShared
(
place
,
size
);
holder_
=
memory
::
AllocShared
(
place
,
size
,
attr
);
offset_
=
0
;
}
return
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
holder_
->
ptr
())
+
offset_
);
}
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
size_t
requested_size
)
{
void
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
,
size_t
requested_size
)
{
PADDLE_ENFORCE
(
this
->
holder_
!=
nullptr
,
"Cannot invoke mutable data if current hold nothing."
);
return
mutable_data
(
place
,
type_
,
requested_size
);
return
mutable_data
(
place
,
type_
,
attr
,
requested_size
);
}
Tensor
&
Tensor
::
ShareDataWith
(
const
Tensor
&
src
)
{
...
...
paddle/fluid/framework/tensor.h
浏览文件 @
31270e58
...
...
@@ -84,12 +84,17 @@ class Tensor {
* @note If not exist, then allocation.
*/
template
<
typename
T
>
T
*
mutable_data
(
platform
::
Place
place
,
size_t
requested_size
=
0
);
T
*
mutable_data
(
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
=
memory
::
Allocator
::
kDefault
,
size_t
requested_size
=
0
);
void
*
mutable_data
(
platform
::
Place
place
,
std
::
type_index
type
,
memory
::
Allocator
::
Attr
attr
=
memory
::
Allocator
::
kDefault
,
size_t
requested_size
=
0
);
void
*
mutable_data
(
platform
::
Place
place
,
size_t
requested_size
=
0
);
void
*
mutable_data
(
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
=
memory
::
Allocator
::
kDefault
,
size_t
requested_size
=
0
);
/**
* @brief Return a pointer to mutable memory block.
...
...
@@ -101,7 +106,9 @@ class Tensor {
* @note If not exist, then allocation.
*/
template
<
typename
T
>
T
*
mutable_data
(
DDim
dims
,
platform
::
Place
place
,
size_t
requested_size
=
0
);
T
*
mutable_data
(
DDim
dims
,
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
=
memory
::
Allocator
::
kDefault
,
size_t
requested_size
=
0
);
/*! Return the dimensions of the memory block. */
const
DDim
&
dims
()
const
;
...
...
paddle/fluid/framework/tensor_impl.h
浏览文件 @
31270e58
...
...
@@ -47,16 +47,20 @@ inline T* Tensor::data() {
template
<
typename
T
>
inline
T
*
Tensor
::
mutable_data
(
DDim
dims
,
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
,
size_t
requested_size
)
{
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
Resize
(
dims
);
return
mutable_data
<
T
>
(
place
,
requested_size
);
return
mutable_data
<
T
>
(
place
,
attr
,
requested_size
);
}
template
<
typename
T
>
inline
T
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
size_t
requested_size
)
{
inline
T
*
Tensor
::
mutable_data
(
platform
::
Place
place
,
memory
::
Allocator
::
Attr
attr
,
size_t
requested_size
)
{
static_assert
(
std
::
is_pod
<
T
>::
value
,
"T must be POD"
);
return
reinterpret_cast
<
T
*>
(
mutable_data
(
place
,
typeid
(
T
),
requested_size
));
return
reinterpret_cast
<
T
*>
(
mutable_data
(
place
,
typeid
(
T
),
attr
,
requested_size
));
}
inline
Tensor
ReshapeToMatrix
(
const
Tensor
&
src
,
int
num_col_dims
)
{
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
31270e58
...
...
@@ -25,9 +25,9 @@ endif()
cc_library
(
naive_managed_allocator SRCS naive_managed_allocator.cc DEPS allocator
)
cc_test
(
naive_managed_allocator_test SRCS naive_managed_allocator_test.cc DEPS naive_managed_allocator
)
nv_library
(
pinned_allocator SRCS pinned_allocator.cc DEPS allocator
)
if
(
WITH_GPU
)
set
(
AllocatorFacadeDeps gpu_info cuda_allocator
)
set
(
AllocatorFacadeDeps gpu_info cuda_allocator
pinned_allocator
)
else
()
set
(
AllocatorFacadeDeps
)
endif
()
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
31270e58
...
...
@@ -60,7 +60,8 @@ class Allocator {
kFixedHuge
=
2
,
kFluxHuge
=
3
,
kTmp
=
4
,
NumOfAttrs
=
5
kCommunication
=
5
,
NumOfAttrs
=
6
};
virtual
~
Allocator
();
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
31270e58
...
...
@@ -21,6 +21,7 @@
#include "paddle/fluid/memory/allocation/cpu_allocator.h"
#include "paddle/fluid/memory/allocation/locked_allocator.h"
#include "paddle/fluid/memory/allocation/naive_managed_allocator.h"
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/gpu_info.h"
#include "paddle/fluid/platform/place.h"
...
...
@@ -32,6 +33,35 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Managed allocator for host memory that forwards every request to one of two
// backing allocators, chosen by the requested attribute:
//   - kCommunication  -> allocator built on CPUPinnedAllocator
//   - anything else   -> allocator built on CPUAllocator
// NOTE(review): CPUPinnedAllocator is instantiated unconditionally here, while
// the build rules only add pinned_allocator to the facade deps under WITH_GPU
// -- confirm that non-GPU builds still link.
class CPUManagedAllocator : public ManagedAllocator {
 public:
  CPUManagedAllocator()
      : normal_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUAllocator()))),
        communication_allocator_(NaiveManagedAllocator::Create(
            std::unique_ptr<Allocator>(new CPUPinnedAllocator()))) {}

  // Returns a uniquely-owned allocation of `size` bytes from the backing
  // allocator selected by `attr`; `attr` is passed through unchanged.
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override {
    return BackendFor(attr)->Allocate(size, attr);
  }

  // Same routing as Allocate(), but the result is shared-ownership.
  std::shared_ptr<Allocation> AllocateShared(size_t size, Attr attr) override {
    return BackendFor(attr)->AllocateShared(size, attr);
  }

 private:
  // Picks the backing allocator that serves requests carrying `attr`.
  const std::shared_ptr<ManagedAllocator>& BackendFor(Attr attr) const {
    return attr == kCommunication ? communication_allocator_
                                  : normal_allocator_;
  }

  std::shared_ptr<ManagedAllocator> normal_allocator_;
  std::shared_ptr<ManagedAllocator> communication_allocator_;
};
class
AllocatorFacadePrivate
{
public:
std
::
map
<
platform
::
Place
,
std
::
shared_ptr
<
ManagedAllocator
>>
allocators_
;
...
...
@@ -52,10 +82,7 @@ class AllocatorFacadePrivate {
private:
void
InitCPUAllocator
()
{
auto
all
=
NaiveManagedAllocator
::
Create
(
std
::
unique_ptr
<
Allocator
>
(
new
CPUAllocator
()));
allocators_
[
platform
::
CPUPlace
()]
=
all
;
allocators_
[
platform
::
CPUPlace
()]
=
std
::
make_shared
<
CPUManagedAllocator
>
();
}
void
InitCUDAAllocator
()
{
...
...
paddle/fluid/memory/allocation/pinned_allocator.cc
0 → 100644
浏览文件 @
31270e58
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/pinned_allocator.h"
#include <cuda.h>
#include <cuda_runtime.h>
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Requests `size` bytes of host memory from cudaMallocHost and wraps the
// returned pointer in a CPUPinnedAllocation.
//
// @param size  number of bytes to request.
// @param attr  must equal kCommunication; any other value fails the enforce
//              check below.
// @return an owning pointer to the newly created allocation.
std::unique_ptr<Allocation> CPUPinnedAllocator::Allocate(size_t size,
                                                         Allocator::Attr attr) {
  PADDLE_ENFORCE_EQ(
      attr, kCommunication,
      "CPUPinnedAllocator should be used for Cross-Device Communication");
  // Initialise the out-parameter so it never holds an indeterminate value,
  // whatever cudaMallocHost does with it.
  void* ptr = nullptr;
  PADDLE_ENFORCE(cudaMallocHost(&ptr, size));
  return std::unique_ptr<CPUPinnedAllocation>(
      new CPUPinnedAllocation(ptr, size));
}
// Releases the host memory behind `allocation` via cudaFreeHost.
// The allocation must be a CPUPinnedAllocation; the dynamic_cast check below
// rejects anything else. Only the underlying memory is released here -- the
// Allocation object itself is not deleted by this function.
void CPUPinnedAllocator::Free(Allocation* allocation) {
  // Type guard: refuse allocations that are not CPUPinnedAllocation.
  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<CPUPinnedAllocation*>(allocation));

  PADDLE_ENFORCE(cudaFreeHost(allocation->ptr()));
}
// Always reports true: this allocator declares its allocation path
// thread-safe.
bool CPUPinnedAllocator::IsAllocThreadSafe() const { return true; }
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/pinned_allocator.h
0 → 100644
浏览文件 @
31270e58
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Allocation record produced by CPUPinnedAllocator. It stores the pointer and
// size it is given and is always tagged with platform::CPUPlace().
class CPUPinnedAllocation : public Allocation {
 public:
  // @param ptr   start address of the memory block.
  // @param size  size of the block in bytes.
  CPUPinnedAllocation(void* ptr, size_t size)
      : Allocation(ptr, size, platform::CPUPlace()) {}
};
// Unmanaged allocator for host memory intended for cross-device
// communication. Its implementation obtains memory with cudaMallocHost and
// returns it with cudaFreeHost.
class CPUPinnedAllocator : public UnmanagedAllocator {
 public:
  // Allocates `size` bytes; `attr` is expected to be kCommunication.
  std::unique_ptr<Allocation> Allocate(size_t size, Attr attr) override;

  // Releases the memory held by `allocation`, which must have been produced
  // by this allocator type.
  void Free(Allocation* allocation) override;

  // Reports whether Allocate may be called concurrently.
  bool IsAllocThreadSafe() const override;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
31270e58
...
...
@@ -303,7 +303,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
bool
fuse_eltwise
=
ctx
.
Attr
<
bool
>
(
"fuse_eltwise"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
// TODO: add support for dilation
// TODO: add support for dilation
// NOLINT
PADDLE_ENFORCE
(
dilations
.
size
()
==
2
&&
dilations
[
0
]
==
1
&&
dilations
[
1
]
==
1
,
"dilation in convolution is not implemented yet"
);
...
...
@@ -386,8 +386,9 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
handler
.
GetDstMemorySize
());
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
paddle
::
memory
::
Allocator
::
kDefault
,
handler
.
GetDstMemorySize
());
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
...
...
@@ -626,7 +627,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
user_diff_dst_memory_p
,
pipeline
);
const
size_t
size
=
handler
.
GetDiffWeightsMemorySize
();
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
size
);
filter_grad_data
=
filter_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
paddle
::
memory
::
Allocator
::
kDefault
,
size
);
auto
diff_weights_memory_p
=
handler
.
AcquireDiffWeightsMemoryFromWeightsPrimitive
(
...
...
@@ -651,7 +653,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
pipeline
);
const
size_t
size
=
handler
.
GetDiffSourceMemorySize
();
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
size
);
input_grad_data
=
input_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
(),
paddle
::
memory
::
Allocator
::
kDefault
,
size
);
auto
diff_src_memory_p
=
handler
.
AcquireDiffSrcMemoryFromDataPrimitive
(
reinterpret_cast
<
void
*>
(
input_grad_data
));
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
31270e58
...
...
@@ -112,17 +112,16 @@ T TensorGetElement(const framework::Tensor &self, size_t offset) {
}
}
// TODO(dzhwinter) : fix the redund
e
nt Tensor allocate and free
// TODO(dzhwinter) : fix the redund
a
nt Tensor allocate and free
template
<
typename
T
>
void
TensorSetElement
(
framework
::
Tensor
*
self
,
size_t
offset
,
T
elem
)
{
if
(
platform
::
is_gpu_place
(
self
->
place
()))
{
std
::
shared_ptr
<
framework
::
Tensor
>
dst
(
new
framework
::
Tensor
);
framework
::
TensorCopySync
(
*
self
,
platform
::
CPUPlace
(),
dst
.
get
());
dst
->
data
<
T
>
()[
offset
]
=
elem
;
framework
::
TensorCopySync
(
*
dst
.
get
(),
self
->
place
(),
self
);
framework
::
Tensor
dst
;
framework
::
TensorCopySync
(
*
self
,
platform
::
CPUPlace
(),
&
dst
);
dst
.
mutable_data
<
T
>
(
platform
::
CPUPlace
())[
offset
]
=
elem
;
framework
::
TensorCopySync
(
dst
,
self
->
place
(),
self
);
}
else
if
(
platform
::
is_cpu_place
(
self
->
place
()))
{
self
->
data
<
T
>
(
)[
offset
]
=
elem
;
self
->
mutable_data
<
T
>
(
self
->
place
()
)[
offset
]
=
elem
;
}
}
...
...
python/paddle/fluid/tests/unittests/test_conv2d_op.py
浏览文件 @
31270e58
...
...
@@ -113,7 +113,7 @@ class TestConv2dOp(OpTest):
return
place
=
core
.
CUDAPlace
(
0
)
if
self
.
testcudnn
()
else
core
.
CPUPlace
()
self
.
check_grad_with_place
(
place
,
set
([
'Input'
,
'Filter'
])
,
'Output'
,
max_relative_error
=
0.02
)
place
,
{
'Input'
,
'Filter'
}
,
'Output'
,
max_relative_error
=
0.02
)
def
test_check_grad_no_filter
(
self
):
if
self
.
dtype
==
np
.
float16
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录