Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
02cf54d3
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
02cf54d3
编写于
7月 23, 2018
作者:
Y
Yan Chunwei
提交者:
GitHub
7月 23, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
bugfix lod cpu performance (#12297)
上级
b41f8b9d
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
99 addition
and
35 deletion
+99
-35
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+83
-34
paddle/fluid/operators/adam_op.h
paddle/fluid/operators/adam_op.h
+7
-0
paddle/fluid/operators/detection/target_assign_op.h
paddle/fluid/operators/detection/target_assign_op.h
+8
-0
paddle/fluid/operators/math/sequence2batch.h
paddle/fluid/operators/math/sequence2batch.h
+1
-1
未找到文件。
paddle/fluid/framework/mixed_vector.h
浏览文件 @
02cf54d3
...
...
@@ -26,6 +26,7 @@
namespace
paddle
{
namespace
framework
{
#if defined(PADDLE_WITH_CUDA)
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
...
...
@@ -37,11 +38,11 @@ class Vector {
Vector
()
{
InitEmpty
();
}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
ptr
[
i
]
=
value
;
}
...
...
@@ -59,7 +60,7 @@ class Vector {
// implicit cast from std::vector.
template
<
typename
U
>
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
if
(
dat
.
size
()
==
0
)
{
InitEmpty
();
}
else
{
...
...
@@ -68,10 +69,10 @@ class Vector {
}
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
);
}
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
);
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>&
other
)
{
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
if
(
other
.
size
()
!=
0
)
{
this
->
InitByIter
(
other
.
size
(),
other
.
begin
(),
other
.
end
());
}
else
{
...
...
@@ -81,7 +82,7 @@ class Vector {
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
Vector
(
Vector
<
T
>
&&
other
)
{
this
->
size_
=
other
.
size_
;
this
->
flag_
=
other
.
flag_
;
if
(
other
.
cuda_vec_
.
memory_size
())
{
...
...
@@ -93,13 +94,13 @@ class Vector {
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())[
i
];
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
return
cpu_vec_
.
data
<
T
>
()[
i
];
}
...
...
@@ -107,43 +108,43 @@ class Vector {
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
size_
;
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
T
*
end
()
{
T
*
end
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
T
&
front
()
{
return
*
begin
();
}
T
&
front
()
{
return
*
begin
();
}
T
&
back
()
{
T
&
back
()
{
auto
it
=
end
();
--
it
;
return
*
it
;
}
const
T
*
begin
()
const
{
const
T
*
begin
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
const
T
*
end
()
const
{
const
T
*
end
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
const
T
&
back
()
const
{
auto
it
=
end
();
--
it
;
return
*
it
;
}
T
*
data
()
{
return
begin
();
}
T
*
data
()
{
return
begin
();
}
const
T
*
data
()
const
{
return
begin
();
}
const
T
*
data
()
const
{
return
begin
();
}
const
T
&
front
()
const
{
return
*
begin
();
}
const
T
&
front
()
const
{
return
*
begin
();
}
// end of std::vector iterator methods
// assign this from iterator.
...
...
@@ -169,7 +170,7 @@ class Vector {
void
Extend
(
It
begin
,
It
end
)
{
size_t
pre_size
=
size_
;
resize
(
pre_size
+
(
end
-
begin
));
T
*
ptr
=
this
->
begin
()
+
pre_size
;
T
*
ptr
=
this
->
begin
()
+
pre_size
;
for
(;
begin
<
end
;
++
begin
,
++
ptr
)
{
*
ptr
=
*
begin
;
}
...
...
@@ -183,9 +184,9 @@ class Vector {
MutableCPU
();
Tensor
cpu_tensor
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
const
T
*
old_ptr
=
const
T
*
old_ptr
=
cpu_vec_
.
memory_size
()
==
0
?
nullptr
:
cpu_vec_
.
data
<
T
>
();
if
(
old_ptr
!=
nullptr
)
{
std
::
copy
(
old_ptr
,
old_ptr
+
size_
,
ptr
);
...
...
@@ -196,7 +197,7 @@ class Vector {
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
...
...
@@ -204,10 +205,10 @@ class Vector {
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
const
T
*
ptr
=
CUDAData
(
place
);
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
const
T
*
ptr
=
CUDAData
(
place
);
flag_
=
kDirty
|
kDataInCUDA
;
return
const_cast
<
T
*>
(
ptr
);
return
const_cast
<
T
*>
(
ptr
);
}
// clear
...
...
@@ -228,7 +229,7 @@ class Vector {
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
...
...
@@ -237,7 +238,7 @@ class Vector {
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAMutableData
(
place
);
}
else
{
...
...
@@ -253,7 +254,7 @@ class Vector {
return
result
;
}
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
...
...
@@ -274,7 +275,7 @@ class Vector {
template
<
typename
Iter
>
void
InitByIter
(
size_t
size
,
Iter
begin
,
Iter
end
)
{
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
*
ptr
++
=
*
begin
++
;
...
...
@@ -368,7 +369,7 @@ class Vector {
}
}
static
T
&
EmptyDummy
()
{
static
T
&
EmptyDummy
()
{
static
T
dummy
=
T
();
return
dummy
;
}
...
...
@@ -379,5 +380,53 @@ class Vector {
size_t
size_
;
};
}
// namespace framework
#else // PADDLE_WITH_CUDA
// CPUVector<T> is the CPU-only replacement for Vector<T>, selected when
// PADDLE_WITH_CUDA is not defined. It is a thin subclass of std::vector<T>
// that adds the few extra members callers of Vector<T> rely on (Extend,
// stream output, bounds-checked operator[]), so no host/device
// synchronisation bookkeeping is paid for in CPU builds.
template <typename T>
class CPUVector : public std::vector<T, std::allocator<T>> {
  using Base = std::vector<T, std::allocator<T>>;

 public:
  // Creates an empty vector.
  CPUVector() : Base() {}

  // Creates a vector holding `count` copies of `value`.
  CPUVector(size_t count, const T &value = T())  // NOLINT
      : Base(count, value) {}

  // Creates a vector from a brace-enclosed list of elements.
  CPUVector(std::initializer_list<T> init) : Base(init) {}

  // Implicit conversion from a plain std::vector (copies the elements).
  CPUVector(const std::vector<T> &other) : Base(other) {}  // NOLINT

  // Copy ctor. Deliberately not `explicit`: an explicit copy constructor is
  // skipped by copy-initialization (`auto v = other;`, pass-by-value), which
  // previously fell back to the std::vector converting constructor above.
  CPUVector(const CPUVector<T> &other) : Base(other) {}

  // Move ctors. Marked noexcept so that containers of CPUVector can relocate
  // their elements by move instead of by copy when they grow.
  CPUVector(CPUVector<T> &&other) noexcept : Base(std::move(other)) {}
  CPUVector(std::vector<T> &&other) noexcept  // NOLINT
      : Base(std::move(other)) {}

  // Copy assignment. Delegates to std::vector's own operator=, which is safe
  // for self-assignment; assign(first, last) with iterators into *this is
  // not.
  CPUVector &operator=(const CPUVector &other) {
    Base::operator=(other);
    return *this;
  }

  // Copy assignment from a plain std::vector. `other` may alias *this through
  // a base-class reference, so the same delegation is used here.
  CPUVector &operator=(const std::vector<T> &other) {
    Base::operator=(other);
    return *this;
  }

  // Writes every element followed by a single space, e.g. "1 2 3 ".
  friend std::ostream &operator<<(std::ostream &os, const CPUVector<T> &other) {
    for (const auto &v : other) {
      os << v << " ";
    }
    return os;
  }

  // Resizes to `size` elements; new elements are value-initialized.
  // The call must be qualified with the base class: an unqualified
  // this->resize(size) resolves to this very function and never returns.
  void resize(size_t size) { Base::resize(size); }

  // Element access. Bounds-checked: throws std::out_of_range when
  // id >= size(), unlike std::vector::operator[].
  T &operator[](size_t id) { return this->at(id); }

  const T &operator[](size_t id) const { return this->at(id); }

  // Appends the range [begin, end) to the end of this vector.
  // The range must not refer to elements of this vector.
  // No reserve() call is made up front: reserving the exact final size on
  // every call defeats geometric growth when Extend is called repeatedly,
  // and a range insert already allocates at most once per call.
  template <typename D>
  void Extend(const D &begin, const D &end) {
    this->insert(this->end(), begin, end);
  }
};

// In CPU-only builds Vector<T> is simply CPUVector<T>.
template <typename T>
using Vector = CPUVector<T>;
#endif // PADDLE_WITH_CUDA
};
// namespace framework
}
// namespace paddle
paddle/fluid/operators/adam_op.h
浏览文件 @
02cf54d3
...
...
@@ -293,11 +293,18 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto
&
grad_tensor
=
grad_merge
.
value
();
const
T
*
grad_data
=
grad_tensor
.
template
data
<
T
>();
int64_t
*
rows
=
nullptr
;
// When compiled without CUDA, the CUDAMutableData() interface should not be
// provided.
#if defined(PADDLE_WITH_CUDA)
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
rows
=
grad_merge
.
mutable_rows
()
->
CUDAMutableData
(
ctx
.
GetPlace
());
}
else
{
#endif
rows
=
grad_merge
.
mutable_rows
()
->
data
();
#if defined(PADDLE_WITH_CUDA)
}
#endif
auto
row_numel
=
grad_tensor
.
numel
()
/
grad_merge
.
rows
().
size
();
SparseAdamFunctor
<
T
>
functor
(
...
...
paddle/fluid/operators/detection/target_assign_op.h
浏览文件 @
02cf54d3
...
...
@@ -106,7 +106,11 @@ class TargetAssignKernel : public framework::OpKernel<T> {
int64_t
k
=
x
->
dims
()[
2
];
auto
x_lod
=
x
->
lod
().
back
();
#if defined(PADDLE_WITH_CUDA)
size_t
*
x_lod_data
=
x_lod
.
MutableData
(
ctx
.
GetPlace
());
#else
size_t
*
x_lod_data
=
x_lod
.
data
();
#endif
TargetAssignFunctor
<
T
,
WT
>
functor
(
x_data
,
match_idx_data
,
x_lod_data
,
mismatch_value
,
n
,
m
,
p
,
k
,
out_data
,
...
...
@@ -121,7 +125,11 @@ class TargetAssignKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
neg_indices
->
lod
().
size
(),
1UL
);
const
int
*
neg_idx_data
=
neg_indices
->
data
<
int
>
();
auto
neg_lod
=
neg_indices
->
lod
().
back
();
#if defined(PADDLE_WITH_CUDA)
size_t
*
neg_lod_data
=
neg_lod
.
MutableData
(
ctx
.
GetPlace
());
#else
size_t
*
neg_lod_data
=
neg_lod
.
data
();
#endif
NegTargetAssignFunctor
<
DeviceContext
,
T
,
WT
>
neg_trg_functor
;
neg_trg_functor
(
device_ctx
,
neg_idx_data
,
neg_lod_data
,
n
,
m
,
k
,
mismatch_value
,
out_data
,
out_wt_data
);
...
...
paddle/fluid/operators/math/sequence2batch.h
浏览文件 @
02cf54d3
...
...
@@ -78,7 +78,7 @@ class LoDTensor2BatchFunctor {
auto
lods
=
lod_tensor
.
lod
();
PADDLE_ENFORCE_EQ
(
lods
.
size
(),
1UL
,
"Only support one level sequence now."
);
auto
lod
=
lods
[
0
];
const
auto
&
lod
=
lods
[
0
];
std
::
vector
<
SeqInfo
>
seq_info
;
for
(
size_t
seq_id
=
0
;
seq_id
<
lod
.
size
()
-
1
;
++
seq_id
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录