Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
3043f51b
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3043f51b
编写于
9月 21, 2018
作者:
Y
Yu Yang
提交者:
GitHub
9月 21, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13511 from reyoung/fix_ce
Revert "Merge pull request #13431 from chengduoZH/refine_lod"
上级
cffad81c
a6c8d6b9
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
326 addition
and
381 deletion
+326
-381
paddle/fluid/framework/details/cow_ptr.h
paddle/fluid/framework/details/cow_ptr.h
+61
-23
paddle/fluid/framework/details/cow_ptr_test.cc
paddle/fluid/framework/details/cow_ptr_test.cc
+0
-8
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+241
-326
paddle/fluid/operators/detection_map_op.h
paddle/fluid/operators/detection_map_op.h
+13
-15
paddle/fluid/operators/extract_rows_op.cc
paddle/fluid/operators/extract_rows_op.cc
+1
-1
paddle/fluid/operators/math/selected_rows_functor.cu
paddle/fluid/operators/math/selected_rows_functor.cu
+6
-4
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+1
-0
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+1
-1
python/paddle/fluid/tests/unittests/test_detection_map_op.py
python/paddle/fluid/tests/unittests/test_detection_map_op.py
+2
-3
未找到文件。
paddle/fluid/framework/details/cow_ptr.h
浏览文件 @
3043f51b
...
...
@@ -20,41 +20,79 @@ namespace paddle {
namespace
framework
{
namespace
details
{
template
<
class
T
>
class
COWPtr
{
// Change it to thread safe flags if needed.
class
ThreadUnsafeOwnershipFlags
{
public:
typedef
std
::
shared_ptr
<
T
>
RefPtr
;
explicit
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
private:
RefPtr
m_sp
;
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
&
operator
=
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
(
ThreadUnsafeOwnershipFlags
&&
other
)
=
default
;
void
detach
()
{
T
*
tmp
=
m_sp
.
get
();
if
(
!
(
tmp
==
nullptr
||
m_sp
.
unique
()))
{
m_sp
=
RefPtr
(
new
T
(
*
tmp
));
void
SetOwnership
(
bool
flag
)
{
flag_
=
flag
;
}
// Invoke the callback if it is not owned.
template
<
typename
Callback
>
void
AcquireOwnershipOnce
(
Callback
acquire
)
{
if
(
!
flag_
)
{
acquire
();
flag_
=
true
;
}
}
public:
COWPtr
()
:
m_sp
(
nullptr
)
{}
explicit
COWPtr
(
T
*
t
)
:
m_sp
(
t
)
{}
explicit
COWPtr
(
const
RefPtr
&
refptr
)
:
m_sp
(
refptr
)
{}
private:
bool
flag_
;
};
const
T
&
Data
()
const
{
return
operator
*
();
}
// Copy-On-Write pointer.
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
//
// The template parameter OwnershipFlags should have:
// * a constructor takes a bool. True if own.
// * SetOwnership(bool flag).
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
// owned.
//
// https://en.wikipedia.org/wiki/Copy-on-write
template
<
typename
T
,
typename
OwnershipFlags
=
ThreadUnsafeOwnershipFlags
>
class
COWPtr
{
public:
// Ctor from raw pointer.
explicit
COWPtr
(
T
*
ptr
)
:
payload_
(
ptr
),
ownership_
{
true
}
{}
T
*
MutableData
()
{
return
operator
->
();
}
// Move methods. Steal ownership from origin
COWPtr
(
COWPtr
&&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
std
::
move
(
other
.
ownership_
)}
{}
COWPtr
&
operator
=
(
COWPtr
&&
origin
)
=
default
;
const
T
&
operator
*
()
const
{
return
*
m_sp
;
}
T
&
operator
*
()
{
detach
();
return
*
m_sp
;
// Copy methods. Not own payload
COWPtr
(
const
COWPtr
&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
false
}
{}
COWPtr
&
operator
=
(
const
COWPtr
&
other
)
{
payload_
=
other
.
payload_
;
ownership_
.
SetOwnership
(
false
);
return
*
this
;
}
const
T
*
operator
->
()
const
{
return
m_sp
.
operator
->
();
}
T
*
operator
->
()
{
detach
();
return
m_sp
.
operator
->
();
// Access read only data.
const
T
&
Data
()
const
{
return
*
payload_
;
}
// Access mutable data. If the data is not owned, the data will be copied
// before.
T
*
MutableData
()
{
ownership_
.
AcquireOwnershipOnce
(
[
this
]
{
payload_
.
reset
(
new
T
(
*
payload_
));
});
return
payload_
.
get
();
}
private:
// Actual data pointer.
std
::
shared_ptr
<
T
>
payload_
;
// Ownership flag.
OwnershipFlags
ownership_
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/cow_ptr_test.cc
浏览文件 @
3043f51b
...
...
@@ -30,14 +30,6 @@ TEST(COWPtr, all) {
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
}
TEST
(
COWPtr
,
change_old
)
{
COWPtr
<
int
>
ptr
(
new
int
{
0
});
COWPtr
<
int
>
ptr2
=
ptr
;
*
ptr
.
MutableData
()
=
10
;
ASSERT_EQ
(
ptr2
.
Data
(),
0
);
ASSERT_EQ
(
ptr
.
Data
(),
10
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/mixed_vector.h
浏览文件 @
3043f51b
...
...
@@ -17,12 +17,10 @@
#include <algorithm>
#include <initializer_list>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h"
...
...
@@ -30,165 +28,173 @@ namespace paddle {
namespace
framework
{
#if defined(PADDLE_WITH_CUDA)
namespace
details
{
struct
CUDABuffer
{
void
*
data_
{
nullptr
};
size_t
size_
{
0
};
platform
::
CUDAPlace
place_
;
CUDABuffer
()
{}
CUDABuffer
(
platform
::
Place
place
,
size_t
size
)
:
size_
(
size
),
place_
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
data_
=
memory
::
Alloc
(
place_
,
size
);
}
~
CUDABuffer
()
{
ClearMemory
();
}
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
class
Vector
{
public:
using
value_type
=
T
;
CUDABuffer
(
const
CUDABuffer
&
o
)
=
delete
;
CUDABuffer
&
operator
=
(
const
CUDABuffer
&
o
)
=
delete
;
// Default ctor. Create empty Vector
Vector
()
{
InitEmpty
();
}
void
Resize
(
platform
::
Place
place
,
size_t
size
)
{
ClearMemory
();
place_
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
data_
=
memory
::
Alloc
(
place_
,
size
);
size_
=
size
;
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
ptr
[
i
]
=
value
;
}
}
void
Swap
(
CUDABuffer
&
o
)
{
std
::
swap
(
data_
,
o
.
data_
);
std
::
swap
(
place_
,
o
.
place_
);
std
::
swap
(
size_
,
o
.
size_
);
}
private:
void
ClearMemory
()
const
{
if
(
data_
)
{
memory
::
Free
(
place_
,
data_
);
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
{
if
(
init
.
size
()
==
0
)
{
InitEmpty
();
}
else
{
InitByIter
(
init
.
size
(),
init
.
begin
(),
init
.
end
());
}
}
};
}
// namespace details
// Vector<T> implements the std::vector interface, and can get Data or
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
class
Vector
{
public:
using
value_type
=
T
;
using
iterator
=
typename
std
::
vector
<
T
>::
iterator
;
using
const_iterator
=
typename
std
::
vector
<
T
>::
const_iterator
;
private:
// The actual class to implement vector logic
class
VectorData
{
public:
VectorData
()
:
flag_
(
kDataInCPU
)
{}
VectorData
(
size_t
count
,
const
T
&
value
)
:
cpu_
(
count
,
value
),
flag_
(
kDataInCPU
)
{}
VectorData
(
std
::
initializer_list
<
T
>
init
)
:
cpu_
(
init
),
flag_
(
kDataInCPU
)
{}
// implicit cast from std::vector.
template
<
typename
U
>
explicit
VectorData
(
const
std
::
vector
<
U
>
&
dat
)
:
cpu_
(
dat
),
flag_
(
kDataInCPU
)
{}
VectorData
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
if
(
dat
.
size
()
==
0
)
{
InitEmpty
();
}
else
{
InitByIter
(
dat
.
size
(),
dat
.
begin
(),
dat
.
end
());
}
}
VectorData
&
operator
=
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
details
::
CUDABuffer
null
;
gpu_
.
Swap
(
null
);
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
);
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
if
(
other
.
size
()
!=
0
)
{
this
->
InitByIter
(
other
.
size
(),
other
.
begin
(),
other
.
end
());
}
else
{
InitEmpty
();
}
return
*
this
;
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
this
->
size_
=
other
.
size_
;
this
->
flag_
=
other
.
flag_
;
if
(
other
.
cuda_vec_
.
memory_size
())
{
this
->
cuda_vec_
.
ShareDataWith
(
other
.
cuda_vec_
);
}
if
(
other
.
cpu_vec_
.
memory_size
())
{
this
->
cpu_vec_
.
ShareDataWith
(
other
.
cpu_vec_
);
}
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
return
cpu_
[
i
];
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())
[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
return
cpu_
[
i
];
return
cpu_vec_
.
data
<
T
>
()
[
i
];
}
size_t
size
()
const
{
return
cpu_
.
size
();
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
size_
;
}
iterator
begin
()
{
MutableCPU
();
return
cpu_
.
begin
();
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
iterator
end
()
{
MutableCPU
();
return
cpu_
.
end
();
T
*
end
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
T
&
front
()
{
MutableCPU
();
return
cpu_
.
front
();
}
T
&
front
()
{
return
*
begin
();
}
T
&
back
()
{
MutableCPU
();
return
cpu_
.
back
();
auto
it
=
end
();
--
it
;
return
*
it
;
}
const_iterator
begin
()
const
{
ImmutableCPU
();
return
cpu_
.
begin
();
const
T
*
begin
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
const_iterator
end
()
const
{
ImmutableCPU
();
return
cpu_
.
end
();
const
T
*
end
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
ImmutableCPU
();
return
cpu_
.
back
();
auto
it
=
end
();
--
it
;
return
*
it
;
}
T
*
data
()
{
return
&
(
*
this
)[
0
]
;
}
T
*
data
()
{
return
begin
()
;
}
const
T
*
data
()
const
{
return
&
(
*
this
)[
0
]
;
}
const
T
*
data
()
const
{
return
begin
()
;
}
const
T
&
front
()
const
{
ImmutableCPU
();
return
cpu_
.
front
();
}
const
T
&
front
()
const
{
return
*
begin
();
}
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
MutableCPU
();
cpu_
.
assign
(
begin
,
end
);
InitByIter
(
end
-
begin
,
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
MutableCPU
();
cpu_
.
push_back
(
elem
);
if
(
size_
+
1
>
capacity
())
{
reserve
((
size_
+
1
)
<<
1
);
}
*
end
()
=
elem
;
++
size_
;
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
MutableCPU
();
auto
out_it
=
std
::
back_inserter
<
std
::
vector
<
T
>>
(
this
->
cpu_
);
std
::
copy
(
begin
,
end
,
out_it
);
size_t
pre_size
=
size_
;
resize
(
pre_size
+
(
end
-
begin
));
T
*
ptr
=
this
->
begin
()
+
pre_size
;
for
(;
begin
<
end
;
++
begin
,
++
ptr
)
{
*
ptr
=
*
begin
;
}
}
// resize the vector
void
resize
(
size_t
size
)
{
if
(
size
+
1
<=
capacity
())
{
size_
=
size
;
}
else
{
MutableCPU
();
cpu_
.
resize
(
size
);
Tensor
cpu_tensor
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
const
T
*
old_ptr
=
cpu_vec_
.
memory_size
()
==
0
?
nullptr
:
cpu_vec_
.
data
<
T
>
();
if
(
old_ptr
!=
nullptr
)
{
std
::
copy
(
old_ptr
,
old_ptr
+
size_
,
ptr
);
}
size_
=
size
;
cpu_vec_
.
ShareDataWith
(
cpu_tensor
);
}
}
// get cuda ptr. immutable
...
...
@@ -196,7 +202,7 @@ class Vector {
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
return
reinterpret_cast
<
T
*>
(
gpu_
.
data_
);
return
cuda_vec_
.
data
<
T
>
(
);
}
// get cuda ptr. mutable
...
...
@@ -208,28 +214,77 @@ class Vector {
// clear
void
clear
()
{
cpu_
.
clear
()
;
size_
=
0
;
flag_
=
kDirty
|
kDataInCPU
;
}
size_t
capacity
()
const
{
return
cpu_
.
capacity
();
}
size_t
capacity
()
const
{
return
cpu_vec_
.
memory_size
()
/
SizeOfType
(
typeid
(
T
));
}
// reserve data
void
reserve
(
size_t
size
)
{
cpu_
.
reserve
(
size
);
}
void
reserve
(
size_t
size
)
{
size_t
pre_size
=
size_
;
resize
(
size
);
resize
(
pre_size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAMutableData
(
place
);
}
else
{
return
data
();
}
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
ImmutableCPU
();
return
cpu_
;
std
::
vector
<
T
>
result
;
result
.
resize
(
size
());
std
::
copy
(
begin
(),
end
(),
result
.
begin
());
return
result
;
}
bool
operator
==
(
const
VectorData
&
other
)
const
{
ImmutableCPU
();
other
.
ImmutableCPU
();
return
cpu_
==
other
.
cpu_
;
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
private:
void
InitEmpty
()
{
size_
=
0
;
flag_
=
kDataInCPU
;
}
template
<
typename
Iter
>
void
InitByIter
(
size_t
size
,
Iter
begin
,
Iter
end
)
{
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
*
ptr
++
=
*
begin
++
;
}
flag_
=
kDataInCPU
|
kDirty
;
size_
=
size
;
}
enum
DataFlag
{
kDataInCPU
=
0x01
,
kDataInCUDA
=
0x02
,
...
...
@@ -239,10 +294,8 @@ class Vector {
void
CopyToCPU
()
const
{
// COPY GPU Data To CPU
void
*
src
=
gpu_
.
data_
;
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
gpu_
.
place_
,
src
,
gpu_
.
size_
,
nullptr
);
TensorCopy
(
cuda_vec_
,
platform
::
CPUPlace
(),
&
cpu_vec_
);
WaitPlace
(
cuda_vec_
.
place
());
}
void
MutableCPU
()
{
...
...
@@ -255,12 +308,16 @@ class Vector {
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
if
(
IsDirty
())
{
if
(
IsInCPU
())
{
CopyCPUDataToCUDA
(
place
);
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
cuda_vec_
);
WaitPlace
(
place
);
UnsetFlag
(
kDirty
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
IsInCUDA
()
&&
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
CopyCUDADataToAnotherPlace
(
place
);
}
else
if
(
IsInCUDA
()
&&
!
(
place
==
cuda_vec_
.
place
()))
{
framework
::
Tensor
tmp
;
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
cuda_vec_
.
ShareDataWith
(
tmp
);
// Still dirty
}
else
{
// Dirty && DataInCUDA && Device is same
...
...
@@ -269,38 +326,27 @@ class Vector {
}
else
{
if
(
!
IsInCUDA
())
{
// Even data is not dirty. However, data is not in CUDA. Copy data.
CopyCPUDataToCUDA
(
place
);
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
cuda_vec_
);
WaitPlace
(
place
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
CopyCUDADataToAnotherPlace
(
place
);
}
else
if
(
!
(
place
==
cuda_vec_
.
place
()))
{
framework
::
Tensor
tmp
;
WaitPlace
(
cuda_vec_
.
place
());
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
WaitPlace
(
place
);
cuda_vec_
.
ShareDataWith
(
tmp
);
}
else
{
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
}
}
}
void
CopyCUDADataToAnotherPlace
(
const
platform
::
Place
&
place
)
const
{
details
::
CUDABuffer
tmp
(
place
,
gpu_
.
size_
);
const
void
*
src
=
gpu_
.
data_
;
void
*
dst
=
tmp
.
data_
;
memory
::
Copy
(
tmp
.
place_
,
dst
,
gpu_
.
place_
,
src
,
gpu_
.
size_
,
nullptr
);
gpu_
.
Swap
(
tmp
);
}
void
CopyCPUDataToCUDA
(
const
platform
::
Place
&
place
)
const
{
void
*
src
=
cpu_
.
data
();
gpu_
.
Resize
(
place
,
cpu_
.
size
()
*
sizeof
(
T
));
void
*
dst
=
gpu_
.
data_
;
auto
stream
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
))
->
stream
();
memory
::
Copy
(
gpu_
.
place_
,
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
.
size_
,
stream
);
}
void
ImmutableCPU
()
const
{
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or
//
CPU has no data.
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or
CPU has no data.
CopyToCPU
();
UnsetFlag
(
kDirty
);
}
...
...
@@ -316,154 +362,23 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
details
::
CUDABuffer
gpu_
;
mutable
int
flag_
;
};
public:
// Default ctor. Create empty Vector
Vector
()
:
m_
(
new
VectorData
())
{}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
:
m_
(
new
VectorData
(
count
,
value
))
{}
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
:
m_
(
new
VectorData
(
init
))
{}
// implicit cast from std::vector.
template
<
typename
U
>
Vector
(
const
std
::
vector
<
U
>
&
dat
)
:
m_
(
new
VectorData
(
dat
))
{
// NOLINT
}
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
return
*
this
;
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
m_
=
std
::
move
(
other
.
m_
);
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
return
(
*
m_
)[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
return
(
*
m_
)[
i
];
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
m_
->
size
();
}
iterator
begin
()
{
return
m_
->
begin
();
}
iterator
end
()
{
return
m_
->
end
();
}
T
&
front
()
{
return
m_
->
front
();
}
T
&
back
()
{
return
m_
->
back
();
}
const_iterator
begin
()
const
{
return
m_
->
begin
();
}
const_iterator
end
()
const
{
return
m_
->
end
();
}
const_iterator
cbegin
()
const
{
return
begin
();
}
const_iterator
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
return
m_
->
back
();
}
T
*
data
()
{
return
m_
->
data
();
}
const
T
*
data
()
const
{
return
m_
->
data
();
}
const
T
&
front
()
const
{
return
m_
->
front
();
}
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
m_
->
assign
(
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
m_
->
push_back
(
elem
);
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
m_
->
Extend
(
begin
,
end
);
}
// resize the vector
void
resize
(
size_t
size
)
{
if
(
m_
.
Data
().
size
()
!=
size
)
{
m_
->
resize
(
size
);
}
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
return
m_
.
Data
().
CUDAData
(
place
);
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
return
m_
->
CUDAMutableData
(
place
);
}
// clear
void
clear
()
{
m_
->
clear
();
}
size_t
capacity
()
const
{
return
m_
->
capacity
();
}
// reserve data
void
reserve
(
size_t
size
)
{
m_
->
reserve
(
size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
static
void
WaitPlace
(
const
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
platform
::
DeviceContextPool
::
Instance
()
.
Get
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
->
Wait
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAMutableData
(
place
);
}
else
{
return
data
();
}
static
T
&
EmptyDummy
()
{
static
T
dummy
=
T
();
return
dummy
;
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
return
*
m_
;
}
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
const
void
*
Handle
()
const
{
return
&
m_
.
Data
();
}
private:
// Vector is an COW object.
details
::
COWPtr
<
VectorData
>
m_
;
mutable
int
flag_
;
mutable
Tensor
cpu_vec_
;
mutable
Tensor
cuda_vec_
;
size_t
size_
;
};
#else // PADDLE_WITH_CUDA
...
...
paddle/fluid/operators/detection_map_op.h
浏览文件 @
3043f51b
...
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
auto
&
label_lod
=
in_label
->
lod
();
auto
&
detect_lod
=
in_detect
->
lod
();
auto
label_lod
=
in_label
->
lod
();
auto
detect_lod
=
in_detect
->
lod
();
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
...
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
&
label_lod
=
input_label
.
lod
();
auto
&
detect_lod
=
input_detect
.
lod
();
auto
label_lod
=
input_label
.
lod
();
auto
detect_lod
=
input_detect
.
lod
();
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
auto
&
label_index
=
label_lod
[
0
];
auto
label_index
=
label_lod
[
0
];
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
...
...
@@ -274,6 +274,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
output_true_pos
->
set_lod
(
true_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
return
;
}
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
...
...
@@ -291,7 +292,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
auto
&
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
auto
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
T
score
=
pos_data
[
j
*
2
];
...
...
@@ -316,23 +317,20 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
int
batch_size
=
gt_boxes
.
size
();
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
auto
&
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
&
image_gt_box
:
image_gt_boxes
)
{
auto
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
it
=
image_gt_boxes
.
begin
();
it
!=
image_gt_boxes
.
end
();
++
it
)
{
size_t
count
=
0
;
auto
&
labeled_bboxes
=
image_gt_box
.
second
;
auto
labeled_bboxes
=
it
->
second
;
if
(
evaluate_difficult
)
{
count
=
labeled_bboxes
.
size
();
}
else
{
for
(
auto
&
box
:
labeled_bboxes
)
{
if
(
!
box
.
is_difficult
)
{
++
count
;
}
}
for
(
size_t
i
=
0
;
i
<
labeled_bboxes
.
size
();
++
i
)
if
(
!
(
labeled_bboxes
[
i
].
is_difficult
))
++
count
;
}
if
(
count
==
0
)
{
continue
;
}
int
label
=
i
mage_gt_box
.
first
;
int
label
=
i
t
->
first
;
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
(
*
label_pos_count
)[
label
]
=
count
;
}
else
{
...
...
paddle/fluid/operators/extract_rows_op.cc
浏览文件 @
3043f51b
...
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
&
in_rows
=
in
.
rows
();
auto
in_rows
=
in
.
rows
();
auto
out_dim
=
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
...
...
paddle/fluid/operators/math/selected_rows_functor.cu
浏览文件 @
3043f51b
...
...
@@ -60,9 +60,11 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
in1_value
.
numel
()
*
sizeof
(
T
),
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
).
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
...
...
@@ -146,7 +148,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
auto
in1_height
=
input1
.
height
();
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
auto
&
in1_rows
=
input1
.
rows
(
);
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
()
);
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in1_value
=
input1
.
value
();
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
3043f51b
...
...
@@ -123,6 +123,7 @@ class SumKernel : public framework::OpKernel<T> {
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
// if all the input sparse vars are empty, no need to
// merge these vars.
if
(
first_dim
==
0UL
)
{
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
3043f51b
...
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
)
+
" in class "
+
self
.
__class__
.
__name__
)
str
(
actual_t
))
if
isinstance
(
expect
,
tuple
):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
...
...
python/paddle/fluid/tests/unittests/test_detection_map_op.py
浏览文件 @
3043f51b
...
...
@@ -20,7 +20,6 @@ import six
import
sys
import
collections
import
math
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
...
...
@@ -33,7 +32,7 @@ class TestDetectionMAPOp(OpTest):
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
if
len
(
self
.
class_pos_count
)
>
0
:
if
(
len
(
self
.
class_pos_count
)
>
0
)
:
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
'int32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
...
...
@@ -274,7 +273,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
def
init_test_case
(
self
):
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
self
.
class_pos_count
=
[
0
,
2
,
1
,
0
]
self
.
class_pos_count
=
[
0
,
2
,
1
]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录