Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
0be1582d
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
0be1582d
编写于
9月 27, 2018
作者:
Y
Yu Yang
提交者:
GitHub
9月 27, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13525 from reyoung/fix_mixed_vector
Fix mixed vector
上级
4e81e228
e1913bc5
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
416 addition
and
331 deletion
+416
-331
paddle/fluid/framework/details/cow_ptr.h
paddle/fluid/framework/details/cow_ptr.h
+19
-61
paddle/fluid/framework/details/cow_ptr_test.cc
paddle/fluid/framework/details/cow_ptr_test.cc
+8
-0
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+362
-241
paddle/fluid/operators/detection_map_op.h
paddle/fluid/operators/detection_map_op.h
+15
-13
paddle/fluid/operators/extract_rows_op.cc
paddle/fluid/operators/extract_rows_op.cc
+1
-1
paddle/fluid/operators/lookup_table_op.cu
paddle/fluid/operators/lookup_table_op.cu
+2
-4
paddle/fluid/operators/math/selected_rows_functor.cu
paddle/fluid/operators/math/selected_rows_functor.cu
+4
-6
paddle/fluid/operators/sgd_op.cu
paddle/fluid/operators/sgd_op.cu
+1
-1
paddle/fluid/operators/sum_op.h
paddle/fluid/operators/sum_op.h
+0
-1
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+1
-1
python/paddle/fluid/tests/unittests/test_detection_map_op.py
python/paddle/fluid/tests/unittests/test_detection_map_op.py
+3
-2
未找到文件。
paddle/fluid/framework/details/cow_ptr.h
浏览文件 @
0be1582d
...
@@ -20,79 +20,37 @@ namespace paddle {
...
@@ -20,79 +20,37 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
details
{
namespace
details
{
// Change it to thread safe flags if needed.
template
<
class
T
>
class
ThreadUnsafeOwnershipFlags
{
class
COWPtr
{
public:
public:
explicit
ThreadUnsafeOwnershipFlags
(
bool
flag
)
:
flag_
(
flag
)
{}
typedef
std
::
shared_ptr
<
T
>
RefPtr
;
ThreadUnsafeOwnershipFlags
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
&
operator
=
(
const
ThreadUnsafeOwnershipFlags
&
other
)
=
delete
;
ThreadUnsafeOwnershipFlags
(
ThreadUnsafeOwnershipFlags
&&
other
)
=
default
;
void
SetOwnership
(
bool
flag
)
{
flag_
=
flag
;
}
// Invoke the callback if it is not owned.
template
<
typename
Callback
>
void
AcquireOwnershipOnce
(
Callback
acquire
)
{
if
(
!
flag_
)
{
acquire
();
flag_
=
true
;
}
}
private:
private:
bool
flag_
;
RefPtr
m_sp
;
};
// Copy-On-Write pointer.
// It will hold a T* pointer, and only copy once when `MutableData` is invoked.
//
// The template parameter OwnershipFlags should have:
// * a constructor takes a bool. True if own.
// * SetOwnership(bool flag).
// * AcquireOwnershipOnce(Callback). It will invoke the callback if it is not
// owned.
//
// https://en.wikipedia.org/wiki/Copy-on-write
template
<
typename
T
,
typename
OwnershipFlags
=
ThreadUnsafeOwnershipFlags
>
class
COWPtr
{
public:
public:
// Ctor from raw pointer.
COWPtr
()
:
m_sp
(
nullptr
)
{}
explicit
COWPtr
(
T
*
ptr
)
:
payload_
(
ptr
),
ownership_
{
true
}
{}
explicit
COWPtr
(
T
*
t
)
:
m_sp
(
t
)
{}
// Move methods. Steal ownership from origin
const
T
&
Data
()
const
{
return
*
m_sp
;
}
COWPtr
(
COWPtr
&&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
std
::
move
(
other
.
ownership_
)}
{}
COWPtr
&
operator
=
(
COWPtr
&&
origin
)
=
default
;
// Copy methods. Not own payload
COWPtr
(
const
COWPtr
&
other
)
:
payload_
(
other
.
payload_
),
ownership_
{
false
}
{}
COWPtr
&
operator
=
(
const
COWPtr
&
other
)
{
payload_
=
other
.
payload_
;
ownership_
.
SetOwnership
(
false
);
return
*
this
;
}
// Access read only data.
const
T
&
Data
()
const
{
return
*
payload_
;
}
// Access mutable data. If the data is not owned, the data will be copied
// before.
T
*
MutableData
()
{
T
*
MutableData
()
{
ownership_
.
AcquireOwnershipOnce
(
DetachIfNotUnique
();
[
this
]
{
payload_
.
reset
(
new
T
(
*
payload_
));
});
return
m_sp
.
get
();
return
payload_
.
get
();
}
}
private:
void
DetachIfNotUnique
()
{
// Actual data pointer.
T
*
tmp
=
m_sp
.
get
();
std
::
shared_ptr
<
T
>
payload_
;
if
(
!
(
tmp
==
nullptr
||
m_sp
.
unique
()))
{
Detach
();
}
}
// Ownership flag.
void
Detach
()
{
OwnershipFlags
ownership_
;
T
*
tmp
=
m_sp
.
get
();
m_sp
=
RefPtr
(
new
T
(
*
tmp
));
}
};
};
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/details/cow_ptr_test.cc
浏览文件 @
0be1582d
...
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
...
@@ -30,6 +30,14 @@ TEST(COWPtr, all) {
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
ASSERT_EQ
(
ptr2
.
Data
(),
10
);
}
}
TEST
(
COWPtr
,
change_old
)
{
COWPtr
<
int
>
ptr
(
new
int
{
0
});
COWPtr
<
int
>
ptr2
=
ptr
;
*
ptr
.
MutableData
()
=
10
;
ASSERT_EQ
(
ptr2
.
Data
(),
0
);
ASSERT_EQ
(
ptr
.
Data
(),
10
);
}
}
// namespace details
}
// namespace details
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/mixed_vector.h
浏览文件 @
0be1582d
...
@@ -17,10 +17,13 @@
...
@@ -17,10 +17,13 @@
#include <algorithm>
#include <algorithm>
#include <initializer_list>
#include <initializer_list>
#include <memory>
#include <memory>
#include <mutex> // NOLINT
#include <utility>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/details/cow_ptr.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/memory/memcpy.h"
#include "glog/logging.h"
#include "glog/logging.h"
...
@@ -28,173 +31,167 @@ namespace paddle {
...
@@ -28,173 +31,167 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
#if defined(PADDLE_WITH_CUDA)
#if defined(PADDLE_WITH_CUDA)
// Vector<T> implements the std::vector interface, and can get Data or
namespace
details
{
// MutableData from any place. The data will be synced implicitly inside.
struct
CUDABuffer
{
template
<
typename
T
>
void
*
data_
{
nullptr
};
class
Vector
{
size_t
size_
{
0
};
public:
platform
::
CUDAPlace
place_
;
using
value_type
=
T
;
// Default ctor. Create empty Vector
CUDABuffer
()
{}
Vector
()
{
InitEmpty
();
}
CUDABuffer
(
platform
::
Place
place
,
size_t
size
)
:
size_
(
size
),
place_
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
// Fill vector with value. The vector size is `count`.
data_
=
memory
::
Alloc
(
place_
,
size
);
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
{
InitEmpty
();
if
(
count
!=
0
)
{
resize
(
count
);
T
*
ptr
=
begin
();
for
(
size_t
i
=
0
;
i
<
count
;
++
i
)
{
ptr
[
i
]
=
value
;
}
}
}
}
// Ctor with init_list
~
CUDABuffer
()
{
ClearMemory
();
}
Vector
(
std
::
initializer_list
<
T
>
init
)
{
if
(
init
.
size
()
==
0
)
{
CUDABuffer
(
const
CUDABuffer
&
o
)
=
delete
;
InitEmpty
();
CUDABuffer
&
operator
=
(
const
CUDABuffer
&
o
)
=
delete
;
}
else
{
InitByIter
(
init
.
size
(),
init
.
begin
(),
init
.
end
());
void
Resize
(
platform
::
Place
place
,
size_t
size
)
{
ClearMemory
();
place_
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place
);
data_
=
memory
::
Alloc
(
place_
,
size
);
PADDLE_ENFORCE_NOT_NULL
(
data_
);
size_
=
size
;
}
}
void
Swap
(
CUDABuffer
&
o
)
{
std
::
swap
(
data_
,
o
.
data_
);
std
::
swap
(
place_
,
o
.
place_
);
std
::
swap
(
size_
,
o
.
size_
);
}
}
// implicit cast from std::vector.
private:
template
<
typename
U
>
void
ClearMemory
()
const
{
Vector
(
const
std
::
vector
<
U
>
&
dat
)
{
// NOLINT
if
(
data_
!=
nullptr
)
{
if
(
dat
.
size
()
==
0
)
{
memory
::
Free
(
place_
,
data_
);
InitEmpty
();
}
else
{
InitByIter
(
dat
.
size
(),
dat
.
begin
(),
dat
.
end
());
}
}
}
}
};
}
// namespace details
// Copy ctor
// Vector<T> implements the std::vector interface, and can get Data or
Vector
(
const
Vector
<
T
>
&
other
)
{
this
->
operator
=
(
other
);
}
// MutableData from any place. The data will be synced implicitly inside.
template
<
typename
T
>
class
Vector
{
public:
using
value_type
=
T
;
using
iterator
=
typename
std
::
vector
<
T
>::
iterator
;
using
const_iterator
=
typename
std
::
vector
<
T
>::
const_iterator
;
// Copy operator
private:
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
// The actual class to implement vector logic
if
(
other
.
size
()
!=
0
)
{
class
VectorData
{
this
->
InitByIter
(
other
.
size
(),
other
.
begin
(),
other
.
end
());
public:
}
else
{
VectorData
()
:
flag_
(
kDataInCPU
)
{}
InitEmpty
();
VectorData
(
size_t
count
,
const
T
&
value
)
}
:
cpu_
(
count
,
value
),
flag_
(
kDataInCPU
)
{}
return
*
this
;
VectorData
(
std
::
initializer_list
<
T
>
init
)
:
cpu_
(
init
),
flag_
(
kDataInCPU
)
{}
}
template
<
typename
U
>
explicit
VectorData
(
const
std
::
vector
<
U
>
&
dat
)
:
cpu_
(
dat
),
flag_
(
kDataInCPU
)
{}
~
VectorData
()
{}
// Move ctor
VectorData
(
const
VectorData
&
o
)
{
Vector
(
Vector
<
T
>
&&
other
)
{
o
.
ImmutableCPU
();
this
->
size_
=
other
.
size_
;
cpu_
=
o
.
cpu_
;
this
->
flag_
=
other
.
flag_
;
flag_
=
kDataInCPU
;
if
(
other
.
cuda_vec_
.
memory_size
())
{
this
->
cuda_vec_
.
ShareDataWith
(
other
.
cuda_vec_
);
}
if
(
other
.
cpu_vec_
.
memory_size
())
{
this
->
cpu_vec_
.
ShareDataWith
(
other
.
cpu_vec_
);
}
}
VectorData
&
operator
=
(
const
VectorData
&
o
)
{
o
.
ImmutableCPU
();
cpu_
=
o
.
cpu_
;
flag_
=
kDataInCPU
;
details
::
CUDABuffer
null
;
gpu_
.
Swap
(
null
);
return
*
this
;
}
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
T
&
operator
[](
size_t
i
)
{
MutableCPU
();
MutableCPU
();
return
const_cast
<
T
*>
(
cpu_vec_
.
data
<
T
>
())
[
i
];
return
cpu_
[
i
];
}
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
const
T
&
operator
[](
size_t
i
)
const
{
ImmutableCPU
();
ImmutableCPU
();
return
cpu_vec_
.
data
<
T
>
()
[
i
];
return
cpu_
[
i
];
}
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
cpu_
.
size
();
}
size_t
size
()
const
{
return
size_
;
}
T
*
begin
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
}
iterator
begin
()
{
MutableCPU
();
return
cpu_
.
begin
();
}
T
*
end
()
{
iterator
end
()
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
MutableCPU
();
return
cpu_
.
end
();
}
}
T
&
front
()
{
return
*
begin
();
}
T
&
front
()
{
MutableCPU
();
return
cpu_
.
front
();
}
T
&
back
()
{
T
&
back
()
{
auto
it
=
end
();
MutableCPU
();
--
it
;
return
cpu_
.
back
();
return
*
it
;
}
}
const
T
*
begin
()
const
{
const_iterator
begin
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
0
);
ImmutableCPU
();
return
cpu_
.
begin
();
}
}
const
T
*
end
()
const
{
const_iterator
end
()
const
{
return
capacity
()
==
0
?
&
EmptyDummy
()
:
&
this
->
operator
[](
size
());
ImmutableCPU
();
return
cpu_
.
end
();
}
}
const
T
*
cbegin
()
const
{
return
begin
();
}
const
T
*
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
const
T
&
back
()
const
{
auto
it
=
end
();
ImmutableCPU
();
--
it
;
return
cpu_
.
back
();
return
*
it
;
}
}
T
*
data
()
{
return
begin
()
;
}
T
*
data
()
{
return
&
(
*
this
)[
0
]
;
}
const
T
*
data
()
const
{
return
begin
()
;
}
const
T
*
data
()
const
{
return
&
(
*
this
)[
0
]
;
}
const
T
&
front
()
const
{
return
*
begin
();
}
const
T
&
front
()
const
{
// end of std::vector iterator methods
ImmutableCPU
();
return
cpu_
.
front
();
}
// assign this from iterator.
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
void
assign
(
Iter
begin
,
Iter
end
)
{
InitByIter
(
end
-
begin
,
begin
,
end
);
MutableCPU
();
cpu_
.
assign
(
begin
,
end
);
}
}
// push_back. If the previous capacity is not enough, the memory will
// push_back. If the previous capacity is not enough, the memory will
// double.
// double.
void
push_back
(
T
elem
)
{
void
push_back
(
T
elem
)
{
if
(
size_
+
1
>
capacity
())
{
MutableCPU
();
reserve
((
size_
+
1
)
<<
1
);
cpu_
.
push_back
(
elem
);
}
*
end
()
=
elem
;
++
size_
;
}
}
// extend a vector by iterator.
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
// NOTE: the iterator must support end-begin
template
<
typename
It
>
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
void
Extend
(
It
begin
,
It
end
)
{
size_t
pre_size
=
size_
;
MutableCPU
();
resize
(
pre_size
+
(
end
-
begin
));
auto
out_it
=
std
::
back_inserter
<
std
::
vector
<
T
>>
(
this
->
cpu_
);
T
*
ptr
=
this
->
begin
()
+
pre_size
;
std
::
copy
(
begin
,
end
,
out_it
);
for
(;
begin
<
end
;
++
begin
,
++
ptr
)
{
*
ptr
=
*
begin
;
}
}
}
// resize the vector
// resize the vector
void
resize
(
size_t
size
)
{
void
resize
(
size_t
size
)
{
if
(
size
+
1
<=
capacity
())
{
size_
=
size
;
}
else
{
MutableCPU
();
MutableCPU
();
Tensor
cpu_tensor
;
cpu_
.
resize
(
size
);
platform
::
Place
cpu
=
platform
::
CPUPlace
();
T
*
ptr
=
cpu_tensor
.
mutable_data
<
T
>
(
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
const
T
*
old_ptr
=
cpu_vec_
.
memory_size
()
==
0
?
nullptr
:
cpu_vec_
.
data
<
T
>
();
if
(
old_ptr
!=
nullptr
)
{
std
::
copy
(
old_ptr
,
old_ptr
+
size_
,
ptr
);
}
size_
=
size
;
cpu_vec_
.
ShareDataWith
(
cpu_tensor
);
}
}
}
// get cuda ptr. immutable
// get cuda ptr. immutable
...
@@ -202,7 +199,7 @@ class Vector {
...
@@ -202,7 +199,7 @@ class Vector {
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"CUDA Data must on CUDA place"
);
"CUDA Data must on CUDA place"
);
ImmutableCUDA
(
place
);
ImmutableCUDA
(
place
);
return
cuda_vec_
.
data
<
T
>
(
);
return
reinterpret_cast
<
T
*>
(
gpu_
.
data_
);
}
}
// get cuda ptr. mutable
// get cuda ptr. mutable
...
@@ -214,77 +211,39 @@ class Vector {
...
@@ -214,77 +211,39 @@ class Vector {
// clear
// clear
void
clear
()
{
void
clear
()
{
size_
=
0
;
cpu_
.
clear
()
;
flag_
=
kDirty
|
kDataInCPU
;
flag_
=
kDirty
|
kDataInCPU
;
}
}
size_t
capacity
()
const
{
size_t
capacity
()
const
{
return
cpu_
.
capacity
();
}
return
cpu_vec_
.
memory_size
()
/
SizeOfType
(
typeid
(
T
));
}
// reserve data
// reserve data
void
reserve
(
size_t
size
)
{
void
reserve
(
size_t
size
)
const
{
cpu_
.
reserve
(
size
);
}
size_t
pre_size
=
size_
;
resize
(
size
);
resize
(
pre_size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAMutableData
(
place
);
}
else
{
return
data
();
}
}
// implicit cast operator. Vector can be cast to std::vector implicitly.
// implicit cast operator. Vector can be cast to std::vector implicitly.
operator
std
::
vector
<
T
>
()
const
{
operator
std
::
vector
<
T
>
()
const
{
std
::
vector
<
T
>
result
;
ImmutableCPU
();
result
.
resize
(
size
());
return
cpu_
;
std
::
copy
(
begin
(),
end
(),
result
.
begin
());
return
result
;
}
}
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
bool
operator
==
(
const
VectorData
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
ImmutableCPU
();
auto
it1
=
cbegin
();
other
.
ImmutableCPU
();
auto
it2
=
other
.
cbegin
();
return
cpu_
==
other
.
cpu_
;
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
}
private:
std
::
mutex
&
Mutex
()
const
{
return
mtx_
;
}
void
InitEmpty
()
{
size_
=
0
;
flag_
=
kDataInCPU
;
}
template
<
typename
Iter
>
std
::
unique_ptr
<
platform
::
CUDAPlace
>
CUDAPlace
()
const
{
void
InitByIter
(
size_t
size
,
Iter
begin
,
Iter
end
)
{
if
(
gpu_
.
data_
==
nullptr
)
{
platform
::
Place
cpu
=
platform
::
CPUPlace
();
return
nullptr
;
T
*
ptr
=
this
->
cpu_vec_
.
template
mutable_data
<
T
>(
}
else
{
framework
::
make_ddim
({
static_cast
<
int64_t
>
(
size
)}),
cpu
);
return
std
::
unique_ptr
<
platform
::
CUDAPlace
>
(
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
new
platform
::
CUDAPlace
(
gpu_
.
place_
));
*
ptr
++
=
*
begin
++
;
}
}
flag_
=
kDataInCPU
|
kDirty
;
size_
=
size
;
}
}
private:
enum
DataFlag
{
enum
DataFlag
{
kDataInCPU
=
0x01
,
kDataInCPU
=
0x01
,
kDataInCUDA
=
0x02
,
kDataInCUDA
=
0x02
,
...
@@ -294,8 +253,15 @@ class Vector {
...
@@ -294,8 +253,15 @@ class Vector {
void
CopyToCPU
()
const
{
void
CopyToCPU
()
const
{
// COPY GPU Data To CPU
// COPY GPU Data To CPU
TensorCopy
(
cuda_vec_
,
platform
::
CPUPlace
(),
&
cpu_vec_
);
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
WaitPlace
(
cuda_vec_
.
place
());
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
Place
(
gpu_
.
place_
)));
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
.
data_
;
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
gpu_
.
place_
,
src
,
gpu_
.
size_
,
stream
);
dev_ctx
->
Wait
();
}
}
void
MutableCPU
()
{
void
MutableCPU
()
{
...
@@ -308,16 +274,12 @@ class Vector {
...
@@ -308,16 +274,12 @@ class Vector {
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
void
ImmutableCUDA
(
platform
::
Place
place
)
const
{
if
(
IsDirty
())
{
if
(
IsDirty
())
{
if
(
IsInCPU
())
{
if
(
IsInCPU
())
{
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
CopyCPUDataToCUDA
(
place
);
&
cuda_vec_
);
WaitPlace
(
place
);
UnsetFlag
(
kDirty
);
UnsetFlag
(
kDirty
);
SetFlag
(
kDataInCUDA
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
IsInCUDA
()
&&
!
(
place
==
cuda_vec_
.
place
()))
{
}
else
if
(
IsInCUDA
()
&&
framework
::
Tensor
tmp
;
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
PADDLE_THROW
(
"This situation should not happen"
);
WaitPlace
(
cuda_vec_
.
place
());
cuda_vec_
.
ShareDataWith
(
tmp
);
// Still dirty
// Still dirty
}
else
{
}
else
{
// Dirty && DataInCUDA && Device is same
// Dirty && DataInCUDA && Device is same
...
@@ -326,17 +288,10 @@ class Vector {
...
@@ -326,17 +288,10 @@ class Vector {
}
else
{
}
else
{
if
(
!
IsInCUDA
())
{
if
(
!
IsInCUDA
())
{
// Even data is not dirty. However, data is not in CUDA. Copy data.
// Even data is not dirty. However, data is not in CUDA. Copy data.
TensorCopy
(
cpu_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
CopyCPUDataToCUDA
(
place
);
&
cuda_vec_
);
WaitPlace
(
place
);
SetFlag
(
kDataInCUDA
);
SetFlag
(
kDataInCUDA
);
}
else
if
(
!
(
place
==
cuda_vec_
.
place
()))
{
}
else
if
(
!
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
)
==
gpu_
.
place_
))
{
framework
::
Tensor
tmp
;
PADDLE_THROW
(
"This situation should not happen."
);
WaitPlace
(
cuda_vec_
.
place
());
TensorCopy
(
cuda_vec_
,
boost
::
get
<
platform
::
CUDAPlace
>
(
place
),
&
tmp
);
WaitPlace
(
cuda_vec_
.
place
());
WaitPlace
(
place
);
cuda_vec_
.
ShareDataWith
(
tmp
);
}
else
{
}
else
{
// Not Dirty && DataInCUDA && Device is same
// Not Dirty && DataInCUDA && Device is same
// Do nothing.
// Do nothing.
...
@@ -344,9 +299,20 @@ class Vector {
...
@@ -344,9 +299,20 @@ class Vector {
}
}
}
}
void
CopyCPUDataToCUDA
(
const
platform
::
Place
&
place
)
const
{
void
*
src
=
cpu_
.
data
();
gpu_
.
Resize
(
place
,
cpu_
.
size
()
*
sizeof
(
T
));
void
*
dst
=
gpu_
.
data_
;
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
gpu_
.
place_
,
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
.
size_
,
stream
);
}
void
ImmutableCPU
()
const
{
void
ImmutableCPU
()
const
{
if
(
IsDirty
()
&&
if
(
IsDirty
()
&&
!
IsInCPU
())
{
// If data has been changed in CUDA, or
!
IsInCPU
())
{
// If data has been changed in CUDA, or
CPU has no data.
//
CPU has no data.
CopyToCPU
();
CopyToCPU
();
UnsetFlag
(
kDirty
);
UnsetFlag
(
kDirty
);
}
}
...
@@ -362,23 +328,178 @@ class Vector {
...
@@ -362,23 +328,178 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
static
void
WaitPlace
(
const
platform
::
Place
place
)
{
mutable
std
::
vector
<
T
>
cpu_
;
mutable
details
::
CUDABuffer
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
};
public:
// Default ctor. Create empty Vector
Vector
()
:
m_
(
new
VectorData
())
{}
// Fill vector with value. The vector size is `count`.
explicit
Vector
(
size_t
count
,
const
T
&
value
=
T
())
:
m_
(
new
VectorData
(
count
,
value
))
{}
// Ctor with init_list
Vector
(
std
::
initializer_list
<
T
>
init
)
:
m_
(
new
VectorData
(
init
))
{}
// implicit cast from std::vector.
template
<
typename
U
>
Vector
(
const
std
::
vector
<
U
>
&
dat
)
:
m_
(
new
VectorData
(
dat
))
{
// NOLINT
}
// Copy ctor
Vector
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
}
// Copy operator
Vector
<
T
>
&
operator
=
(
const
Vector
<
T
>
&
other
)
{
m_
=
other
.
m_
;
return
*
this
;
}
// Move ctor
Vector
(
Vector
<
T
>
&&
other
)
{
m_
=
std
::
move
(
other
.
m_
);
}
// CPU data access method. Mutable.
T
&
operator
[](
size_t
i
)
{
return
(
*
m_
.
MutableData
())[
i
];
}
// CPU data access method. Immutable.
const
T
&
operator
[](
size_t
i
)
const
{
return
m_
.
Data
()[
i
];
}
// std::vector iterator methods. Based on CPU data access method
size_t
size
()
const
{
return
m_
.
Data
().
size
();
}
iterator
begin
()
{
return
m_
.
MutableData
()
->
begin
();
}
iterator
end
()
{
return
m_
.
MutableData
()
->
end
();
}
T
&
front
()
{
return
m_
.
MutableData
()
->
front
();
}
T
&
back
()
{
return
m_
.
MutableData
()
->
back
();
}
const_iterator
begin
()
const
{
return
m_
.
Data
().
begin
();
}
const_iterator
end
()
const
{
return
m_
.
Data
().
end
();
}
const_iterator
cbegin
()
const
{
return
begin
();
}
const_iterator
cend
()
const
{
return
end
();
}
const
T
&
back
()
const
{
return
m_
.
Data
().
back
();
}
T
*
data
()
{
return
m_
.
MutableData
()
->
data
();
}
const
T
*
data
()
const
{
return
m_
.
Data
().
data
();
}
const
T
&
front
()
const
{
return
m_
.
Data
().
front
();
}
// end of std::vector iterator methods
// assign this from iterator.
// NOTE: the iterator must support `end-begin`
template
<
typename
Iter
>
void
assign
(
Iter
begin
,
Iter
end
)
{
m_
.
MutableData
()
->
assign
(
begin
,
end
);
}
// push_back. If the previous capacity is not enough, the memory will
// double.
void
push_back
(
T
elem
)
{
m_
.
MutableData
()
->
push_back
(
elem
);
}
// extend a vector by iterator.
// NOTE: the iterator must support end-begin
template
<
typename
It
>
void
Extend
(
It
begin
,
It
end
)
{
m_
.
MutableData
()
->
Extend
(
begin
,
end
);
}
// resize the vector
void
resize
(
size_t
size
)
{
if
(
m_
.
Data
().
size
()
!=
size
)
{
m_
.
MutableData
()
->
resize
(
size
);
}
}
// get cuda ptr. immutable
const
T
*
CUDAData
(
platform
::
Place
place
)
const
{
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
Data
().
CUDAData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAData
(
place
);
}
// get cuda ptr. mutable
T
*
CUDAMutableData
(
platform
::
Place
place
)
{
{
auto
&
mtx
=
m_
.
Data
().
Mutex
();
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx
);
auto
cuda_place
=
m_
.
Data
().
CUDAPlace
();
if
(
cuda_place
==
nullptr
||
*
cuda_place
==
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
{
return
m_
.
MutableData
()
->
CUDAMutableData
(
place
);
}
}
// If m_ contains CUDAData in a different place. Detach manually.
m_
.
Detach
();
return
CUDAMutableData
(
place
);
}
// clear
void
clear
()
{
m_
.
MutableData
()
->
clear
();
}
size_t
capacity
()
const
{
return
m_
.
Data
().
capacity
();
}
// reserve data
void
reserve
(
size_t
size
)
{
m_
.
Data
().
reserve
(
size
);
}
// the unify method to access CPU or CUDA data. immutable.
const
T
*
Data
(
platform
::
Place
place
)
const
{
if
(
platform
::
is_gpu_place
(
place
))
{
return
CUDAData
(
place
);
}
else
{
return
data
();
}
}
// the unify method to access CPU or CUDA data. mutable.
T
*
MutableData
(
platform
::
Place
place
)
{
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
platform
::
is_gpu_place
(
place
))
{
platform
::
DeviceContextPool
::
Instance
()
return
CUDAMutableData
(
place
);
.
Get
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place
))
}
else
{
->
Wait
();
return
data
();
}
}
}
}
static
T
&
EmptyDummy
()
{
// implicit cast operator. Vector can be cast to std::vector implicitly.
static
T
dummy
=
T
();
operator
std
::
vector
<
T
>
()
const
{
return
m_
.
Data
();
}
return
dummy
;
bool
operator
==
(
const
Vector
<
T
>
&
other
)
const
{
if
(
size
()
!=
other
.
size
())
return
false
;
auto
it1
=
cbegin
();
auto
it2
=
other
.
cbegin
();
for
(;
it1
<
cend
();
++
it1
,
++
it2
)
{
if
(
*
it1
!=
*
it2
)
{
return
false
;
}
}
return
true
;
}
}
mutable
int
flag_
;
const
void
*
Handle
()
const
{
return
&
m_
.
Data
();
}
mutable
Tensor
cpu_vec_
;
mutable
Tensor
cuda_vec_
;
private:
size_t
size_
;
// Vector is an COW object.
mutable
details
::
COWPtr
<
VectorData
>
m_
;
};
};
#else // PADDLE_WITH_CUDA
#else // PADDLE_WITH_CUDA
...
...
paddle/fluid/operators/detection_map_op.h
浏览文件 @
0be1582d
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -76,8 +76,8 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
auto
ap_type
=
GetAPType
(
ctx
.
Attr
<
std
::
string
>
(
"ap_type"
));
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
int
class_num
=
ctx
.
Attr
<
int
>
(
"class_num"
);
auto
label_lod
=
in_label
->
lod
();
auto
&
label_lod
=
in_label
->
lod
();
auto
detect_lod
=
in_detect
->
lod
();
auto
&
detect_lod
=
in_detect
->
lod
();
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
label_lod
.
size
(),
1UL
,
"Only support one level sequence now."
);
"Only support one level sequence now."
);
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
PADDLE_ENFORCE_EQ
(
label_lod
[
0
].
size
(),
detect_lod
[
0
].
size
(),
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -166,11 +166,11 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
labels
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_label
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
detect
=
framework
::
EigenTensor
<
T
,
2
>::
From
(
input_detect
);
auto
label_lod
=
input_label
.
lod
();
auto
&
label_lod
=
input_label
.
lod
();
auto
detect_lod
=
input_detect
.
lod
();
auto
&
detect_lod
=
input_detect
.
lod
();
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
int
batch_size
=
label_lod
[
0
].
size
()
-
1
;
auto
label_index
=
label_lod
[
0
];
auto
&
label_index
=
label_lod
[
0
];
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
std
::
map
<
int
,
std
::
vector
<
Box
>>
boxes
;
...
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -274,7 +274,6 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
output_true_pos
->
set_lod
(
true_pos_lod
);
output_true_pos
->
set_lod
(
true_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
output_false_pos
->
set_lod
(
false_pos_lod
);
return
;
}
}
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
void
GetInputPos
(
const
framework
::
Tensor
&
input_pos_count
,
...
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -292,7 +291,7 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
auto
SetData
=
[](
const
framework
::
LoDTensor
&
pos_tensor
,
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>&
pos
)
{
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
const
T
*
pos_data
=
pos_tensor
.
data
<
T
>
();
auto
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
auto
&
pos_data_lod
=
pos_tensor
.
lod
()[
0
];
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
pos_data_lod
.
size
()
-
1
;
++
i
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
for
(
size_t
j
=
pos_data_lod
[
i
];
j
<
pos_data_lod
[
i
+
1
];
++
j
)
{
T
score
=
pos_data
[
j
*
2
];
T
score
=
pos_data
[
j
*
2
];
...
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
...
@@ -317,20 +316,23 @@ class DetectionMAPOpKernel : public framework::OpKernel<T> {
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
std
::
map
<
int
,
std
::
vector
<
std
::
pair
<
T
,
int
>>>*
false_pos
)
const
{
int
batch_size
=
gt_boxes
.
size
();
int
batch_size
=
gt_boxes
.
size
();
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
auto
image_gt_boxes
=
gt_boxes
[
n
];
auto
&
image_gt_boxes
=
gt_boxes
[
n
];
for
(
auto
it
=
image_gt_boxes
.
begin
();
it
!=
image_gt_boxes
.
end
();
++
it
)
{
for
(
auto
&
image_gt_box
:
image_gt_boxes
)
{
size_t
count
=
0
;
size_t
count
=
0
;
auto
labeled_bboxes
=
it
->
second
;
auto
&
labeled_bboxes
=
image_gt_box
.
second
;
if
(
evaluate_difficult
)
{
if
(
evaluate_difficult
)
{
count
=
labeled_bboxes
.
size
();
count
=
labeled_bboxes
.
size
();
}
else
{
}
else
{
for
(
size_t
i
=
0
;
i
<
labeled_bboxes
.
size
();
++
i
)
for
(
auto
&
box
:
labeled_bboxes
)
{
if
(
!
(
labeled_bboxes
[
i
].
is_difficult
))
++
count
;
if
(
!
box
.
is_difficult
)
{
++
count
;
}
}
}
}
if
(
count
==
0
)
{
if
(
count
==
0
)
{
continue
;
continue
;
}
}
int
label
=
i
t
->
first
;
int
label
=
i
mage_gt_box
.
first
;
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
if
(
label_pos_count
->
find
(
label
)
==
label_pos_count
->
end
())
{
(
*
label_pos_count
)[
label
]
=
count
;
(
*
label_pos_count
)[
label
]
=
count
;
}
else
{
}
else
{
...
...
paddle/fluid/operators/extract_rows_op.cc
浏览文件 @
0be1582d
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
...
@@ -50,7 +50,7 @@ class ExtractRowsOp : public framework::OperatorBase {
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
SelectedRows
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
out
=
scope
.
FindVar
(
Output
(
"Out"
))
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
in_rows
=
in
.
rows
();
auto
&
in_rows
=
in
.
rows
();
auto
out_dim
=
framework
::
make_ddim
(
auto
out_dim
=
framework
::
make_ddim
(
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
std
::
vector
<
int64_t
>
{
static_cast
<
int64_t
>
(
in_rows
.
size
()),
1
});
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
auto
dst_ptr
=
out
->
mutable_data
<
int64_t
>
(
out_dim
,
in
.
place
());
...
...
paddle/fluid/operators/lookup_table_op.cu
浏览文件 @
0be1582d
...
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
...
@@ -127,10 +127,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
());
auto
gpu_place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
context
.
GetPlace
());
// TODO(yuyang18): Strange code here.
// TODO(yuyang18): Strange code here.
memory
::
Copy
(
platform
::
CPUPlace
(),
memory
::
Copy
(
gpu_place
,
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
new_rows
.
CUDAMutableData
(
context
.
GetPlace
()),
gpu_place
,
gpu_place
,
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
ids_data
,
ids_num
*
sizeof
(
int64_t
),
stream
);
d_table
->
set_rows
(
new_rows
);
d_table
->
set_rows
(
new_rows
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
auto
*
d_table_value
=
d_table
->
mutable_value
();
...
...
paddle/fluid/operators/math/selected_rows_functor.cu
浏览文件 @
0be1582d
...
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
...
@@ -60,11 +60,9 @@ struct SelectedRowsAdd<platform::CUDADeviceContext, T> {
auto
out_place
=
context
.
GetPlace
();
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
out_place
));
memory
::
Copy
(
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
out_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
boost
::
get
<
platform
::
CUDAPlace
>
(
in1_place
),
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
).
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
out_place
),
...
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
...
@@ -148,7 +146,7 @@ struct SelectedRowsAddTo<platform::CUDADeviceContext, T> {
auto
in1_height
=
input1
.
height
();
auto
in1_height
=
input1
.
height
();
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
PADDLE_ENFORCE_EQ
(
in1_height
,
input2
->
height
());
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
()
);
auto
&
in1_rows
=
input1
.
rows
(
);
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_value
=
input1
.
value
();
...
...
paddle/fluid/operators/sgd_op.cu
浏览文件 @
0be1582d
...
@@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
PADDLE_ENFORCE_EQ
(
in_height
,
out_dims
[
0
]);
auto
&
in_value
=
grad
->
value
();
auto
&
in_value
=
grad
->
value
();
framework
::
Vector
<
int64_t
>
in_rows
(
grad
->
rows
()
);
auto
&
in_rows
=
grad
->
rows
(
);
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
int64_t
in_row_numel
=
in_value
.
numel
()
/
in_rows
.
size
();
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
PADDLE_ENFORCE_EQ
(
in_row_numel
,
param_out
->
numel
()
/
in_height
);
...
...
paddle/fluid/operators/sum_op.h
浏览文件 @
0be1582d
...
@@ -124,7 +124,6 @@ class SumKernel : public framework::OpKernel<T> {
...
@@ -124,7 +124,6 @@ class SumKernel : public framework::OpKernel<T> {
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
Resize
(
framework
::
make_ddim
(
in_dim
));
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
out_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
// if all the input sparse vars are empty, no need to
// if all the input sparse vars are empty, no need to
// merge these vars.
// merge these vars.
if
(
first_dim
==
0UL
)
{
if
(
first_dim
==
0UL
)
{
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
0be1582d
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
...
@@ -345,7 +345,7 @@ class OpTest(unittest.TestCase):
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
actual_t
,
expect_t
,
atol
=
atol
,
equal_nan
=
equal_nan
),
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"Output ("
+
out_name
+
") has diff at "
+
str
(
place
)
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
"
\n
Expect "
+
str
(
expect_t
)
+
"
\n
"
+
"But Got"
+
str
(
actual_t
))
str
(
actual_t
)
+
" in class "
+
self
.
__class__
.
__name__
)
if
isinstance
(
expect
,
tuple
):
if
isinstance
(
expect
,
tuple
):
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
self
.
assertListEqual
(
actual
.
recursive_sequence_lengths
(),
expect
[
1
],
"Output ("
+
out_name
+
expect
[
1
],
"Output ("
+
out_name
+
...
...
python/paddle/fluid/tests/unittests/test_detection_map_op.py
浏览文件 @
0be1582d
...
@@ -20,6 +20,7 @@ import six
...
@@ -20,6 +20,7 @@ import six
import
sys
import
sys
import
collections
import
collections
import
math
import
math
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
op_test
import
OpTest
...
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
...
@@ -32,7 +33,7 @@ class TestDetectionMAPOp(OpTest):
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
detect
=
np
.
array
(
self
.
detect
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
self
.
mAP
=
np
.
array
(
self
.
mAP
).
astype
(
'float32'
)
if
(
len
(
self
.
class_pos_count
)
>
0
)
:
if
len
(
self
.
class_pos_count
)
>
0
:
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
self
.
class_pos_count
=
np
.
array
(
self
.
class_pos_count
).
astype
(
'int32'
)
'int32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
self
.
true_pos
=
np
.
array
(
self
.
true_pos
).
astype
(
'float32'
)
...
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
...
@@ -273,7 +274,7 @@ class TestDetectionMAPOp11Point(TestDetectionMAPOp):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
class
TestDetectionMAPOpMultiBatch
(
TestDetectionMAPOp
):
def
init_test_case
(
self
):
def
init_test_case
(
self
):
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
super
(
TestDetectionMAPOpMultiBatch
,
self
).
init_test_case
()
self
.
class_pos_count
=
[
0
,
2
,
1
]
self
.
class_pos_count
=
[
0
,
2
,
1
,
0
]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos_lod
=
[[
0
,
3
,
2
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
true_pos
=
[[
0.7
,
1.
],
[
0.3
,
0.
],
[
0.2
,
1.
],
[
0.8
,
0.
],
[
0.1
,
1.
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
self
.
false_pos_lod
=
[[
0
,
3
,
2
]]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录