Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5b267474
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5b267474
编写于
2月 19, 2021
作者:
G
Guanghua Yu
提交者:
GitHub
2月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add offset parameter in roi_align,generate_proposals.etc ops (#30864)
* add parameter in roi_align op
上级
75f81233
变更
15
隐藏空白更改
内联
并排
Showing
15 changed files
with
354 additions
and
187 deletions
+354
-187
paddle/fluid/operators/detection/bbox_util.cu.h
paddle/fluid/operators/detection/bbox_util.cu.h
+48
-31
paddle/fluid/operators/detection/bbox_util.h
paddle/fluid/operators/detection/bbox_util.h
+43
-29
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
.../fluid/operators/detection/distribute_fpn_proposals_op.cc
+8
-1
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu
.../fluid/operators/detection/distribute_fpn_proposals_op.cu
+5
-4
paddle/fluid/operators/detection/distribute_fpn_proposals_op.h
...e/fluid/operators/detection/distribute_fpn_proposals_op.h
+6
-5
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+22
-9
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
+11
-8
paddle/fluid/operators/detection/nms_util.h
paddle/fluid/operators/detection/nms_util.h
+5
-3
paddle/fluid/operators/roi_align_op.cc
paddle/fluid/operators/roi_align_op.cc
+13
-2
paddle/fluid/operators/roi_align_op.cu
paddle/fluid/operators/roi_align_op.cu
+37
-25
paddle/fluid/operators/roi_align_op.h
paddle/fluid/operators/roi_align_op.h
+27
-12
python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py
...fluid/tests/unittests/test_distribute_fpn_proposals_op.py
+20
-5
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
...addle/fluid/tests/unittests/test_generate_proposals_op.py
+39
-28
python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py
...le/fluid/tests/unittests/test_generate_proposals_v2_op.py
+35
-17
python/paddle/fluid/tests/unittests/test_roi_align_op.py
python/paddle/fluid/tests/unittests/test_roi_align_op.py
+35
-8
未找到文件。
paddle/fluid/operators/detection/bbox_util.cu.h
浏览文件 @
5b267474
...
...
@@ -77,17 +77,20 @@ struct BoxDecodeAndClipFunctor {
const
T
*
var
;
const
int
*
index
;
const
T
*
im_info
;
const
bool
pixel_offset
;
T
*
proposals
;
BoxDecodeAndClipFunctor
(
const
T
*
anchor
,
const
T
*
deltas
,
const
T
*
var
,
const
int
*
index
,
const
T
*
im_info
,
T
*
proposals
)
const
int
*
index
,
const
T
*
im_info
,
T
*
proposals
,
bool
pixel_offset
=
true
)
:
anchor
(
anchor
),
deltas
(
deltas
),
var
(
var
),
index
(
index
),
im_info
(
im_info
),
proposals
(
proposals
)
{}
proposals
(
proposals
),
pixel_offset
(
pixel_offset
)
{}
T
bbox_clip_default
{
static_cast
<
T
>
(
kBBoxClipDefault
)};
...
...
@@ -98,8 +101,9 @@ struct BoxDecodeAndClipFunctor {
T
axmax
=
anchor
[
k
+
2
];
T
aymax
=
anchor
[
k
+
3
];
T
w
=
axmax
-
axmin
+
1.0
;
T
h
=
aymax
-
aymin
+
1.0
;
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
w
=
axmax
-
axmin
+
offset
;
T
h
=
aymax
-
aymin
+
offset
;
T
cx
=
axmin
+
0.5
*
w
;
T
cy
=
aymin
+
0.5
*
h
;
...
...
@@ -123,13 +127,13 @@ struct BoxDecodeAndClipFunctor {
T
oxmin
=
d_cx
-
d_w
*
0.5
;
T
oymin
=
d_cy
-
d_h
*
0.5
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
1.
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
1.
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
offset
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
offset
;
proposals
[
i
*
4
]
=
Max
(
Min
(
oxmin
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
(
Min
(
oymin
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
(
Min
(
oxmax
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
(
Min
(
oymax
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
]
=
Max
(
Min
(
oxmin
,
im_info
[
1
]
-
offset
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
(
Min
(
oymin
,
im_info
[
0
]
-
offset
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
(
Min
(
oxmax
,
im_info
[
1
]
-
offset
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
(
Min
(
oymax
,
im_info
[
0
]
-
offset
),
0.
);
}
__device__
__forceinline__
T
Min
(
T
a
,
T
b
)
const
{
return
a
>
b
?
b
:
a
;
}
...
...
@@ -141,7 +145,8 @@ template <typename T, int BlockSize>
static
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
const
T
min_size
,
const
int
num
,
int
*
keep_num
,
int
*
keep
,
bool
is_scale
=
true
)
{
bool
is_scale
=
true
,
bool
pixel_offset
=
true
)
{
T
im_h
=
im_info
[
0
];
T
im_w
=
im_info
[
1
];
...
...
@@ -157,19 +162,25 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
T
ymin
=
bboxes
[
k
+
1
];
T
xmax
=
bboxes
[
k
+
2
];
T
ymax
=
bboxes
[
k
+
3
];
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
w
=
xmax
-
xmin
+
offset
;
T
h
=
ymax
-
ymin
+
offset
;
if
(
pixel_offset
)
{
T
cx
=
xmin
+
w
/
2.
;
T
cy
=
ymin
+
h
/
2.
;
if
(
is_scale
)
{
w
=
(
xmax
-
xmin
)
/
im_info
[
2
]
+
1.
;
h
=
(
ymax
-
ymin
)
/
im_info
[
2
]
+
1.
;
}
T
w
=
xmax
-
xmin
+
1.0
;
T
h
=
ymax
-
ymin
+
1.0
;
T
cx
=
xmin
+
w
/
2.
;
T
cy
=
ymin
+
h
/
2.
;
if
(
is_scale
)
{
w
=
(
xmax
-
xmin
)
/
im_info
[
2
]
+
1.
;
h
=
(
ymax
-
ymin
)
/
im_info
[
2
]
+
1.
;
}
if
(
w
>=
min_size
&&
h
>=
min_size
&&
cx
<=
im_w
&&
cy
<=
im_h
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
if
(
w
>=
min_size
&&
h
>=
min_size
&&
cx
<=
im_w
&&
cy
<=
im_h
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
}
}
else
{
if
(
w
>=
min_size
&&
h
>=
min_size
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
}
}
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
...
...
@@ -187,19 +198,23 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
}
}
static
__device__
float
IoU
(
const
float
*
a
,
const
float
*
b
)
{
static
__device__
float
IoU
(
const
float
*
a
,
const
float
*
b
,
const
bool
pixel_offset
=
true
)
{
float
offset
=
pixel_offset
?
static_cast
<
float
>
(
1.0
)
:
0
;
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
width
=
max
(
right
-
left
+
1
,
0.
f
),
height
=
max
(
bottom
-
top
+
1
,
0.
f
);
float
width
=
max
(
right
-
left
+
offset
,
0.
f
),
height
=
max
(
bottom
-
top
+
offset
,
0.
f
);
float
inter_s
=
width
*
height
;
float
s_a
=
(
a
[
2
]
-
a
[
0
]
+
1
)
*
(
a
[
3
]
-
a
[
1
]
+
1
);
float
s_b
=
(
b
[
2
]
-
b
[
0
]
+
1
)
*
(
b
[
3
]
-
b
[
1
]
+
1
);
float
s_a
=
(
a
[
2
]
-
a
[
0
]
+
offset
)
*
(
a
[
3
]
-
a
[
1
]
+
offset
);
float
s_b
=
(
b
[
2
]
-
b
[
0
]
+
offset
)
*
(
b
[
3
]
-
b
[
1
]
+
offset
);
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
}
static
__global__
void
NMSKernel
(
const
int
n_boxes
,
const
float
nms_overlap_thresh
,
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
)
{
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
,
bool
pixel_offset
=
true
)
{
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
...
...
@@ -231,7 +246,8 @@ static __global__ void NMSKernel(const int n_boxes,
start
=
threadIdx
.
x
+
1
;
}
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
if
(
IoU
(
cur_box
,
block_boxes
+
i
*
4
)
>
nms_overlap_thresh
)
{
if
(
IoU
(
cur_box
,
block_boxes
+
i
*
4
,
pixel_offset
)
>
nms_overlap_thresh
)
{
t
|=
1ULL
<<
i
;
}
}
...
...
@@ -243,7 +259,7 @@ static __global__ void NMSKernel(const int n_boxes,
template
<
typename
T
>
static
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
Tensor
*
keep_out
)
{
Tensor
*
keep_out
,
bool
pixel_offset
=
true
)
{
int
boxes_num
=
proposals
.
dims
()[
0
];
const
int
col_blocks
=
DIVUP
(
boxes_num
,
kThreadsPerBlock
);
dim3
blocks
(
DIVUP
(
boxes_num
,
kThreadsPerBlock
),
...
...
@@ -255,7 +271,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
framework
::
Vector
<
uint64_t
>
mask
(
boxes_num
*
col_blocks
);
NMSKernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_threshold
,
boxes
,
mask
.
CUDAMutableData
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
ctx
.
GetPlace
())));
platform
::
CUDAPlace
,
ctx
.
GetPlace
())),
pixel_offset
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
...
...
paddle/fluid/operators/detection/bbox_util.h
浏览文件 @
5b267474
...
...
@@ -31,7 +31,7 @@ struct RangeInitFunctor {
};
template
<
typename
T
>
inline
HOSTDEVICE
T
RoIArea
(
const
T
*
box
,
bool
normalized
)
{
inline
HOSTDEVICE
T
RoIArea
(
const
T
*
box
,
bool
pixel_offset
=
true
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
...
...
@@ -39,11 +39,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
}
else
{
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
h
=
box
[
3
]
-
box
[
1
];
if
(
normalized
)
{
return
w
*
h
;
}
else
{
if
(
pixel_offset
)
{
// If coordinate values are not within range [0, 1].
return
(
w
+
1
)
*
(
h
+
1
);
}
else
{
return
w
*
h
;
}
}
}
...
...
@@ -157,10 +157,12 @@ template <class T>
void
ClipTiledBoxes
(
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Tensor
&
im_info
,
const
framework
::
Tensor
&
input_boxes
,
framework
::
Tensor
*
out
,
bool
is_scale
=
true
)
{
framework
::
Tensor
*
out
,
bool
is_scale
=
true
,
bool
pixel_offset
=
true
)
{
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
input_boxes_data
=
input_boxes
.
data
<
T
>
();
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
zero
(
0
);
T
im_w
=
is_scale
?
round
(
im_info_data
[
1
]
/
im_info_data
[
2
])
:
im_info_data
[
1
];
...
...
@@ -168,13 +170,17 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
is_scale
?
round
(
im_info_data
[
0
]
/
im_info_data
[
2
])
:
im_info_data
[
0
];
for
(
int64_t
i
=
0
;
i
<
input_boxes
.
numel
();
++
i
)
{
if
(
i
%
4
==
0
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
offset
),
zero
);
}
else
if
(
i
%
4
==
1
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
offset
),
zero
);
}
else
if
(
i
%
4
==
2
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
offset
),
zero
);
}
else
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
offset
),
zero
);
}
}
}
...
...
@@ -184,29 +190,35 @@ template <class T>
void
FilterBoxes
(
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Tensor
*
boxes
,
float
min_size
,
const
framework
::
Tensor
&
im_info
,
bool
is_scale
,
framework
::
Tensor
*
keep
)
{
framework
::
Tensor
*
keep
,
bool
pixel_offset
=
true
)
{
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
boxes_data
=
boxes
->
data
<
T
>
();
keep
->
Resize
({
boxes
->
dims
()[
0
]});
min_size
=
std
::
max
(
min_size
,
1.0
f
);
int
*
keep_data
=
keep
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
int
keep_len
=
0
;
for
(
int
i
=
0
;
i
<
boxes
->
dims
()[
0
];
++
i
)
{
T
ws
=
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
]
+
1
;
T
hs
=
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
]
+
1
;
T
x_ctr
=
boxes_data
[
4
*
i
]
+
ws
/
2
;
T
y_ctr
=
boxes_data
[
4
*
i
+
1
]
+
hs
/
2
;
T
ws
=
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
]
+
offset
;
T
hs
=
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
]
+
offset
;
if
(
pixel_offset
)
{
T
x_ctr
=
boxes_data
[
4
*
i
]
+
ws
/
2
;
T
y_ctr
=
boxes_data
[
4
*
i
+
1
]
+
hs
/
2
;
if
(
is_scale
)
{
ws
=
(
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
])
/
im_info_data
[
2
]
+
1
;
hs
=
(
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
])
/
im_info_data
[
2
]
+
1
;
}
if
(
ws
>=
min_size
&&
hs
>=
min_size
&&
x_ctr
<=
im_info_data
[
1
]
&&
y_ctr
<=
im_info_data
[
0
])
{
keep_data
[
keep_len
++
]
=
i
;
if
(
is_scale
)
{
ws
=
(
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
])
/
im_info_data
[
2
]
+
1
;
hs
=
(
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
])
/
im_info_data
[
2
]
+
1
;
}
if
(
ws
>=
min_size
&&
hs
>=
min_size
&&
x_ctr
<=
im_info_data
[
1
]
&&
y_ctr
<=
im_info_data
[
0
])
{
keep_data
[
keep_len
++
]
=
i
;
}
}
else
{
if
(
ws
>=
min_size
&&
hs
>=
min_size
)
{
keep_data
[
keep_len
++
]
=
i
;
}
}
}
keep
->
Resize
({
keep_len
});
...
...
@@ -216,8 +228,8 @@ template <class T>
static
void
BoxCoder
(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
all_anchors
,
framework
::
Tensor
*
bbox_deltas
,
framework
::
Tensor
*
variances
,
framework
::
Tensor
*
proposals
)
{
framework
::
Tensor
*
variances
,
framework
::
Tensor
*
proposals
,
const
bool
pixel_offset
=
true
)
{
T
*
proposals_data
=
proposals
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
row
=
all_anchors
->
dims
()[
0
];
...
...
@@ -230,9 +242,11 @@ static void BoxCoder(const platform::DeviceContext& ctx,
variances_data
=
variances
->
data
<
T
>
();
}
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
T
anchor_width
=
anchor_data
[
i
*
len
+
2
]
-
anchor_data
[
i
*
len
]
+
1.0
;
T
anchor_height
=
anchor_data
[
i
*
len
+
3
]
-
anchor_data
[
i
*
len
+
1
]
+
1.0
;
T
anchor_width
=
anchor_data
[
i
*
len
+
2
]
-
anchor_data
[
i
*
len
]
+
offset
;
T
anchor_height
=
anchor_data
[
i
*
len
+
3
]
-
anchor_data
[
i
*
len
+
1
]
+
offset
;
T
anchor_center_x
=
anchor_data
[
i
*
len
]
+
0.5
*
anchor_width
;
T
anchor_center_y
=
anchor_data
[
i
*
len
+
1
]
+
0.5
*
anchor_height
;
...
...
@@ -270,8 +284,8 @@ static void BoxCoder(const platform::DeviceContext& ctx,
proposals_data
[
i
*
len
]
=
bbox_center_x
-
bbox_width
/
2
;
proposals_data
[
i
*
len
+
1
]
=
bbox_center_y
-
bbox_height
/
2
;
proposals_data
[
i
*
len
+
2
]
=
bbox_center_x
+
bbox_width
/
2
-
1
;
proposals_data
[
i
*
len
+
3
]
=
bbox_center_y
+
bbox_height
/
2
-
1
;
proposals_data
[
i
*
len
+
2
]
=
bbox_center_x
+
bbox_width
/
2
-
offset
;
proposals_data
[
i
*
len
+
3
]
=
bbox_center_y
+
bbox_height
/
2
-
offset
;
}
// return proposals;
}
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
浏览文件 @
5b267474
...
...
@@ -103,6 +103,9 @@ class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
int
>
(
"refer_scale"
,
"The referring scale of FPN layer with"
" specified level"
);
AddAttr
<
bool
>
(
"pixel_offset"
,
"(bool, default True),"
,
"If true, im_shape pixel offset is 1."
)
.
SetDefault
(
true
);
AddComment
(
R"DOC(
This operator distribute all proposals into different fpn level,
with respect to scale of the proposals, the referring scale and
...
...
@@ -134,4 +137,8 @@ REGISTER_OP_VERSION(distribute_fpn_proposals)
.
NewOutput
(
"MultiLevelRoisNum"
,
"The RoIs' number of each image on multiple "
"levels. The number on each level has the shape of (B),"
"B is the number of images."
));
"B is the number of images."
))
.
AddCheckpoint
(
R"ROC(Register distribute_fpn_proposals for adding the attribute of pixel_offset)ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"pixel_offset"
,
"If true, im_shape pixel offset is 1."
,
true
));
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu
浏览文件 @
5b267474
...
...
@@ -43,15 +43,15 @@ __global__ void GPUDistFpnProposalsHelper(
const
int
nthreads
,
const
T
*
rois
,
const
int
lod_size
,
const
int
refer_level
,
const
int
refer_scale
,
const
int
max_level
,
const
int
min_level
,
int
*
roi_batch_id_data
,
int
*
sub_lod_list
,
int
*
target_lvls
)
{
int
*
target_lvls
,
bool
pixel_offset
=
true
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
const
T
*
offset_roi
=
rois
+
i
*
BBoxSize
;
int
roi_batch_ind
=
roi_batch_id_data
[
i
];
// get the target level of current rois
T
roi_area
=
RoIArea
(
offset_roi
,
false
);
T
roi_area
=
RoIArea
(
offset_roi
,
pixel_offset
);
T
roi_scale
=
sqrt
(
roi_area
);
int
tgt_lvl
=
floor
(
log2
(
roi_scale
/
static_cast
<
T
>
(
refer_scale
)
+
(
T
)
1e-
6
)
+
refer_level
);
log2
(
roi_scale
/
static_cast
<
T
>
(
refer_scale
)
+
(
T
)
1e-
8
)
+
refer_level
);
tgt_lvl
=
min
(
max_level
,
max
(
tgt_lvl
,
min_level
));
target_lvls
[
i
]
=
tgt_lvl
;
// compute number of rois in the same batch and same target level
...
...
@@ -73,6 +73,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
int
max_level
=
ctx
.
Attr
<
int
>
(
"max_level"
);
const
int
refer_level
=
ctx
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_scale
=
ctx
.
Attr
<
int
>
(
"refer_scale"
);
const
bool
pixel_offset
=
ctx
.
Attr
<
bool
>
(
"pixel_offset"
);
int
num_level
=
max_level
-
min_level
+
1
;
// check that the fpn_rois is not empty
...
...
@@ -126,7 +127,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
GPUDistFpnProposalsHelper
<
T
><<<
dist_blocks
,
threads
>>>
(
roi_num
,
fpn_rois
->
data
<
T
>
(),
lod_size
,
refer_level
,
refer_scale
,
max_level
,
min_level
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
sub_lod_list_data
,
target_lvls_data
);
sub_lod_list_data
,
target_lvls_data
,
pixel_offset
);
dev_ctx
.
Wait
();
auto
place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_ctx
.
GetPlace
());
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.h
浏览文件 @
5b267474
...
...
@@ -44,7 +44,7 @@ inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
}
template
<
typename
T
>
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
normalized
)
{
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
pixel_offset
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
...
...
@@ -52,11 +52,11 @@ static inline T BBoxArea(const T* box, bool normalized) {
}
else
{
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
h
=
box
[
3
]
-
box
[
1
];
if
(
normalized
)
{
return
w
*
h
;
}
else
{
if
(
pixel_offset
)
{
// If coordinate values are not within range [0, 1].
return
(
w
+
1
)
*
(
h
+
1
);
}
else
{
return
w
*
h
;
}
}
}
...
...
@@ -77,6 +77,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
int
max_level
=
context
.
Attr
<
int
>
(
"max_level"
);
const
int
refer_level
=
context
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_scale
=
context
.
Attr
<
int
>
(
"refer_scale"
);
const
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
const
int
num_level
=
max_level
-
min_level
+
1
;
// check that the fpn_rois is not empty
...
...
@@ -108,7 +109,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
T
*
rois_data
=
fpn_rois_slice
.
data
<
T
>
();
for
(
int
j
=
0
;
j
<
fpn_rois_slice
.
dims
()[
0
];
++
j
)
{
// get the target level of current rois
T
roi_scale
=
std
::
sqrt
(
BBoxArea
(
rois_data
,
false
));
T
roi_scale
=
std
::
sqrt
(
BBoxArea
(
rois_data
,
pixel_offset
));
int
tgt_lvl
=
std
::
floor
(
std
::
log2
(
roi_scale
/
refer_scale
+
(
T
)
1e-6
)
+
refer_level
);
tgt_lvl
=
std
::
min
(
max_level
,
std
::
max
(
tgt_lvl
,
min_level
));
...
...
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
浏览文件 @
5b267474
...
...
@@ -87,6 +87,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
...
...
@@ -134,10 +135,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
ProposalForOneImage
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
ProposalForOneImage
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
,
pixel_offset
);
Tensor
&
proposals
=
tensor_pair
.
first
;
Tensor
&
scores
=
tensor_pair
.
second
;
...
...
@@ -168,7 +169,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
const
Tensor
&
bbox_deltas_slice
,
// [M, 4]
const
Tensor
&
scores_slice
,
// [N, 1]
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
float
eta
)
const
{
float
eta
,
bool
pixel_offset
=
true
)
const
{
auto
*
scores_data
=
scores_slice
.
data
<
T
>
();
// Sort index
...
...
@@ -203,12 +204,15 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
Tensor
proposals
;
proposals
.
mutable_data
<
T
>
({
index_t
.
numel
(),
4
},
ctx
.
GetPlace
());
BoxCoder
<
T
>
(
ctx
,
&
anchor_sel
,
&
bbox_sel
,
&
var_sel
,
&
proposals
);
BoxCoder
<
T
>
(
ctx
,
&
anchor_sel
,
&
bbox_sel
,
&
var_sel
,
&
proposals
,
pixel_offset
);
ClipTiledBoxes
<
T
>
(
ctx
,
im_shape_slice
,
proposals
,
&
proposals
,
false
);
ClipTiledBoxes
<
T
>
(
ctx
,
im_shape_slice
,
proposals
,
&
proposals
,
false
,
pixel_offset
);
Tensor
keep
;
FilterBoxes
<
T
>
(
ctx
,
&
proposals
,
min_size
,
im_shape_slice
,
false
,
&
keep
);
FilterBoxes
<
T
>
(
ctx
,
&
proposals
,
min_size
,
im_shape_slice
,
false
,
&
keep
,
pixel_offset
);
// Handle the case when there is no keep index left
if
(
keep
.
numel
()
==
0
)
{
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
set_zero
;
...
...
@@ -229,7 +233,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
return
std
::
make_pair
(
bbox_sel
,
scores_filter
);
}
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
,
pixel_offset
);
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
keep_nms
.
Resize
({
post_nms_top_n
});
...
...
@@ -280,6 +285,9 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
"Proposal height and width both need to be greater "
"than this min_size."
);
AddAttr
<
float
>
(
"eta"
,
"The parameter for adaptive NMS."
);
AddAttr
<
bool
>
(
"pixel_offset"
,
"(bool, default True),"
,
"If true, im_shape pixel offset is 1."
)
.
SetDefault
(
true
);
AddComment
(
R"DOC(
This operator is the second version of generate_proposals op to generate
bounding box proposals for Faster RCNN.
...
...
@@ -312,3 +320,8 @@ REGISTER_OPERATOR(
REGISTER_OP_CPU_KERNEL
(
generate_proposals_v2
,
ops
::
GenerateProposalsV2Kernel
<
float
>
,
ops
::
GenerateProposalsV2Kernel
<
double
>
);
REGISTER_OP_VERSION
(
generate_proposals_v2
)
.
AddCheckpoint
(
R"ROC(Registe generate_proposals_v2 for adding the attribute of pixel_offset)ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"pixel_offset"
,
"If true, im_shape pixel offset is 1."
,
true
));
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
浏览文件 @
5b267474
...
...
@@ -36,7 +36,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
const
Tensor
&
bbox_deltas
,
// [M, 4]
const
Tensor
&
scores
,
// [N, 1]
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
float
eta
)
{
float
eta
,
bool
pixel_offset
)
{
// 1. pre nms
Tensor
scores_sort
,
index_sort
;
SortDescending
<
T
>
(
ctx
,
scores
,
&
scores_sort
,
&
index_sort
);
...
...
@@ -54,7 +54,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
platform
::
ForRange
<
platform
::
CUDADeviceContext
>
for_range
(
ctx
,
pre_nms_num
);
for_range
(
BoxDecodeAndClipFunctor
<
T
>
{
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
index_sort
.
data
<
int
>
(),
im_shape
.
data
<
T
>
(),
proposals
.
data
<
T
>
()});
index_sort
.
data
<
int
>
(),
im_shape
.
data
<
T
>
(),
proposals
.
data
<
T
>
(),
pixel_offset
});
}
// 3. filter
...
...
@@ -65,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
auto
stream
=
ctx
.
stream
();
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
proposals
.
data
<
T
>
(),
im_shape
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
(),
false
);
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
(),
false
,
pixel_offset
);
int
keep_num
;
const
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
ctx
.
GetPlace
());
memory
::
Copy
(
platform
::
CPUPlace
(),
&
keep_num
,
gpu_place
,
...
...
@@ -94,7 +95,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
// 4. nms
Tensor
keep_nms
;
NMS
<
T
>
(
ctx
,
proposals_filter
,
keep_index
,
nms_thresh
,
&
keep_nms
);
NMS
<
T
>
(
ctx
,
proposals_filter
,
keep_index
,
nms_thresh
,
&
keep_nms
,
pixel_offset
);
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
keep_nms
.
Resize
({
post_nms_top_n
});
}
...
...
@@ -129,6 +131,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
PADDLE_ENFORCE_GE
(
eta
,
1.
,
platform
::
errors
::
InvalidArgument
(
"Not support adaptive NMS. The attribute 'eta' "
...
...
@@ -184,10 +187,10 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
,
pixel_offset
);
Tensor
&
proposals
=
box_score_pair
.
first
;
Tensor
&
scores
=
box_score_pair
.
second
;
...
...
paddle/fluid/operators/detection/nms_util.h
浏览文件 @
5b267474
...
...
@@ -130,7 +130,7 @@ static inline framework::Tensor VectorToTensor(
template
<
class
T
>
framework
::
Tensor
NMS
(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
bbox
,
framework
::
Tensor
*
scores
,
T
nms_threshold
,
float
eta
)
{
T
nms_threshold
,
float
eta
,
bool
pixel_offset
=
true
)
{
int64_t
num_boxes
=
bbox
->
dims
()[
0
];
// 4: [xmin ymin xmax ymax]
int64_t
box_size
=
bbox
->
dims
()[
1
];
...
...
@@ -144,13 +144,15 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
int
selected_num
=
0
;
T
adaptive_threshold
=
nms_threshold
;
const
T
*
bbox_data
=
bbox
->
data
<
T
>
();
bool
normalized
=
pixel_offset
?
false
:
true
;
while
(
sorted_indices
.
size
()
!=
0
)
{
int
idx
=
sorted_indices
.
back
().
second
;
bool
flag
=
true
;
for
(
int
kept_idx
:
selected_indices
)
{
if
(
flag
)
{
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
false
);
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
normalized
);
flag
=
(
overlap
<=
adaptive_threshold
);
}
else
{
break
;
...
...
paddle/fluid/operators/roi_align_op.cc
浏览文件 @
5b267474
...
...
@@ -175,6 +175,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"If <=0, then grid points are adaptive to roi_width "
"and pooled_w, likewise for height"
)
.
SetDefault
(
-
1
);
AddAttr
<
bool
>
(
"aligned"
,
"(bool, default False),"
"If true, pixel shift it by -0.5 for align more perfectly"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
**RoIAlign Operator**
...
...
@@ -242,7 +246,14 @@ REGISTER_OP_VERSION(roi_align)
"it is not used in object detection models yet."
))
.
AddCheckpoint
(
R"ROC(
Upgrade roi_align add a new input [RoisNum])ROC"
,
Upgrade roi_align add a new input [RoisNum])ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewInput
(
"RoisNum"
,
"The number of RoIs in each image. RoisNum is dispensable."
));
"The number of RoIs in each image. RoisNum is dispensable."
))
.
AddCheckpoint
(
R"ROC(
Upgrade roi_align add a new input [aligned])ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"aligned"
,
"If true, pixel shift it by -0.5 for align more perfectly."
,
false
));
paddle/fluid/operators/roi_align_op.cu
浏览文件 @
5b267474
...
...
@@ -105,7 +105,8 @@ __global__ void GPUROIAlignForward(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
output_data
)
{
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
output_data
,
const
bool
continuous_coordinate
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
int
pw
=
i
%
pooled_width
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
...
...
@@ -115,13 +116,19 @@ __global__ void GPUROIAlignForward(
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
;
T
roi_offset
=
continuous_coordinate
?
static_cast
<
T
>
(
0.5
)
:
0
;
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
continuous_coordinate
)
{
roi_width
=
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
...
...
@@ -153,14 +160,12 @@ __global__ void GPUROIAlignForward(
}
template
<
typename
T
>
__global__
void
GPUROIAlignBackward
(
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
out_grad
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
input_grad
)
{
__global__
void
GPUROIAlignBackward
(
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
out_grad
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
input_grad
,
const
bool
continuous_coordinate
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
int
pw
=
i
%
pooled_width
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
...
...
@@ -169,13 +174,18 @@ __global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
;
T
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_offset
=
continuous_coordinate
?
T
(
0.5
)
:
0
;
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
continuous_coordinate
)
{
roi_width
=
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
...
...
@@ -236,6 +246,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
...
...
@@ -316,7 +327,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
GPUROIAlignForward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
sampling_ratio
,
roi_id_data
,
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
())
,
aligned
);
}
};
...
...
@@ -334,6 +345,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
int
rois_num
=
rois
->
dims
()[
0
];
int
channels
=
in
->
dims
()[
1
];
...
...
@@ -390,8 +402,8 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
GPUROIAlignBackward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
sampling_ratio
,
roi_id_data
,
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
())
);
sampling_ratio
,
roi_id_data
,
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
aligned
);
}
}
};
...
...
paddle/fluid/operators/roi_align_op.h
浏览文件 @
5b267474
...
...
@@ -145,6 +145,7 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
...
...
@@ -215,15 +216,21 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
}
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
rois_data
=
rois
->
data
<
T
>
();
T
roi_offset
=
aligned
?
T
(
0.5
)
:
0
;
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
;
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
aligned
)
{
roi_width
=
std
::
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
std
::
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
roi_width
=
std
::
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
std
::
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
const
T
*
batch_data
=
input_data
+
roi_batch_id
*
in_stride
[
0
];
...
...
@@ -290,6 +297,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
in_dims
=
in
->
dims
();
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
...
...
@@ -344,14 +352,21 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out_grad
->
dims
());
T
roi_offset
=
aligned
?
T
(
0.5
)
:
0
;
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_idx
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
;
T
roi_width
=
std
::
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
std
::
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
aligned
)
{
roi_width
=
std
::
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
std
::
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
...
...
python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py
浏览文件 @
5b267474
...
...
@@ -31,7 +31,8 @@ class TestDistributeFPNProposalsOp(OpTest):
'max_level'
:
self
.
roi_max_level
,
'min_level'
:
self
.
roi_min_level
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_level'
:
self
.
canonical_level
'refer_level'
:
self
.
canonical_level
,
'pixel_offset'
:
self
.
pixel_offset
,
}
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
for
i
in
range
(
len
(
self
.
rois_fpn
))]
...
...
@@ -47,10 +48,12 @@ class TestDistributeFPNProposalsOp(OpTest):
self
.
canonical_scale
=
224
self
.
canonical_level
=
4
self
.
images_shape
=
[
512
,
512
]
self
.
pixel_offset
=
True
def boxes_area(self, boxes):
    """Compute the area of every row of ``boxes``.

    Width is ``boxes[:, 2] - boxes[:, 0]`` and height is
    ``boxes[:, 3] - boxes[:, 1]``. When ``self.pixel_offset`` is truthy one
    extra unit is added to both sides before multiplying.

    Args:
        boxes: 2-D array indexed as ``boxes[:, k]`` for ``k`` in 0..3.

    Returns:
        Array holding ``width * height`` for each row.

    Raises:
        AssertionError: if any computed area is negative.
    """
    offset = 1 if self.pixel_offset else 0
    w = (boxes[:, 2] - boxes[:, 0] + offset)
    h = (boxes[:, 3] - boxes[:, 1] + offset)
    areas = w * h
    assert np.all(areas >= 0), 'Negative areas founds'
    return areas
...
...
@@ -59,7 +62,7 @@ class TestDistributeFPNProposalsOp(OpTest):
s
=
np
.
sqrt
(
self
.
boxes_area
(
rois
))
s0
=
self
.
canonical_scale
lvl0
=
self
.
canonical_level
target_lvls
=
np
.
floor
(
lvl0
+
np
.
log2
(
s
/
s0
+
1e-
6
))
target_lvls
=
np
.
floor
(
lvl0
+
np
.
log2
(
s
/
s0
+
1e-
8
))
target_lvls
=
np
.
clip
(
target_lvls
,
lvl_min
,
lvl_max
)
return
target_lvls
...
...
@@ -131,7 +134,8 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
'max_level'
:
self
.
roi_max_level
,
'min_level'
:
self
.
roi_min_level
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_level'
:
self
.
canonical_level
'refer_level'
:
self
.
canonical_level
,
'pixel_offset'
:
self
.
pixel_offset
,
}
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
for
i
in
range
(
len
(
self
.
rois_fpn
))]
...
...
@@ -147,5 +151,16 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
}
class TestDistributeFPNProposalsOpNoOffset(
        TestDistributeFPNProposalsOpWithRoisNum):
    """Rois-num test case configured with ``pixel_offset`` turned off."""

    def init_test_case(self):
        """Assign the case parameters read by the rest of the test."""
        # Box-size convention exercised by this case.
        self.pixel_offset = False
        # Level bounds, then the reference level/scale pair.
        self.roi_min_level, self.roi_max_level = 2, 5
        self.canonical_level, self.canonical_scale = 4, 224
        self.images_shape = [512, 512]
# Run the test cases through unittest when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
浏览文件 @
5b267474
...
...
@@ -21,7 +21,6 @@ import math
import
paddle
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
test_multiclass_nms_op
import
nms
from
test_anchor_generator_op
import
anchor_generator_in_python
import
copy
...
...
@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores,
return
proposals
,
scores
def
box_coder
(
all_anchors
,
bbox_deltas
,
variances
):
def
box_coder
(
all_anchors
,
bbox_deltas
,
variances
,
pixel_offset
=
True
):
"""
Decode proposals by anchors and bbox_deltas from RPN
"""
offset
=
1
if
pixel_offset
else
0
#proposals: xmin, ymin, xmax, ymax
proposals
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
#anchor_loc: width, height, center_x, center_y
anchor_loc
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
anchor_loc
[:,
0
]
=
all_anchors
[:,
2
]
-
all_anchors
[:,
0
]
+
1
anchor_loc
[:,
1
]
=
all_anchors
[:,
3
]
-
all_anchors
[:,
1
]
+
1
anchor_loc
[:,
0
]
=
all_anchors
[:,
2
]
-
all_anchors
[:,
0
]
+
offset
anchor_loc
[:,
1
]
=
all_anchors
[:,
3
]
-
all_anchors
[:,
1
]
+
offset
anchor_loc
[:,
2
]
=
all_anchors
[:,
0
]
+
0.5
*
anchor_loc
[:,
0
]
anchor_loc
[:,
3
]
=
all_anchors
[:,
1
]
+
0.5
*
anchor_loc
[:,
1
]
...
...
@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances):
pred_bbox
[
i
,
3
]
=
math
.
exp
(
min
(
bbox_deltas
[
i
,
3
],
math
.
log
(
1000
/
16.0
)))
*
anchor_loc
[
i
,
1
]
proposals
[:,
0
]
=
pred_bbox
[:,
0
]
-
pred_bbox
[:,
2
]
/
2
proposals
[:,
1
]
=
pred_bbox
[:,
1
]
-
pred_bbox
[:,
3
]
/
2
proposals
[:,
2
]
=
pred_bbox
[:,
0
]
+
pred_bbox
[:,
2
]
/
2
-
1
proposals
[:,
3
]
=
pred_bbox
[:,
1
]
+
pred_bbox
[:,
3
]
/
2
-
1
proposals
[:,
2
]
=
pred_bbox
[:,
0
]
+
pred_bbox
[:,
2
]
/
2
-
offset
proposals
[:,
3
]
=
pred_bbox
[:,
1
]
+
pred_bbox
[:,
3
]
/
2
-
offset
return
proposals
def clip_tiled_boxes(boxes, im_shape, pixel_offset=True):
    """Clip boxes to image boundaries, modifying ``boxes`` in place.

    im_shape is [height, width] and boxes has shape
    (N, 4 * num_tiled_boxes), laid out as repeating (x1, y1, x2, y2) groups.

    Args:
        boxes: 2-D array whose second dimension is a multiple of 4.
        im_shape: indexable with height at index 0 and width at index 1.
        pixel_offset: when True the upper bound is ``size - 1``; when False
            it is ``size``.

    Returns:
        The same ``boxes`` object, with every coordinate limited to
        ``[0, upper bound]``.

    Raises:
        AssertionError: if ``boxes.shape[1]`` is not divisible by 4.
    """
    assert boxes.shape[1] % 4 == 0, \
        'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
        boxes.shape[1]
    )
    offset = 1 if pixel_offset else 0
    x_upper = im_shape[1] - offset
    y_upper = im_shape[0] - offset
    # Position inside each 4-wide group -> its upper bound: x1, y1, x2, y2.
    for phase, upper in enumerate((x_upper, y_upper, x_upper, y_upper)):
        boxes[:, phase::4] = np.maximum(
            np.minimum(boxes[:, phase::4], upper), 0)
    return boxes
def filter_boxes(boxes, min_size, im_info, pixel_offset=True):
    """Return the row indices of the boxes that pass the size filter.

    ``min_size`` is first raised to at least 1.0.

    With ``pixel_offset`` True, a box is kept when both sides, measured as
    ``(max - min) / im_info[2] + 1``, are >= min_size and its centre
    (computed from the ``+ 1`` width/height) lies left of ``im_info[1]``
    and above ``im_info[0]``.

    With ``pixel_offset`` False, a box is kept when ``max - min`` is
    >= min_size on both sides; no centre test and no rescaling is applied.

    Args:
        boxes: 2-D array with columns 0..3 read as x1, y1, x2, y2.
        min_size: minimum side length.
        im_info: indexable; indices 0, 1 and 2 are read.
        pixel_offset: selects between the two rules above.

    Returns:
        1-D integer array of kept row indices.
    """
    im_scale = im_info[2]
    min_size = max(min_size, 1.0)
    offset = 1 if pixel_offset else 0
    ws = boxes[:, 2] - boxes[:, 0] + offset
    hs = boxes[:, 3] - boxes[:, 1] + offset
    if pixel_offset:
        ws_orig_scale = (boxes[:, 2] - boxes[:, 0]) / im_scale + 1
        hs_orig_scale = (boxes[:, 3] - boxes[:, 1]) / im_scale + 1
        x_ctr = boxes[:, 0] + ws / 2.
        y_ctr = boxes[:, 1] + hs / 2.
        keep = np.where((ws_orig_scale >= min_size) & (
            hs_orig_scale >= min_size) & (x_ctr < im_info[1]) & (
                y_ctr < im_info[0]))[0]
    else:
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
def
iou
(
box_a
,
box_b
):
def
iou
(
box_a
,
box_b
,
pixel_offset
=
True
):
"""
Apply intersection-over-union overlap between box_a and box_b
"""
...
...
@@ -209,9 +218,9 @@ def iou(box_a, box_b):
ymin_b
=
min
(
box_b
[
1
],
box_b
[
3
])
xmax_b
=
max
(
box_b
[
0
],
box_b
[
2
])
ymax_b
=
max
(
box_b
[
1
],
box_b
[
3
])
area_a
=
(
ymax_a
-
ymin_a
+
1
)
*
(
xmax_a
-
xmin_a
+
1
)
area_b
=
(
ymax_b
-
ymin_b
+
1
)
*
(
xmax_b
-
xmin_b
+
1
)
offset
=
1
if
pixel_offset
else
0
area_a
=
(
ymax_a
-
ymin_a
+
offset
)
*
(
xmax_a
-
xmin_a
+
offset
)
area_b
=
(
ymax_b
-
ymin_b
+
offset
)
*
(
xmax_b
-
xmin_b
+
offset
)
if
area_a
<=
0
and
area_b
<=
0
:
return
0.0
...
...
@@ -220,14 +229,14 @@ def iou(box_a, box_b):
xb
=
min
(
xmax_a
,
xmax_b
)
yb
=
min
(
ymax_a
,
ymax_b
)
inter_area
=
max
(
xb
-
xa
+
1
,
0.0
)
*
max
(
yb
-
ya
+
1
,
0.0
)
inter_area
=
max
(
xb
-
xa
+
offset
,
0.0
)
*
max
(
yb
-
ya
+
offset
,
0.0
)
iou_ratio
=
inter_area
/
(
area_a
+
area_b
-
inter_area
)
return
iou_ratio
def
nms
(
boxes
,
scores
,
nms_threshold
,
eta
=
1.0
):
def
nms
(
boxes
,
scores
,
nms_threshold
,
eta
=
1.0
,
pixel_offset
=
True
):
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
Args:
...
...
@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0):
for
k
in
range
(
len
(
selected_indices
)):
if
keep
:
kept_idx
=
selected_indices
[
k
]
overlap
=
iou
(
boxes
[
idx
],
boxes
[
kept_idx
])
overlap
=
iou
(
boxes
[
idx
],
boxes
[
kept_idx
],
pixel_offset
=
pixel_offset
)
keep
=
True
if
overlap
<=
adaptive_threshold
else
False
else
:
break
...
...
python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py
浏览文件 @
5b267474
...
...
@@ -21,7 +21,6 @@ import math
import
paddle
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
test_multiclass_nms_op
import
nms
from
test_anchor_generator_op
import
anchor_generator_in_python
import
copy
from
test_generate_proposals_op
import
clip_tiled_boxes
,
box_coder
,
nms
...
...
@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
def
generate_proposals_v2_in_python
(
scores
,
bbox_deltas
,
im_shape
,
anchors
,
variances
,
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
):
nms_thresh
,
min_size
,
eta
,
pixel_offset
):
all_anchors
=
anchors
.
reshape
(
-
1
,
4
)
rois
=
np
.
empty
((
0
,
5
),
dtype
=
np
.
float32
)
roi_probs
=
np
.
empty
((
0
,
1
),
dtype
=
np
.
float32
)
...
...
@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
img_i_boxes
,
img_i_probs
=
proposal_for_one_image
(
im_shape
[
img_idx
,
:],
all_anchors
,
variances
,
bbox_deltas
[
img_idx
,
:,
:,
:],
scores
[
img_idx
,
:,
:,
:],
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
)
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
,
pixel_offset
)
rois_num
.
append
(
img_i_probs
.
shape
[
0
])
rpn_rois
.
append
(
img_i_boxes
)
rpn_roi_probs
.
append
(
img_i_probs
)
...
...
@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
def
proposal_for_one_image
(
im_shape
,
all_anchors
,
variances
,
bbox_deltas
,
scores
,
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
):
min_size
,
eta
,
pixel_offset
):
# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:
# - bbox deltas will be (4 * A, H, W) format from conv output
...
...
@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores
=
scores
[
order
,
:]
bbox_deltas
=
bbox_deltas
[
order
,
:]
all_anchors
=
all_anchors
[
order
,
:]
proposals
=
box_coder
(
all_anchors
,
bbox_deltas
,
variances
)
proposals
=
box_coder
(
all_anchors
,
bbox_deltas
,
variances
,
pixel_offset
)
# clip proposals to image (may result in proposals with zero area
# that will be removed in the next step)
proposals
=
clip_tiled_boxes
(
proposals
,
im_shape
)
proposals
=
clip_tiled_boxes
(
proposals
,
im_shape
,
pixel_offset
)
# remove predicted boxes with height or width < min_size
keep
=
filter_boxes
(
proposals
,
min_size
,
im_shape
)
keep
=
filter_boxes
(
proposals
,
min_size
,
im_shape
,
pixel_offset
)
if
len
(
keep
)
==
0
:
proposals
=
np
.
zeros
((
1
,
4
)).
astype
(
'float32'
)
scores
=
np
.
zeros
((
1
,
1
)).
astype
(
'float32'
)
...
...
@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
keep
=
nms
(
boxes
=
proposals
,
scores
=
scores
,
nms_threshold
=
nms_thresh
,
eta
=
eta
)
eta
=
eta
,
pixel_offset
=
pixel_offset
)
if
post_nms_topN
>
0
and
post_nms_topN
<
len
(
keep
):
keep
=
keep
[:
post_nms_topN
]
proposals
=
proposals
[
keep
,
:]
...
...
@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
return
proposals
,
scores
def filter_boxes(boxes, min_size, im_shape, pixel_offset=True):
    """Return the row indices of the boxes that pass the size filter.

    With ``pixel_offset`` True, a box is kept when both sides, measured as
    ``max - min + 1``, are >= min_size and its centre lies left of
    ``im_shape[1]`` and above ``im_shape[0]``.

    With ``pixel_offset`` False, a box is kept when ``max - min`` is
    >= min_size on both sides; no centre test is applied.

    Args:
        boxes: 2-D array with columns 0..3 read as x1, y1, x2, y2.
        min_size: minimum side length.
        im_shape: indexable; indices 0 and 1 are read.
        pixel_offset: selects between the two rules above.

    Returns:
        1-D integer array of kept row indices.
    """
    # Never filter with a threshold below one unit.
    min_size = max(min_size, 1.0)
    offset = 1 if pixel_offset else 0
    ws = boxes[:, 2] - boxes[:, 0] + offset
    hs = boxes[:, 3] - boxes[:, 1] + offset
    if pixel_offset:
        x_ctr = boxes[:, 0] + ws / 2.
        y_ctr = boxes[:, 1] + hs / 2.
        keep = np.where((ws >= min_size) & (hs >= min_size) & (
            x_ctr < im_shape[1]) & (y_ctr < im_shape[0]))[0]
    else:
        keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep
...
...
@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest):
'post_nms_topN'
:
self
.
post_nms_topN
,
'nms_thresh'
:
self
.
nms_thresh
,
'min_size'
:
self
.
min_size
,
'eta'
:
self
.
eta
'eta'
:
self
.
eta
,
'pixel_offset'
:
self
.
pixel_offset
,
}
self
.
outputs
=
{
...
...
@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest):
self
.
nms_thresh
=
0.7
self
.
min_size
=
3.0
self
.
eta
=
1.
self
.
pixel_offset
=
True
def
init_test_input
(
self
):
batch_size
=
1
...
...
@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest):
self
.
rpn_rois
,
self
.
rpn_roi_probs
,
self
.
rois_num
=
generate_proposals_v2_in_python
(
self
.
scores
,
self
.
bbox_deltas
,
self
.
im_shape
,
self
.
anchors
,
self
.
variances
,
self
.
pre_nms_topN
,
self
.
post_nms_topN
,
self
.
nms_thresh
,
self
.
min_size
,
self
.
eta
)
self
.
nms_thresh
,
self
.
min_size
,
self
.
eta
,
self
.
pixel_offset
)
class
TestGenerateProposalsV2OutLodOp
(
TestGenerateProposalsV2Op
):
...
...
@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
self
.
nms_thresh
=
0.7
self
.
min_size
=
1000.0
self
.
eta
=
1.
self
.
pixel_offset
=
True
class TestGenerateProposalsV2OpNoOffset(TestGenerateProposalsV2Op):
    """generate_proposals_v2 test case configured with ``pixel_offset`` off."""

    def init_test_params(self):
        """Assign the operator parameters used by this case."""
        # Box-size convention exercised by this case.
        self.pixel_offset = False
        # Filtering / NMS settings.
        self.eta = 1.0
        self.min_size = 3.0
        self.nms_thresh = 0.7
        # Candidate limits before and after NMS.
        self.pre_nms_topN = 12000  # train 12000, test 2000
        # NOTE(review): the upstream note reads "train 6000, test 1000",
        # which does not match the value set here - confirm intent.
        self.post_nms_topN = 5000
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_roi_align_op.py
浏览文件 @
5b267474
...
...
@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest):
'spatial_scale'
:
self
.
spatial_scale
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_width'
:
self
.
pooled_width
,
'sampling_ratio'
:
self
.
sampling_ratio
'sampling_ratio'
:
self
.
sampling_ratio
,
'aligned'
:
self
.
aligned
,
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
...
...
@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest):
self
.
pooled_height
=
2
self
.
pooled_width
=
2
self
.
sampling_ratio
=
-
1
self
.
aligned
=
False
self
.
x
=
np
.
random
.
random
(
self
.
x_dim
).
astype
(
'float64'
)
...
...
@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest):
(
self
.
rois_num
,
self
.
channels
,
self
.
pooled_height
,
self
.
pooled_width
)).
astype
(
'float64'
)
offset
=
0.5
if
self
.
aligned
else
0.
for
i
in
range
(
self
.
rois_num
):
roi
=
self
.
rois
[
i
]
roi_batch_id
=
int
(
roi
[
0
])
x_i
=
self
.
x
[
roi_batch_id
]
roi_xmin
=
roi
[
1
]
*
self
.
spatial_scale
roi_ymin
=
roi
[
2
]
*
self
.
spatial_scale
roi_xmax
=
roi
[
3
]
*
self
.
spatial_scale
roi_ymax
=
roi
[
4
]
*
self
.
spatial_scale
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
1
)
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
1
)
roi_xmin
=
roi
[
1
]
*
self
.
spatial_scale
-
offset
roi_ymin
=
roi
[
2
]
*
self
.
spatial_scale
-
offset
roi_xmax
=
roi
[
3
]
*
self
.
spatial_scale
-
offset
roi_ymax
=
roi
[
4
]
*
self
.
spatial_scale
-
offset
roi_width
=
roi_xmax
-
roi_xmin
roi_height
=
roi_ymax
-
roi_ymin
if
not
self
.
aligned
:
roi_width
=
max
(
roi_width
,
1
)
roi_height
=
max
(
roi_height
,
1
)
bin_size_h
=
float
(
roi_height
)
/
float
(
self
.
pooled_height
)
bin_size_w
=
float
(
roi_width
)
/
float
(
self
.
pooled_width
)
roi_bin_grid_h
=
self
.
sampling_ratio
if
self
.
sampling_ratio
>
0
else
\
...
...
@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp):
'spatial_scale'
:
self
.
spatial_scale
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_width'
:
self
.
pooled_width
,
'sampling_ratio'
:
self
.
sampling_ratio
'sampling_ratio'
:
self
.
sampling_ratio
,
'aligned'
:
self
.
aligned
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
class TestROIAlignOpWithAligned(TestROIAlignOp):
    """ROI-align test case configured with ``aligned`` set to True."""

    def init_test_case(self):
        """Assign the case dimensions and parameters, then draw the input."""
        # Coordinate convention exercised by this case.
        self.aligned = True
        self.sampling_ratio = -1
        self.pooled_height = self.pooled_width = 2
        self.spatial_scale = 0.5

        # n, c, h, w
        n, c, h, w = 3, 3, 8, 6
        self.batch_size, self.channels = n, c
        self.height, self.width = h, w
        self.x_dim = (n, c, h, w)

        # Random float64 input of shape x_dim.
        self.x = np.random.random(self.x_dim).astype('float64')
# Run the test cases through unittest when this module is executed directly.
if __name__ == '__main__':
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录