Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5b267474
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5b267474
编写于
2月 19, 2021
作者:
G
Guanghua Yu
提交者:
GitHub
2月 19, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add offset parameter in roi_align,generate_proposals.etc ops (#30864)
* add parameter in roi_align op
上级
75f81233
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
354 addition
and
187 deletion
+354
-187
paddle/fluid/operators/detection/bbox_util.cu.h
paddle/fluid/operators/detection/bbox_util.cu.h
+48
-31
paddle/fluid/operators/detection/bbox_util.h
paddle/fluid/operators/detection/bbox_util.h
+43
-29
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
.../fluid/operators/detection/distribute_fpn_proposals_op.cc
+8
-1
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu
.../fluid/operators/detection/distribute_fpn_proposals_op.cu
+5
-4
paddle/fluid/operators/detection/distribute_fpn_proposals_op.h
...e/fluid/operators/detection/distribute_fpn_proposals_op.h
+6
-5
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+22
-9
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
+11
-8
paddle/fluid/operators/detection/nms_util.h
paddle/fluid/operators/detection/nms_util.h
+5
-3
paddle/fluid/operators/roi_align_op.cc
paddle/fluid/operators/roi_align_op.cc
+13
-2
paddle/fluid/operators/roi_align_op.cu
paddle/fluid/operators/roi_align_op.cu
+37
-25
paddle/fluid/operators/roi_align_op.h
paddle/fluid/operators/roi_align_op.h
+27
-12
python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py
...fluid/tests/unittests/test_distribute_fpn_proposals_op.py
+20
-5
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
...addle/fluid/tests/unittests/test_generate_proposals_op.py
+39
-28
python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py
...le/fluid/tests/unittests/test_generate_proposals_v2_op.py
+35
-17
python/paddle/fluid/tests/unittests/test_roi_align_op.py
python/paddle/fluid/tests/unittests/test_roi_align_op.py
+35
-8
未找到文件。
paddle/fluid/operators/detection/bbox_util.cu.h
浏览文件 @
5b267474
...
@@ -77,17 +77,20 @@ struct BoxDecodeAndClipFunctor {
...
@@ -77,17 +77,20 @@ struct BoxDecodeAndClipFunctor {
const
T
*
var
;
const
T
*
var
;
const
int
*
index
;
const
int
*
index
;
const
T
*
im_info
;
const
T
*
im_info
;
const
bool
pixel_offset
;
T
*
proposals
;
T
*
proposals
;
BoxDecodeAndClipFunctor
(
const
T
*
anchor
,
const
T
*
deltas
,
const
T
*
var
,
BoxDecodeAndClipFunctor
(
const
T
*
anchor
,
const
T
*
deltas
,
const
T
*
var
,
const
int
*
index
,
const
T
*
im_info
,
T
*
proposals
)
const
int
*
index
,
const
T
*
im_info
,
T
*
proposals
,
bool
pixel_offset
=
true
)
:
anchor
(
anchor
),
:
anchor
(
anchor
),
deltas
(
deltas
),
deltas
(
deltas
),
var
(
var
),
var
(
var
),
index
(
index
),
index
(
index
),
im_info
(
im_info
),
im_info
(
im_info
),
proposals
(
proposals
)
{}
proposals
(
proposals
),
pixel_offset
(
pixel_offset
)
{}
T
bbox_clip_default
{
static_cast
<
T
>
(
kBBoxClipDefault
)};
T
bbox_clip_default
{
static_cast
<
T
>
(
kBBoxClipDefault
)};
...
@@ -98,8 +101,9 @@ struct BoxDecodeAndClipFunctor {
...
@@ -98,8 +101,9 @@ struct BoxDecodeAndClipFunctor {
T
axmax
=
anchor
[
k
+
2
];
T
axmax
=
anchor
[
k
+
2
];
T
aymax
=
anchor
[
k
+
3
];
T
aymax
=
anchor
[
k
+
3
];
T
w
=
axmax
-
axmin
+
1.0
;
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
h
=
aymax
-
aymin
+
1.0
;
T
w
=
axmax
-
axmin
+
offset
;
T
h
=
aymax
-
aymin
+
offset
;
T
cx
=
axmin
+
0.5
*
w
;
T
cx
=
axmin
+
0.5
*
w
;
T
cy
=
aymin
+
0.5
*
h
;
T
cy
=
aymin
+
0.5
*
h
;
...
@@ -123,13 +127,13 @@ struct BoxDecodeAndClipFunctor {
...
@@ -123,13 +127,13 @@ struct BoxDecodeAndClipFunctor {
T
oxmin
=
d_cx
-
d_w
*
0.5
;
T
oxmin
=
d_cx
-
d_w
*
0.5
;
T
oymin
=
d_cy
-
d_h
*
0.5
;
T
oymin
=
d_cy
-
d_h
*
0.5
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
1.
;
T
oxmax
=
d_cx
+
d_w
*
0.5
-
offset
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
1.
;
T
oymax
=
d_cy
+
d_h
*
0.5
-
offset
;
proposals
[
i
*
4
]
=
Max
(
Min
(
oxmin
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
]
=
Max
(
Min
(
oxmin
,
im_info
[
1
]
-
offset
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
(
Min
(
oymin
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
1
]
=
Max
(
Min
(
oymin
,
im_info
[
0
]
-
offset
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
(
Min
(
oxmax
,
im_info
[
1
]
-
1.
),
0.
);
proposals
[
i
*
4
+
2
]
=
Max
(
Min
(
oxmax
,
im_info
[
1
]
-
offset
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
(
Min
(
oymax
,
im_info
[
0
]
-
1.
),
0.
);
proposals
[
i
*
4
+
3
]
=
Max
(
Min
(
oymax
,
im_info
[
0
]
-
offset
),
0.
);
}
}
__device__
__forceinline__
T
Min
(
T
a
,
T
b
)
const
{
return
a
>
b
?
b
:
a
;
}
__device__
__forceinline__
T
Min
(
T
a
,
T
b
)
const
{
return
a
>
b
?
b
:
a
;
}
...
@@ -141,7 +145,8 @@ template <typename T, int BlockSize>
...
@@ -141,7 +145,8 @@ template <typename T, int BlockSize>
static
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
static
__global__
void
FilterBBoxes
(
const
T
*
bboxes
,
const
T
*
im_info
,
const
T
min_size
,
const
int
num
,
const
T
min_size
,
const
int
num
,
int
*
keep_num
,
int
*
keep
,
int
*
keep_num
,
int
*
keep
,
bool
is_scale
=
true
)
{
bool
is_scale
=
true
,
bool
pixel_offset
=
true
)
{
T
im_h
=
im_info
[
0
];
T
im_h
=
im_info
[
0
];
T
im_w
=
im_info
[
1
];
T
im_w
=
im_info
[
1
];
...
@@ -157,19 +162,25 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
...
@@ -157,19 +162,25 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
T
ymin
=
bboxes
[
k
+
1
];
T
ymin
=
bboxes
[
k
+
1
];
T
xmax
=
bboxes
[
k
+
2
];
T
xmax
=
bboxes
[
k
+
2
];
T
ymax
=
bboxes
[
k
+
3
];
T
ymax
=
bboxes
[
k
+
3
];
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
w
=
xmax
-
xmin
+
offset
;
T
h
=
ymax
-
ymin
+
offset
;
if
(
pixel_offset
)
{
T
cx
=
xmin
+
w
/
2.
;
T
cy
=
ymin
+
h
/
2.
;
if
(
is_scale
)
{
w
=
(
xmax
-
xmin
)
/
im_info
[
2
]
+
1.
;
h
=
(
ymax
-
ymin
)
/
im_info
[
2
]
+
1.
;
}
T
w
=
xmax
-
xmin
+
1.0
;
if
(
w
>=
min_size
&&
h
>=
min_size
&&
cx
<=
im_w
&&
cy
<=
im_h
)
{
T
h
=
ymax
-
ymin
+
1.0
;
keep_index
[
threadIdx
.
x
]
=
i
;
T
cx
=
xmin
+
w
/
2.
;
}
T
cy
=
ymin
+
h
/
2.
;
}
else
{
if
(
w
>=
min_size
&&
h
>=
min_size
)
{
if
(
is_scale
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
w
=
(
xmax
-
xmin
)
/
im_info
[
2
]
+
1.
;
}
h
=
(
ymax
-
ymin
)
/
im_info
[
2
]
+
1.
;
}
if
(
w
>=
min_size
&&
h
>=
min_size
&&
cx
<=
im_w
&&
cy
<=
im_h
)
{
keep_index
[
threadIdx
.
x
]
=
i
;
}
}
__syncthreads
();
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
...
@@ -187,19 +198,23 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
...
@@ -187,19 +198,23 @@ static __global__ void FilterBBoxes(const T *bboxes, const T *im_info,
}
}
}
}
static
__device__
float
IoU
(
const
float
*
a
,
const
float
*
b
)
{
static
__device__
float
IoU
(
const
float
*
a
,
const
float
*
b
,
const
bool
pixel_offset
=
true
)
{
float
offset
=
pixel_offset
?
static_cast
<
float
>
(
1.0
)
:
0
;
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
left
=
max
(
a
[
0
],
b
[
0
]),
right
=
min
(
a
[
2
],
b
[
2
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
top
=
max
(
a
[
1
],
b
[
1
]),
bottom
=
min
(
a
[
3
],
b
[
3
]);
float
width
=
max
(
right
-
left
+
1
,
0.
f
),
height
=
max
(
bottom
-
top
+
1
,
0.
f
);
float
width
=
max
(
right
-
left
+
offset
,
0.
f
),
height
=
max
(
bottom
-
top
+
offset
,
0.
f
);
float
inter_s
=
width
*
height
;
float
inter_s
=
width
*
height
;
float
s_a
=
(
a
[
2
]
-
a
[
0
]
+
1
)
*
(
a
[
3
]
-
a
[
1
]
+
1
);
float
s_a
=
(
a
[
2
]
-
a
[
0
]
+
offset
)
*
(
a
[
3
]
-
a
[
1
]
+
offset
);
float
s_b
=
(
b
[
2
]
-
b
[
0
]
+
1
)
*
(
b
[
3
]
-
b
[
1
]
+
1
);
float
s_b
=
(
b
[
2
]
-
b
[
0
]
+
offset
)
*
(
b
[
3
]
-
b
[
1
]
+
offset
);
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
return
inter_s
/
(
s_a
+
s_b
-
inter_s
);
}
}
static
__global__
void
NMSKernel
(
const
int
n_boxes
,
static
__global__
void
NMSKernel
(
const
int
n_boxes
,
const
float
nms_overlap_thresh
,
const
float
nms_overlap_thresh
,
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
)
{
const
float
*
dev_boxes
,
uint64_t
*
dev_mask
,
bool
pixel_offset
=
true
)
{
const
int
row_start
=
blockIdx
.
y
;
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
const
int
col_start
=
blockIdx
.
x
;
...
@@ -231,7 +246,8 @@ static __global__ void NMSKernel(const int n_boxes,
...
@@ -231,7 +246,8 @@ static __global__ void NMSKernel(const int n_boxes,
start
=
threadIdx
.
x
+
1
;
start
=
threadIdx
.
x
+
1
;
}
}
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
if
(
IoU
(
cur_box
,
block_boxes
+
i
*
4
)
>
nms_overlap_thresh
)
{
if
(
IoU
(
cur_box
,
block_boxes
+
i
*
4
,
pixel_offset
)
>
nms_overlap_thresh
)
{
t
|=
1ULL
<<
i
;
t
|=
1ULL
<<
i
;
}
}
}
}
...
@@ -243,7 +259,7 @@ static __global__ void NMSKernel(const int n_boxes,
...
@@ -243,7 +259,7 @@ static __global__ void NMSKernel(const int n_boxes,
template
<
typename
T
>
template
<
typename
T
>
static
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
static
void
NMS
(
const
platform
::
CUDADeviceContext
&
ctx
,
const
Tensor
&
proposals
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
const
Tensor
&
sorted_indices
,
const
T
nms_threshold
,
Tensor
*
keep_out
)
{
Tensor
*
keep_out
,
bool
pixel_offset
=
true
)
{
int
boxes_num
=
proposals
.
dims
()[
0
];
int
boxes_num
=
proposals
.
dims
()[
0
];
const
int
col_blocks
=
DIVUP
(
boxes_num
,
kThreadsPerBlock
);
const
int
col_blocks
=
DIVUP
(
boxes_num
,
kThreadsPerBlock
);
dim3
blocks
(
DIVUP
(
boxes_num
,
kThreadsPerBlock
),
dim3
blocks
(
DIVUP
(
boxes_num
,
kThreadsPerBlock
),
...
@@ -255,7 +271,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
...
@@ -255,7 +271,8 @@ static void NMS(const platform::CUDADeviceContext &ctx, const Tensor &proposals,
framework
::
Vector
<
uint64_t
>
mask
(
boxes_num
*
col_blocks
);
framework
::
Vector
<
uint64_t
>
mask
(
boxes_num
*
col_blocks
);
NMSKernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_threshold
,
boxes
,
NMSKernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_threshold
,
boxes
,
mask
.
CUDAMutableData
(
BOOST_GET_CONST
(
mask
.
CUDAMutableData
(
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
ctx
.
GetPlace
())));
platform
::
CUDAPlace
,
ctx
.
GetPlace
())),
pixel_offset
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
std
::
vector
<
uint64_t
>
remv
(
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
memset
(
&
remv
[
0
],
0
,
sizeof
(
uint64_t
)
*
col_blocks
);
...
...
paddle/fluid/operators/detection/bbox_util.h
浏览文件 @
5b267474
...
@@ -31,7 +31,7 @@ struct RangeInitFunctor {
...
@@ -31,7 +31,7 @@ struct RangeInitFunctor {
};
};
template
<
typename
T
>
template
<
typename
T
>
inline
HOSTDEVICE
T
RoIArea
(
const
T
*
box
,
bool
normalized
)
{
inline
HOSTDEVICE
T
RoIArea
(
const
T
*
box
,
bool
pixel_offset
=
true
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are is invalid
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
// (e.g. xmax < xmin or ymax < ymin), return 0.
...
@@ -39,11 +39,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
...
@@ -39,11 +39,11 @@ inline HOSTDEVICE T RoIArea(const T* box, bool normalized) {
}
else
{
}
else
{
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
h
=
box
[
3
]
-
box
[
1
];
const
T
h
=
box
[
3
]
-
box
[
1
];
if
(
normalized
)
{
if
(
pixel_offset
)
{
return
w
*
h
;
}
else
{
// If coordinate values are not within range [0, 1].
// If coordinate values are not within range [0, 1].
return
(
w
+
1
)
*
(
h
+
1
);
return
(
w
+
1
)
*
(
h
+
1
);
}
else
{
return
w
*
h
;
}
}
}
}
}
}
...
@@ -157,10 +157,12 @@ template <class T>
...
@@ -157,10 +157,12 @@ template <class T>
void
ClipTiledBoxes
(
const
platform
::
DeviceContext
&
ctx
,
void
ClipTiledBoxes
(
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Tensor
&
im_info
,
const
framework
::
Tensor
&
im_info
,
const
framework
::
Tensor
&
input_boxes
,
const
framework
::
Tensor
&
input_boxes
,
framework
::
Tensor
*
out
,
bool
is_scale
=
true
)
{
framework
::
Tensor
*
out
,
bool
is_scale
=
true
,
bool
pixel_offset
=
true
)
{
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
out_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
input_boxes_data
=
input_boxes
.
data
<
T
>
();
const
T
*
input_boxes_data
=
input_boxes
.
data
<
T
>
();
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
T
zero
(
0
);
T
zero
(
0
);
T
im_w
=
T
im_w
=
is_scale
?
round
(
im_info_data
[
1
]
/
im_info_data
[
2
])
:
im_info_data
[
1
];
is_scale
?
round
(
im_info_data
[
1
]
/
im_info_data
[
2
])
:
im_info_data
[
1
];
...
@@ -168,13 +170,17 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
...
@@ -168,13 +170,17 @@ void ClipTiledBoxes(const platform::DeviceContext& ctx,
is_scale
?
round
(
im_info_data
[
0
]
/
im_info_data
[
2
])
:
im_info_data
[
0
];
is_scale
?
round
(
im_info_data
[
0
]
/
im_info_data
[
2
])
:
im_info_data
[
0
];
for
(
int64_t
i
=
0
;
i
<
input_boxes
.
numel
();
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
input_boxes
.
numel
();
++
i
)
{
if
(
i
%
4
==
0
)
{
if
(
i
%
4
==
0
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
offset
),
zero
);
}
else
if
(
i
%
4
==
1
)
{
}
else
if
(
i
%
4
==
1
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
offset
),
zero
);
}
else
if
(
i
%
4
==
2
)
{
}
else
if
(
i
%
4
==
2
)
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_w
-
offset
),
zero
);
}
else
{
}
else
{
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
1
),
zero
);
out_data
[
i
]
=
std
::
max
(
std
::
min
(
input_boxes_data
[
i
],
im_h
-
offset
),
zero
);
}
}
}
}
}
}
...
@@ -184,29 +190,35 @@ template <class T>
...
@@ -184,29 +190,35 @@ template <class T>
void
FilterBoxes
(
const
platform
::
DeviceContext
&
ctx
,
void
FilterBoxes
(
const
platform
::
DeviceContext
&
ctx
,
const
framework
::
Tensor
*
boxes
,
float
min_size
,
const
framework
::
Tensor
*
boxes
,
float
min_size
,
const
framework
::
Tensor
&
im_info
,
bool
is_scale
,
const
framework
::
Tensor
&
im_info
,
bool
is_scale
,
framework
::
Tensor
*
keep
)
{
framework
::
Tensor
*
keep
,
bool
pixel_offset
=
true
)
{
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
im_info_data
=
im_info
.
data
<
T
>
();
const
T
*
boxes_data
=
boxes
->
data
<
T
>
();
const
T
*
boxes_data
=
boxes
->
data
<
T
>
();
keep
->
Resize
({
boxes
->
dims
()[
0
]});
keep
->
Resize
({
boxes
->
dims
()[
0
]});
min_size
=
std
::
max
(
min_size
,
1.0
f
);
min_size
=
std
::
max
(
min_size
,
1.0
f
);
int
*
keep_data
=
keep
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
int
*
keep_data
=
keep
->
mutable_data
<
int
>
(
ctx
.
GetPlace
());
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
int
keep_len
=
0
;
int
keep_len
=
0
;
for
(
int
i
=
0
;
i
<
boxes
->
dims
()[
0
];
++
i
)
{
for
(
int
i
=
0
;
i
<
boxes
->
dims
()[
0
];
++
i
)
{
T
ws
=
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
]
+
1
;
T
ws
=
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
]
+
offset
;
T
hs
=
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
]
+
1
;
T
hs
=
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
]
+
offset
;
T
x_ctr
=
boxes_data
[
4
*
i
]
+
ws
/
2
;
if
(
pixel_offset
)
{
T
y_ctr
=
boxes_data
[
4
*
i
+
1
]
+
hs
/
2
;
T
x_ctr
=
boxes_data
[
4
*
i
]
+
ws
/
2
;
T
y_ctr
=
boxes_data
[
4
*
i
+
1
]
+
hs
/
2
;
if
(
is_scale
)
{
if
(
is_scale
)
{
ws
=
(
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
])
/
im_info_data
[
2
]
+
1
;
ws
=
(
boxes_data
[
4
*
i
+
2
]
-
boxes_data
[
4
*
i
])
/
im_info_data
[
2
]
+
1
;
hs
=
hs
=
(
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
])
/
im_info_data
[
2
]
+
(
boxes_data
[
4
*
i
+
3
]
-
boxes_data
[
4
*
i
+
1
])
/
im_info_data
[
2
]
+
1
;
1
;
}
}
if
(
ws
>=
min_size
&&
hs
>=
min_size
&&
x_ctr
<=
im_info_data
[
1
]
&&
if
(
ws
>=
min_size
&&
hs
>=
min_size
&&
x_ctr
<=
im_info_data
[
1
]
&&
y_ctr
<=
im_info_data
[
0
])
{
y_ctr
<=
im_info_data
[
0
])
{
keep_data
[
keep_len
++
]
=
i
;
keep_data
[
keep_len
++
]
=
i
;
}
}
else
{
if
(
ws
>=
min_size
&&
hs
>=
min_size
)
{
keep_data
[
keep_len
++
]
=
i
;
}
}
}
}
}
keep
->
Resize
({
keep_len
});
keep
->
Resize
({
keep_len
});
...
@@ -216,8 +228,8 @@ template <class T>
...
@@ -216,8 +228,8 @@ template <class T>
static
void
BoxCoder
(
const
platform
::
DeviceContext
&
ctx
,
static
void
BoxCoder
(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
all_anchors
,
framework
::
Tensor
*
all_anchors
,
framework
::
Tensor
*
bbox_deltas
,
framework
::
Tensor
*
bbox_deltas
,
framework
::
Tensor
*
variances
,
framework
::
Tensor
*
variances
,
framework
::
Tensor
*
proposals
,
framework
::
Tensor
*
proposals
)
{
const
bool
pixel_offset
=
true
)
{
T
*
proposals_data
=
proposals
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
proposals_data
=
proposals
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int64_t
row
=
all_anchors
->
dims
()[
0
];
int64_t
row
=
all_anchors
->
dims
()[
0
];
...
@@ -230,9 +242,11 @@ static void BoxCoder(const platform::DeviceContext& ctx,
...
@@ -230,9 +242,11 @@ static void BoxCoder(const platform::DeviceContext& ctx,
variances_data
=
variances
->
data
<
T
>
();
variances_data
=
variances
->
data
<
T
>
();
}
}
T
offset
=
pixel_offset
?
static_cast
<
T
>
(
1.0
)
:
0
;
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
row
;
++
i
)
{
T
anchor_width
=
anchor_data
[
i
*
len
+
2
]
-
anchor_data
[
i
*
len
]
+
1.0
;
T
anchor_width
=
anchor_data
[
i
*
len
+
2
]
-
anchor_data
[
i
*
len
]
+
offset
;
T
anchor_height
=
anchor_data
[
i
*
len
+
3
]
-
anchor_data
[
i
*
len
+
1
]
+
1.0
;
T
anchor_height
=
anchor_data
[
i
*
len
+
3
]
-
anchor_data
[
i
*
len
+
1
]
+
offset
;
T
anchor_center_x
=
anchor_data
[
i
*
len
]
+
0.5
*
anchor_width
;
T
anchor_center_x
=
anchor_data
[
i
*
len
]
+
0.5
*
anchor_width
;
T
anchor_center_y
=
anchor_data
[
i
*
len
+
1
]
+
0.5
*
anchor_height
;
T
anchor_center_y
=
anchor_data
[
i
*
len
+
1
]
+
0.5
*
anchor_height
;
...
@@ -270,8 +284,8 @@ static void BoxCoder(const platform::DeviceContext& ctx,
...
@@ -270,8 +284,8 @@ static void BoxCoder(const platform::DeviceContext& ctx,
proposals_data
[
i
*
len
]
=
bbox_center_x
-
bbox_width
/
2
;
proposals_data
[
i
*
len
]
=
bbox_center_x
-
bbox_width
/
2
;
proposals_data
[
i
*
len
+
1
]
=
bbox_center_y
-
bbox_height
/
2
;
proposals_data
[
i
*
len
+
1
]
=
bbox_center_y
-
bbox_height
/
2
;
proposals_data
[
i
*
len
+
2
]
=
bbox_center_x
+
bbox_width
/
2
-
1
;
proposals_data
[
i
*
len
+
2
]
=
bbox_center_x
+
bbox_width
/
2
-
offset
;
proposals_data
[
i
*
len
+
3
]
=
bbox_center_y
+
bbox_height
/
2
-
1
;
proposals_data
[
i
*
len
+
3
]
=
bbox_center_y
+
bbox_height
/
2
-
offset
;
}
}
// return proposals;
// return proposals;
}
}
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
浏览文件 @
5b267474
...
@@ -103,6 +103,9 @@ class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -103,6 +103,9 @@ class DistributeFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr
<
int
>
(
"refer_scale"
,
AddAttr
<
int
>
(
"refer_scale"
,
"The referring scale of FPN layer with"
"The referring scale of FPN layer with"
" specified level"
);
" specified level"
);
AddAttr
<
bool
>
(
"pixel_offset"
,
"(bool, default True),"
,
"If true, im_shape pixel offset is 1."
)
.
SetDefault
(
true
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator distribute all proposals into different fpn level,
This operator distribute all proposals into different fpn level,
with respect to scale of the proposals, the referring scale and
with respect to scale of the proposals, the referring scale and
...
@@ -134,4 +137,8 @@ REGISTER_OP_VERSION(distribute_fpn_proposals)
...
@@ -134,4 +137,8 @@ REGISTER_OP_VERSION(distribute_fpn_proposals)
.
NewOutput
(
"MultiLevelRoisNum"
,
.
NewOutput
(
"MultiLevelRoisNum"
,
"The RoIs' number of each image on multiple "
"The RoIs' number of each image on multiple "
"levels. The number on each level has the shape of (B),"
"levels. The number on each level has the shape of (B),"
"B is the number of images."
));
"B is the number of images."
))
.
AddCheckpoint
(
R"ROC(Register distribute_fpn_proposals for adding the attribute of pixel_offset)ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"pixel_offset"
,
"If true, im_shape pixel offset is 1."
,
true
));
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu
浏览文件 @
5b267474
...
@@ -43,15 +43,15 @@ __global__ void GPUDistFpnProposalsHelper(
...
@@ -43,15 +43,15 @@ __global__ void GPUDistFpnProposalsHelper(
const
int
nthreads
,
const
T
*
rois
,
const
int
lod_size
,
const
int
nthreads
,
const
T
*
rois
,
const
int
lod_size
,
const
int
refer_level
,
const
int
refer_scale
,
const
int
max_level
,
const
int
refer_level
,
const
int
refer_scale
,
const
int
max_level
,
const
int
min_level
,
int
*
roi_batch_id_data
,
int
*
sub_lod_list
,
const
int
min_level
,
int
*
roi_batch_id_data
,
int
*
sub_lod_list
,
int
*
target_lvls
)
{
int
*
target_lvls
,
bool
pixel_offset
=
true
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
const
T
*
offset_roi
=
rois
+
i
*
BBoxSize
;
const
T
*
offset_roi
=
rois
+
i
*
BBoxSize
;
int
roi_batch_ind
=
roi_batch_id_data
[
i
];
int
roi_batch_ind
=
roi_batch_id_data
[
i
];
// get the target level of current rois
// get the target level of current rois
T
roi_area
=
RoIArea
(
offset_roi
,
false
);
T
roi_area
=
RoIArea
(
offset_roi
,
pixel_offset
);
T
roi_scale
=
sqrt
(
roi_area
);
T
roi_scale
=
sqrt
(
roi_area
);
int
tgt_lvl
=
floor
(
int
tgt_lvl
=
floor
(
log2
(
roi_scale
/
static_cast
<
T
>
(
refer_scale
)
+
(
T
)
1e-
6
)
+
refer_level
);
log2
(
roi_scale
/
static_cast
<
T
>
(
refer_scale
)
+
(
T
)
1e-
8
)
+
refer_level
);
tgt_lvl
=
min
(
max_level
,
max
(
tgt_lvl
,
min_level
));
tgt_lvl
=
min
(
max_level
,
max
(
tgt_lvl
,
min_level
));
target_lvls
[
i
]
=
tgt_lvl
;
target_lvls
[
i
]
=
tgt_lvl
;
// compute number of rois in the same batch and same target level
// compute number of rois in the same batch and same target level
...
@@ -73,6 +73,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
...
@@ -73,6 +73,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
int
max_level
=
ctx
.
Attr
<
int
>
(
"max_level"
);
const
int
max_level
=
ctx
.
Attr
<
int
>
(
"max_level"
);
const
int
refer_level
=
ctx
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_level
=
ctx
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_scale
=
ctx
.
Attr
<
int
>
(
"refer_scale"
);
const
int
refer_scale
=
ctx
.
Attr
<
int
>
(
"refer_scale"
);
const
bool
pixel_offset
=
ctx
.
Attr
<
bool
>
(
"pixel_offset"
);
int
num_level
=
max_level
-
min_level
+
1
;
int
num_level
=
max_level
-
min_level
+
1
;
// check that the fpn_rois is not empty
// check that the fpn_rois is not empty
...
@@ -126,7 +127,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
...
@@ -126,7 +127,7 @@ class GPUDistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
GPUDistFpnProposalsHelper
<
T
><<<
dist_blocks
,
threads
>>>
(
GPUDistFpnProposalsHelper
<
T
><<<
dist_blocks
,
threads
>>>
(
roi_num
,
fpn_rois
->
data
<
T
>
(),
lod_size
,
refer_level
,
refer_scale
,
roi_num
,
fpn_rois
->
data
<
T
>
(),
lod_size
,
refer_level
,
refer_scale
,
max_level
,
min_level
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
max_level
,
min_level
,
roi_batch_id_list_gpu
.
data
<
int
>
(),
sub_lod_list_data
,
target_lvls_data
);
sub_lod_list_data
,
target_lvls_data
,
pixel_offset
);
dev_ctx
.
Wait
();
dev_ctx
.
Wait
();
auto
place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_ctx
.
GetPlace
());
auto
place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
dev_ctx
.
GetPlace
());
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.h
浏览文件 @
5b267474
...
@@ -44,7 +44,7 @@ inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
...
@@ -44,7 +44,7 @@ inline std::vector<size_t> GetLodFromRoisNum(const Tensor* rois_num) {
}
}
template
<
typename
T
>
template
<
typename
T
>
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
normalized
)
{
static
inline
T
BBoxArea
(
const
T
*
box
,
bool
pixel_offset
)
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
if
(
box
[
2
]
<
box
[
0
]
||
box
[
3
]
<
box
[
1
])
{
// If coordinate values are is invalid
// If coordinate values are is invalid
// (e.g. xmax < xmin or ymax < ymin), return 0.
// (e.g. xmax < xmin or ymax < ymin), return 0.
...
@@ -52,11 +52,11 @@ static inline T BBoxArea(const T* box, bool normalized) {
...
@@ -52,11 +52,11 @@ static inline T BBoxArea(const T* box, bool normalized) {
}
else
{
}
else
{
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
w
=
box
[
2
]
-
box
[
0
];
const
T
h
=
box
[
3
]
-
box
[
1
];
const
T
h
=
box
[
3
]
-
box
[
1
];
if
(
normalized
)
{
if
(
pixel_offset
)
{
return
w
*
h
;
}
else
{
// If coordinate values are not within range [0, 1].
// If coordinate values are not within range [0, 1].
return
(
w
+
1
)
*
(
h
+
1
);
return
(
w
+
1
)
*
(
h
+
1
);
}
else
{
return
w
*
h
;
}
}
}
}
}
}
...
@@ -77,6 +77,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
...
@@ -77,6 +77,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
int
max_level
=
context
.
Attr
<
int
>
(
"max_level"
);
const
int
max_level
=
context
.
Attr
<
int
>
(
"max_level"
);
const
int
refer_level
=
context
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_level
=
context
.
Attr
<
int
>
(
"refer_level"
);
const
int
refer_scale
=
context
.
Attr
<
int
>
(
"refer_scale"
);
const
int
refer_scale
=
context
.
Attr
<
int
>
(
"refer_scale"
);
const
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
const
int
num_level
=
max_level
-
min_level
+
1
;
const
int
num_level
=
max_level
-
min_level
+
1
;
// check that the fpn_rois is not empty
// check that the fpn_rois is not empty
...
@@ -108,7 +109,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
...
@@ -108,7 +109,7 @@ class DistributeFpnProposalsOpKernel : public framework::OpKernel<T> {
const
T
*
rois_data
=
fpn_rois_slice
.
data
<
T
>
();
const
T
*
rois_data
=
fpn_rois_slice
.
data
<
T
>
();
for
(
int
j
=
0
;
j
<
fpn_rois_slice
.
dims
()[
0
];
++
j
)
{
for
(
int
j
=
0
;
j
<
fpn_rois_slice
.
dims
()[
0
];
++
j
)
{
// get the target level of current rois
// get the target level of current rois
T
roi_scale
=
std
::
sqrt
(
BBoxArea
(
rois_data
,
false
));
T
roi_scale
=
std
::
sqrt
(
BBoxArea
(
rois_data
,
pixel_offset
));
int
tgt_lvl
=
std
::
floor
(
std
::
log2
(
roi_scale
/
refer_scale
+
(
T
)
1e-6
)
+
int
tgt_lvl
=
std
::
floor
(
std
::
log2
(
roi_scale
/
refer_scale
+
(
T
)
1e-6
)
+
refer_level
);
refer_level
);
tgt_lvl
=
std
::
min
(
max_level
,
std
::
max
(
tgt_lvl
,
min_level
));
tgt_lvl
=
std
::
min
(
max_level
,
std
::
max
(
tgt_lvl
,
min_level
));
...
...
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
浏览文件 @
5b267474
...
@@ -87,6 +87,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -87,6 +87,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
auto
&
dev_ctx
=
auto
&
dev_ctx
=
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
context
.
template
device_context
<
platform
::
CPUDeviceContext
>();
...
@@ -134,10 +135,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -134,10 +135,10 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
std
::
pair
<
Tensor
,
Tensor
>
tensor_pair
=
ProposalForOneImage
(
ProposalForOneImage
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
eta
,
pixel_offset
);
Tensor
&
proposals
=
tensor_pair
.
first
;
Tensor
&
proposals
=
tensor_pair
.
first
;
Tensor
&
scores
=
tensor_pair
.
second
;
Tensor
&
scores
=
tensor_pair
.
second
;
...
@@ -168,7 +169,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -168,7 +169,7 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
const
Tensor
&
bbox_deltas_slice
,
// [M, 4]
const
Tensor
&
bbox_deltas_slice
,
// [M, 4]
const
Tensor
&
scores_slice
,
// [N, 1]
const
Tensor
&
scores_slice
,
// [N, 1]
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
float
eta
)
const
{
float
eta
,
bool
pixel_offset
=
true
)
const
{
auto
*
scores_data
=
scores_slice
.
data
<
T
>
();
auto
*
scores_data
=
scores_slice
.
data
<
T
>
();
// Sort index
// Sort index
...
@@ -203,12 +204,15 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -203,12 +204,15 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
Tensor
proposals
;
Tensor
proposals
;
proposals
.
mutable_data
<
T
>
({
index_t
.
numel
(),
4
},
ctx
.
GetPlace
());
proposals
.
mutable_data
<
T
>
({
index_t
.
numel
(),
4
},
ctx
.
GetPlace
());
BoxCoder
<
T
>
(
ctx
,
&
anchor_sel
,
&
bbox_sel
,
&
var_sel
,
&
proposals
);
BoxCoder
<
T
>
(
ctx
,
&
anchor_sel
,
&
bbox_sel
,
&
var_sel
,
&
proposals
,
pixel_offset
);
ClipTiledBoxes
<
T
>
(
ctx
,
im_shape_slice
,
proposals
,
&
proposals
,
false
);
ClipTiledBoxes
<
T
>
(
ctx
,
im_shape_slice
,
proposals
,
&
proposals
,
false
,
pixel_offset
);
Tensor
keep
;
Tensor
keep
;
FilterBoxes
<
T
>
(
ctx
,
&
proposals
,
min_size
,
im_shape_slice
,
false
,
&
keep
);
FilterBoxes
<
T
>
(
ctx
,
&
proposals
,
min_size
,
im_shape_slice
,
false
,
&
keep
,
pixel_offset
);
// Handle the case when there is no keep index left
// Handle the case when there is no keep index left
if
(
keep
.
numel
()
==
0
)
{
if
(
keep
.
numel
()
==
0
)
{
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
set_zero
;
math
::
SetConstant
<
platform
::
CPUDeviceContext
,
T
>
set_zero
;
...
@@ -229,7 +233,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -229,7 +233,8 @@ class GenerateProposalsV2Kernel : public framework::OpKernel<T> {
return
std
::
make_pair
(
bbox_sel
,
scores_filter
);
return
std
::
make_pair
(
bbox_sel
,
scores_filter
);
}
}
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
);
Tensor
keep_nms
=
NMS
<
T
>
(
ctx
,
&
bbox_sel
,
&
scores_filter
,
nms_thresh
,
eta
,
pixel_offset
);
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
keep_nms
.
Resize
({
post_nms_top_n
});
keep_nms
.
Resize
({
post_nms_top_n
});
...
@@ -280,6 +285,9 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -280,6 +285,9 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
"Proposal height and width both need to be greater "
"Proposal height and width both need to be greater "
"than this min_size."
);
"than this min_size."
);
AddAttr
<
float
>
(
"eta"
,
"The parameter for adaptive NMS."
);
AddAttr
<
float
>
(
"eta"
,
"The parameter for adaptive NMS."
);
AddAttr
<
bool
>
(
"pixel_offset"
,
"(bool, default True),"
,
"If true, im_shape pixel offset is 1."
)
.
SetDefault
(
true
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator is the second version of generate_proposals op to generate
This operator is the second version of generate_proposals op to generate
bounding box proposals for Faster RCNN.
bounding box proposals for Faster RCNN.
...
@@ -312,3 +320,8 @@ REGISTER_OPERATOR(
...
@@ -312,3 +320,8 @@ REGISTER_OPERATOR(
REGISTER_OP_CPU_KERNEL
(
generate_proposals_v2
,
REGISTER_OP_CPU_KERNEL
(
generate_proposals_v2
,
ops
::
GenerateProposalsV2Kernel
<
float
>
,
ops
::
GenerateProposalsV2Kernel
<
float
>
,
ops
::
GenerateProposalsV2Kernel
<
double
>
);
ops
::
GenerateProposalsV2Kernel
<
double
>
);
REGISTER_OP_VERSION
(
generate_proposals_v2
)
.
AddCheckpoint
(
R"ROC(Registe generate_proposals_v2 for adding the attribute of pixel_offset)ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"pixel_offset"
,
"If true, im_shape pixel offset is 1."
,
true
));
paddle/fluid/operators/detection/generate_proposals_v2_op.cu
浏览文件 @
5b267474
...
@@ -36,7 +36,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
...
@@ -36,7 +36,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
const
Tensor
&
bbox_deltas
,
// [M, 4]
const
Tensor
&
bbox_deltas
,
// [M, 4]
const
Tensor
&
scores
,
// [N, 1]
const
Tensor
&
scores
,
// [N, 1]
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
int
pre_nms_top_n
,
int
post_nms_top_n
,
float
nms_thresh
,
float
min_size
,
float
eta
)
{
float
eta
,
bool
pixel_offset
)
{
// 1. pre nms
// 1. pre nms
Tensor
scores_sort
,
index_sort
;
Tensor
scores_sort
,
index_sort
;
SortDescending
<
T
>
(
ctx
,
scores
,
&
scores_sort
,
&
index_sort
);
SortDescending
<
T
>
(
ctx
,
scores
,
&
scores_sort
,
&
index_sort
);
...
@@ -54,7 +54,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
...
@@ -54,7 +54,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
platform
::
ForRange
<
platform
::
CUDADeviceContext
>
for_range
(
ctx
,
pre_nms_num
);
platform
::
ForRange
<
platform
::
CUDADeviceContext
>
for_range
(
ctx
,
pre_nms_num
);
for_range
(
BoxDecodeAndClipFunctor
<
T
>
{
for_range
(
BoxDecodeAndClipFunctor
<
T
>
{
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
anchors
.
data
<
T
>
(),
bbox_deltas
.
data
<
T
>
(),
variances
.
data
<
T
>
(),
index_sort
.
data
<
int
>
(),
im_shape
.
data
<
T
>
(),
proposals
.
data
<
T
>
()});
index_sort
.
data
<
int
>
(),
im_shape
.
data
<
T
>
(),
proposals
.
data
<
T
>
(),
pixel_offset
});
}
}
// 3. filter
// 3. filter
...
@@ -65,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
...
@@ -65,7 +66,7 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
auto
stream
=
ctx
.
stream
();
auto
stream
=
ctx
.
stream
();
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
FilterBBoxes
<
T
,
512
><<<
1
,
512
,
0
,
stream
>>>
(
proposals
.
data
<
T
>
(),
im_shape
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
proposals
.
data
<
T
>
(),
im_shape
.
data
<
T
>
(),
min_size
,
pre_nms_num
,
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
(),
false
);
keep_num_t
.
data
<
int
>
(),
keep_index
.
data
<
int
>
(),
false
,
pixel_offset
);
int
keep_num
;
int
keep_num
;
const
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
ctx
.
GetPlace
());
const
auto
gpu_place
=
BOOST_GET_CONST
(
platform
::
CUDAPlace
,
ctx
.
GetPlace
());
memory
::
Copy
(
platform
::
CPUPlace
(),
&
keep_num
,
gpu_place
,
memory
::
Copy
(
platform
::
CPUPlace
(),
&
keep_num
,
gpu_place
,
...
@@ -94,7 +95,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
...
@@ -94,7 +95,8 @@ static std::pair<Tensor, Tensor> ProposalForOneImage(
// 4. nms
// 4. nms
Tensor
keep_nms
;
Tensor
keep_nms
;
NMS
<
T
>
(
ctx
,
proposals_filter
,
keep_index
,
nms_thresh
,
&
keep_nms
);
NMS
<
T
>
(
ctx
,
proposals_filter
,
keep_index
,
nms_thresh
,
&
keep_nms
,
pixel_offset
);
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
if
(
post_nms_top_n
>
0
&&
post_nms_top_n
<
keep_nms
.
numel
())
{
keep_nms
.
Resize
({
post_nms_top_n
});
keep_nms
.
Resize
({
post_nms_top_n
});
}
}
...
@@ -129,6 +131,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -129,6 +131,7 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
nms_thresh
=
context
.
Attr
<
float
>
(
"nms_thresh"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
min_size
=
context
.
Attr
<
float
>
(
"min_size"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
float
eta
=
context
.
Attr
<
float
>
(
"eta"
);
bool
pixel_offset
=
context
.
Attr
<
bool
>
(
"pixel_offset"
);
PADDLE_ENFORCE_GE
(
eta
,
1.
,
PADDLE_ENFORCE_GE
(
eta
,
1.
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"Not support adaptive NMS. The attribute 'eta' "
"Not support adaptive NMS. The attribute 'eta' "
...
@@ -184,10 +187,10 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
...
@@ -184,10 +187,10 @@ class CUDAGenerateProposalsV2Kernel : public framework::OpKernel<T> {
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
bbox_deltas_slice
.
Resize
({
h_bbox
*
w_bbox
*
c_bbox
/
4
,
4
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
scores_slice
.
Resize
({
h_score
*
w_score
*
c_score
,
1
});
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
std
::
pair
<
Tensor
,
Tensor
>
box_score_pair
=
ProposalForOneImage
<
T
>
(
ProposalForOneImage
<
T
>
(
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
dev_ctx
,
im_shape_slice
,
anchors
,
variances
,
bbox_deltas_slice
,
bbox_deltas_slice
,
scores_slice
,
pre_nms_top_n
,
scores_slice
,
pre_nms_top_n
,
post_nms_top_n
,
nms_thresh
,
min_size
,
post_nms_top_n
,
nms_thresh
,
min_size
,
eta
);
eta
,
pixel_offset
);
Tensor
&
proposals
=
box_score_pair
.
first
;
Tensor
&
proposals
=
box_score_pair
.
first
;
Tensor
&
scores
=
box_score_pair
.
second
;
Tensor
&
scores
=
box_score_pair
.
second
;
...
...
paddle/fluid/operators/detection/nms_util.h
浏览文件 @
5b267474
...
@@ -130,7 +130,7 @@ static inline framework::Tensor VectorToTensor(
...
@@ -130,7 +130,7 @@ static inline framework::Tensor VectorToTensor(
template
<
class
T
>
template
<
class
T
>
framework
::
Tensor
NMS
(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
NMS
(
const
platform
::
DeviceContext
&
ctx
,
framework
::
Tensor
*
bbox
,
framework
::
Tensor
*
scores
,
framework
::
Tensor
*
bbox
,
framework
::
Tensor
*
scores
,
T
nms_threshold
,
float
eta
)
{
T
nms_threshold
,
float
eta
,
bool
pixel_offset
=
true
)
{
int64_t
num_boxes
=
bbox
->
dims
()[
0
];
int64_t
num_boxes
=
bbox
->
dims
()[
0
];
// 4: [xmin ymin xmax ymax]
// 4: [xmin ymin xmax ymax]
int64_t
box_size
=
bbox
->
dims
()[
1
];
int64_t
box_size
=
bbox
->
dims
()[
1
];
...
@@ -144,13 +144,15 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
...
@@ -144,13 +144,15 @@ framework::Tensor NMS(const platform::DeviceContext& ctx,
int
selected_num
=
0
;
int
selected_num
=
0
;
T
adaptive_threshold
=
nms_threshold
;
T
adaptive_threshold
=
nms_threshold
;
const
T
*
bbox_data
=
bbox
->
data
<
T
>
();
const
T
*
bbox_data
=
bbox
->
data
<
T
>
();
bool
normalized
=
pixel_offset
?
false
:
true
;
while
(
sorted_indices
.
size
()
!=
0
)
{
while
(
sorted_indices
.
size
()
!=
0
)
{
int
idx
=
sorted_indices
.
back
().
second
;
int
idx
=
sorted_indices
.
back
().
second
;
bool
flag
=
true
;
bool
flag
=
true
;
for
(
int
kept_idx
:
selected_indices
)
{
for
(
int
kept_idx
:
selected_indices
)
{
if
(
flag
)
{
if
(
flag
)
{
T
overlap
=
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
T
overlap
=
bbox_data
+
kept_idx
*
box_size
,
false
);
JaccardOverlap
<
T
>
(
bbox_data
+
idx
*
box_size
,
bbox_data
+
kept_idx
*
box_size
,
normalized
);
flag
=
(
overlap
<=
adaptive_threshold
);
flag
=
(
overlap
<=
adaptive_threshold
);
}
else
{
}
else
{
break
;
break
;
...
...
paddle/fluid/operators/roi_align_op.cc
浏览文件 @
5b267474
...
@@ -175,6 +175,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -175,6 +175,10 @@ class ROIAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"If <=0, then grid points are adaptive to roi_width "
"If <=0, then grid points are adaptive to roi_width "
"and pooled_w, likewise for height"
)
"and pooled_w, likewise for height"
)
.
SetDefault
(
-
1
);
.
SetDefault
(
-
1
);
AddAttr
<
bool
>
(
"aligned"
,
"(bool, default False),"
"If true, pixel shift it by -0.5 for align more perfectly"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
**RoIAlign Operator**
**RoIAlign Operator**
...
@@ -242,7 +246,14 @@ REGISTER_OP_VERSION(roi_align)
...
@@ -242,7 +246,14 @@ REGISTER_OP_VERSION(roi_align)
"it is not used in object detection models yet."
))
"it is not used in object detection models yet."
))
.
AddCheckpoint
(
.
AddCheckpoint
(
R"ROC(
R"ROC(
Upgrade roi_align add a new input [RoisNum])ROC"
,
Upgrade roi_align add a new input [RoisNum])ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewInput
(
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewInput
(
"RoisNum"
,
"RoisNum"
,
"The number of RoIs in each image. RoisNum is dispensable."
));
"The number of RoIs in each image. RoisNum is dispensable."
))
.
AddCheckpoint
(
R"ROC(
Upgrade roi_align add a new input [aligned])ROC"
,
paddle
::
framework
::
compatible
::
OpVersionDesc
().
NewAttr
(
"aligned"
,
"If true, pixel shift it by -0.5 for align more perfectly."
,
false
));
paddle/fluid/operators/roi_align_op.cu
浏览文件 @
5b267474
...
@@ -105,7 +105,8 @@ __global__ void GPUROIAlignForward(
...
@@ -105,7 +105,8 @@ __global__ void GPUROIAlignForward(
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
int
nthreads
,
const
T
*
input_data
,
const
T
*
input_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
output_data
)
{
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
output_data
,
const
bool
continuous_coordinate
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
int
pw
=
i
%
pooled_width
;
int
pw
=
i
%
pooled_width
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
...
@@ -115,13 +116,19 @@ __global__ void GPUROIAlignForward(
...
@@ -115,13 +116,19 @@ __global__ void GPUROIAlignForward(
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
;
T
roi_offset
=
continuous_coordinate
?
static_cast
<
T
>
(
0.5
)
:
0
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
;
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
continuous_coordinate
)
{
roi_width
=
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
...
@@ -153,14 +160,12 @@ __global__ void GPUROIAlignForward(
...
@@ -153,14 +160,12 @@ __global__ void GPUROIAlignForward(
}
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
GPUROIAlignBackward
(
const
int
nthreads
,
const
T
*
input_rois
,
__global__
void
GPUROIAlignBackward
(
const
T
*
out_grad
,
const
int
num_rois
,
const
int
nthreads
,
const
T
*
input_rois
,
const
T
*
out_grad
,
const
float
spatial_scale
,
const
int
num_rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
channels
,
const
int
height
,
const
int
height
,
const
int
width
,
const
int
pooled_height
,
const
int
width
,
const
int
pooled_height
,
const
int
pooled_width
,
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
const
int
pooled_width
,
T
*
input_grad
,
const
bool
continuous_coordinate
)
{
const
int
sampling_ratio
,
int
*
roi_batch_id_data
,
T
*
input_grad
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
CUDA_KERNEL_LOOP
(
i
,
nthreads
)
{
int
pw
=
i
%
pooled_width
;
int
pw
=
i
%
pooled_width
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
int
ph
=
(
i
/
pooled_width
)
%
pooled_height
;
...
@@ -169,13 +174,18 @@ __global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
...
@@ -169,13 +174,18 @@ __global__ void GPUROIAlignBackward(const int nthreads, const T* input_rois,
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
const
T
*
offset_input_rois
=
input_rois
+
n
*
kROISize
;
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
int
roi_batch_ind
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
;
T
roi_offset
=
continuous_coordinate
?
T
(
0.5
)
:
0
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
;
T
roi_xmin
=
offset_input_rois
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
;
T
roi_ymin
=
offset_input_rois
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
;
T
roi_xmax
=
offset_input_rois
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
offset_input_rois
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
continuous_coordinate
)
{
roi_width
=
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
...
@@ -236,6 +246,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
...
@@ -236,6 +246,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
auto
in_dims
=
in
->
dims
();
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
batch_size
=
in_dims
[
0
];
...
@@ -316,7 +327,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
...
@@ -316,7 +327,7 @@ class GPUROIAlignOpKernel : public framework::OpKernel<T> {
GPUROIAlignForward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
GPUROIAlignForward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
output_size
,
in
->
data
<
T
>
(),
rois
->
data
<
T
>
(),
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
sampling_ratio
,
roi_id_data
,
height
,
width
,
pooled_height
,
pooled_width
,
sampling_ratio
,
roi_id_data
,
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()));
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
())
,
aligned
);
}
}
};
};
...
@@ -334,6 +345,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
...
@@ -334,6 +345,7 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
int
rois_num
=
rois
->
dims
()[
0
];
int
rois_num
=
rois
->
dims
()[
0
];
int
channels
=
in
->
dims
()[
1
];
int
channels
=
in
->
dims
()[
1
];
...
@@ -390,8 +402,8 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
...
@@ -390,8 +402,8 @@ class GPUROIAlignGradOpKernel : public framework::OpKernel<T> {
GPUROIAlignBackward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
GPUROIAlignBackward
<
T
><<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
rois_num
,
output_grad_size
,
rois
->
data
<
T
>
(),
out_grad
->
data
<
T
>
(),
rois_num
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
spatial_scale
,
channels
,
height
,
width
,
pooled_height
,
pooled_width
,
sampling_ratio
,
roi_id_data
,
sampling_ratio
,
roi_id_data
,
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
()),
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
())
);
aligned
);
}
}
}
}
};
};
...
...
paddle/fluid/operators/roi_align_op.h
浏览文件 @
5b267474
...
@@ -145,6 +145,7 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
...
@@ -145,6 +145,7 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
pooled_width
=
ctx
.
Attr
<
int
>
(
"pooled_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
...
@@ -215,15 +216,21 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
...
@@ -215,15 +216,21 @@ class CPUROIAlignOpKernel : public framework::OpKernel<T> {
}
}
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
rois_data
=
rois
->
data
<
T
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
T
roi_offset
=
aligned
?
T
(
0.5
)
:
0
;
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_id
=
roi_batch_id_data
[
n
];
int
roi_batch_id
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
;
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
aligned
)
{
roi_width
=
std
::
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
std
::
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
roi_width
=
std
::
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
std
::
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
const
T
*
batch_data
=
input_data
+
roi_batch_id
*
in_stride
[
0
];
const
T
*
batch_data
=
input_data
+
roi_batch_id
*
in_stride
[
0
];
...
@@ -290,6 +297,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
...
@@ -290,6 +297,7 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
sampling_ratio
=
ctx
.
Attr
<
int
>
(
"sampling_ratio"
);
auto
in_dims
=
in
->
dims
();
auto
in_dims
=
in
->
dims
();
auto
aligned
=
ctx
.
Attr
<
bool
>
(
"aligned"
);
int
channels
=
in_dims
[
1
];
int
channels
=
in_dims
[
1
];
int
height
=
in_dims
[
2
];
int
height
=
in_dims
[
2
];
...
@@ -344,14 +352,21 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
...
@@ -344,14 +352,21 @@ class CPUROIAlignGradOpKernel : public framework::OpKernel<T> {
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
roi_stride
=
framework
::
stride
(
rois
->
dims
());
auto
out_stride
=
framework
::
stride
(
out_grad
->
dims
());
auto
out_stride
=
framework
::
stride
(
out_grad
->
dims
());
T
roi_offset
=
aligned
?
T
(
0.5
)
:
0
;
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
int
roi_batch_idx
=
roi_batch_id_data
[
n
];
int
roi_batch_idx
=
roi_batch_id_data
[
n
];
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
;
T
roi_xmin
=
rois_data
[
0
]
*
spatial_scale
-
roi_offset
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
;
T
roi_ymin
=
rois_data
[
1
]
*
spatial_scale
-
roi_offset
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
;
T
roi_xmax
=
rois_data
[
2
]
*
spatial_scale
-
roi_offset
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
;
T
roi_ymax
=
rois_data
[
3
]
*
spatial_scale
-
roi_offset
;
T
roi_width
=
std
::
max
(
roi_xmax
-
roi_xmin
,
static_cast
<
T
>
(
1.
));
T
roi_height
=
std
::
max
(
roi_ymax
-
roi_ymin
,
static_cast
<
T
>
(
1.
));
T
roi_width
=
roi_xmax
-
roi_xmin
;
T
roi_height
=
roi_ymax
-
roi_ymin
;
if
(
!
aligned
)
{
roi_width
=
std
::
max
(
roi_width
,
static_cast
<
T
>
(
1.
));
roi_height
=
std
::
max
(
roi_height
,
static_cast
<
T
>
(
1.
));
}
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_h
=
static_cast
<
T
>
(
roi_height
)
/
static_cast
<
T
>
(
pooled_height
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
T
bin_size_w
=
static_cast
<
T
>
(
roi_width
)
/
static_cast
<
T
>
(
pooled_width
);
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
...
...
python/paddle/fluid/tests/unittests/test_distribute_fpn_proposals_op.py
浏览文件 @
5b267474
...
@@ -31,7 +31,8 @@ class TestDistributeFPNProposalsOp(OpTest):
...
@@ -31,7 +31,8 @@ class TestDistributeFPNProposalsOp(OpTest):
'max_level'
:
self
.
roi_max_level
,
'max_level'
:
self
.
roi_max_level
,
'min_level'
:
self
.
roi_min_level
,
'min_level'
:
self
.
roi_min_level
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_level'
:
self
.
canonical_level
'refer_level'
:
self
.
canonical_level
,
'pixel_offset'
:
self
.
pixel_offset
,
}
}
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
for
i
in
range
(
len
(
self
.
rois_fpn
))]
for
i
in
range
(
len
(
self
.
rois_fpn
))]
...
@@ -47,10 +48,12 @@ class TestDistributeFPNProposalsOp(OpTest):
...
@@ -47,10 +48,12 @@ class TestDistributeFPNProposalsOp(OpTest):
self
.
canonical_scale
=
224
self
.
canonical_scale
=
224
self
.
canonical_level
=
4
self
.
canonical_level
=
4
self
.
images_shape
=
[
512
,
512
]
self
.
images_shape
=
[
512
,
512
]
self
.
pixel_offset
=
True
def
boxes_area
(
self
,
boxes
):
def
boxes_area
(
self
,
boxes
):
w
=
(
boxes
[:,
2
]
-
boxes
[:,
0
]
+
1
)
offset
=
1
if
self
.
pixel_offset
else
0
h
=
(
boxes
[:,
3
]
-
boxes
[:,
1
]
+
1
)
w
=
(
boxes
[:,
2
]
-
boxes
[:,
0
]
+
offset
)
h
=
(
boxes
[:,
3
]
-
boxes
[:,
1
]
+
offset
)
areas
=
w
*
h
areas
=
w
*
h
assert
np
.
all
(
areas
>=
0
),
'Negative areas founds'
assert
np
.
all
(
areas
>=
0
),
'Negative areas founds'
return
areas
return
areas
...
@@ -59,7 +62,7 @@ class TestDistributeFPNProposalsOp(OpTest):
...
@@ -59,7 +62,7 @@ class TestDistributeFPNProposalsOp(OpTest):
s
=
np
.
sqrt
(
self
.
boxes_area
(
rois
))
s
=
np
.
sqrt
(
self
.
boxes_area
(
rois
))
s0
=
self
.
canonical_scale
s0
=
self
.
canonical_scale
lvl0
=
self
.
canonical_level
lvl0
=
self
.
canonical_level
target_lvls
=
np
.
floor
(
lvl0
+
np
.
log2
(
s
/
s0
+
1e-
6
))
target_lvls
=
np
.
floor
(
lvl0
+
np
.
log2
(
s
/
s0
+
1e-
8
))
target_lvls
=
np
.
clip
(
target_lvls
,
lvl_min
,
lvl_max
)
target_lvls
=
np
.
clip
(
target_lvls
,
lvl_min
,
lvl_max
)
return
target_lvls
return
target_lvls
...
@@ -131,7 +134,8 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
...
@@ -131,7 +134,8 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
'max_level'
:
self
.
roi_max_level
,
'max_level'
:
self
.
roi_max_level
,
'min_level'
:
self
.
roi_min_level
,
'min_level'
:
self
.
roi_min_level
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_scale'
:
self
.
canonical_scale
,
'refer_level'
:
self
.
canonical_level
'refer_level'
:
self
.
canonical_level
,
'pixel_offset'
:
self
.
pixel_offset
,
}
}
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
output
=
[(
'out%d'
%
i
,
self
.
rois_fpn
[
i
])
for
i
in
range
(
len
(
self
.
rois_fpn
))]
for
i
in
range
(
len
(
self
.
rois_fpn
))]
...
@@ -147,5 +151,16 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
...
@@ -147,5 +151,16 @@ class TestDistributeFPNProposalsOpWithRoisNum(TestDistributeFPNProposalsOp):
}
}
class
TestDistributeFPNProposalsOpNoOffset
(
TestDistributeFPNProposalsOpWithRoisNum
):
def
init_test_case
(
self
):
self
.
roi_max_level
=
5
self
.
roi_min_level
=
2
self
.
canonical_scale
=
224
self
.
canonical_level
=
4
self
.
images_shape
=
[
512
,
512
]
self
.
pixel_offset
=
False
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_generate_proposals_op.py
浏览文件 @
5b267474
...
@@ -21,7 +21,6 @@ import math
...
@@ -21,7 +21,6 @@ import math
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
op_test
import
OpTest
from
test_multiclass_nms_op
import
nms
from
test_anchor_generator_op
import
anchor_generator_in_python
from
test_anchor_generator_op
import
anchor_generator_in_python
import
copy
import
copy
...
@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores,
...
@@ -111,18 +110,19 @@ def proposal_for_one_image(im_info, all_anchors, variances, bbox_deltas, scores,
return
proposals
,
scores
return
proposals
,
scores
def
box_coder
(
all_anchors
,
bbox_deltas
,
variances
):
def
box_coder
(
all_anchors
,
bbox_deltas
,
variances
,
pixel_offset
=
True
):
"""
"""
Decode proposals by anchors and bbox_deltas from RPN
Decode proposals by anchors and bbox_deltas from RPN
"""
"""
offset
=
1
if
pixel_offset
else
0
#proposals: xmin, ymin, xmax, ymax
#proposals: xmin, ymin, xmax, ymax
proposals
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
proposals
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
#anchor_loc: width, height, center_x, center_y
#anchor_loc: width, height, center_x, center_y
anchor_loc
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
anchor_loc
=
np
.
zeros_like
(
bbox_deltas
,
dtype
=
np
.
float32
)
anchor_loc
[:,
0
]
=
all_anchors
[:,
2
]
-
all_anchors
[:,
0
]
+
1
anchor_loc
[:,
0
]
=
all_anchors
[:,
2
]
-
all_anchors
[:,
0
]
+
offset
anchor_loc
[:,
1
]
=
all_anchors
[:,
3
]
-
all_anchors
[:,
1
]
+
1
anchor_loc
[:,
1
]
=
all_anchors
[:,
3
]
-
all_anchors
[:,
1
]
+
offset
anchor_loc
[:,
2
]
=
all_anchors
[:,
0
]
+
0.5
*
anchor_loc
[:,
0
]
anchor_loc
[:,
2
]
=
all_anchors
[:,
0
]
+
0.5
*
anchor_loc
[:,
0
]
anchor_loc
[:,
3
]
=
all_anchors
[:,
1
]
+
0.5
*
anchor_loc
[:,
1
]
anchor_loc
[:,
3
]
=
all_anchors
[:,
1
]
+
0.5
*
anchor_loc
[:,
1
]
...
@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances):
...
@@ -152,51 +152,60 @@ def box_coder(all_anchors, bbox_deltas, variances):
pred_bbox
[
i
,
3
]
=
math
.
exp
(
pred_bbox
[
i
,
3
]
=
math
.
exp
(
min
(
bbox_deltas
[
i
,
3
],
math
.
log
(
1000
/
16.0
)))
*
anchor_loc
[
i
,
min
(
bbox_deltas
[
i
,
3
],
math
.
log
(
1000
/
16.0
)))
*
anchor_loc
[
i
,
1
]
1
]
proposals
[:,
0
]
=
pred_bbox
[:,
0
]
-
pred_bbox
[:,
2
]
/
2
proposals
[:,
0
]
=
pred_bbox
[:,
0
]
-
pred_bbox
[:,
2
]
/
2
proposals
[:,
1
]
=
pred_bbox
[:,
1
]
-
pred_bbox
[:,
3
]
/
2
proposals
[:,
1
]
=
pred_bbox
[:,
1
]
-
pred_bbox
[:,
3
]
/
2
proposals
[:,
2
]
=
pred_bbox
[:,
0
]
+
pred_bbox
[:,
2
]
/
2
-
1
proposals
[:,
2
]
=
pred_bbox
[:,
0
]
+
pred_bbox
[:,
2
]
/
2
-
offset
proposals
[:,
3
]
=
pred_bbox
[:,
1
]
+
pred_bbox
[:,
3
]
/
2
-
1
proposals
[:,
3
]
=
pred_bbox
[:,
1
]
+
pred_bbox
[:,
3
]
/
2
-
offset
return
proposals
return
proposals
def
clip_tiled_boxes
(
boxes
,
im_shape
):
def
clip_tiled_boxes
(
boxes
,
im_shape
,
pixel_offset
=
True
):
"""Clip boxes to image boundaries. im_shape is [height, width] and boxes
"""Clip boxes to image boundaries. im_shape is [height, width] and boxes
has shape (N, 4 * num_tiled_boxes)."""
has shape (N, 4 * num_tiled_boxes)."""
assert
boxes
.
shape
[
1
]
%
4
==
0
,
\
assert
boxes
.
shape
[
1
]
%
4
==
0
,
\
'boxes.shape[1] is {:d}, but must be divisible by 4.'
.
format
(
'boxes.shape[1] is {:d}, but must be divisible by 4.'
.
format
(
boxes
.
shape
[
1
]
boxes
.
shape
[
1
]
)
)
offset
=
1
if
pixel_offset
else
0
# x1 >= 0
# x1 >= 0
boxes
[:,
0
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
0
::
4
],
im_shape
[
1
]
-
1
),
0
)
boxes
[:,
0
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
0
::
4
],
im_shape
[
1
]
-
offset
),
0
)
# y1 >= 0
# y1 >= 0
boxes
[:,
1
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
1
::
4
],
im_shape
[
0
]
-
1
),
0
)
boxes
[:,
1
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
1
::
4
],
im_shape
[
0
]
-
offset
),
0
)
# x2 < im_shape[1]
# x2 < im_shape[1]
boxes
[:,
2
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
2
::
4
],
im_shape
[
1
]
-
1
),
0
)
boxes
[:,
2
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
2
::
4
],
im_shape
[
1
]
-
offset
),
0
)
# y2 < im_shape[0]
# y2 < im_shape[0]
boxes
[:,
3
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
3
::
4
],
im_shape
[
0
]
-
1
),
0
)
boxes
[:,
3
::
4
]
=
np
.
maximum
(
np
.
minimum
(
boxes
[:,
3
::
4
],
im_shape
[
0
]
-
offset
),
0
)
return
boxes
return
boxes
def
filter_boxes
(
boxes
,
min_size
,
im_info
):
def
filter_boxes
(
boxes
,
min_size
,
im_info
,
pixel_offset
=
True
):
"""Only keep boxes with both sides >= min_size and center within the image.
"""Only keep boxes with both sides >= min_size and center within the image.
"""
"""
# Scale min_size to match image scale
# Scale min_size to match image scale
im_scale
=
im_info
[
2
]
im_scale
=
im_info
[
2
]
min_size
=
max
(
min_size
,
1.0
)
min_size
=
max
(
min_size
,
1.0
)
ws
=
boxes
[:,
2
]
-
boxes
[:,
0
]
+
1
offset
=
1
if
pixel_offset
else
0
hs
=
boxes
[:,
3
]
-
boxes
[:,
1
]
+
1
ws
=
boxes
[:,
2
]
-
boxes
[:,
0
]
+
offset
ws_orig_scale
=
(
boxes
[:,
2
]
-
boxes
[:,
0
])
/
im_scale
+
1
hs
=
boxes
[:,
3
]
-
boxes
[:,
1
]
+
offset
hs_orig_scale
=
(
boxes
[:,
3
]
-
boxes
[:,
1
])
/
im_scale
+
1
if
pixel_offset
:
x_ctr
=
boxes
[:,
0
]
+
ws
/
2.
ws_orig_scale
=
(
boxes
[:,
2
]
-
boxes
[:,
0
])
/
im_scale
+
1
y_ctr
=
boxes
[:,
1
]
+
hs
/
2.
hs_orig_scale
=
(
boxes
[:,
3
]
-
boxes
[:,
1
])
/
im_scale
+
1
keep
=
np
.
where
((
ws_orig_scale
>=
min_size
)
&
(
hs_orig_scale
>=
min_size
)
&
x_ctr
=
boxes
[:,
0
]
+
ws
/
2.
(
x_ctr
<
im_info
[
1
])
&
(
y_ctr
<
im_info
[
0
]))[
0
]
y_ctr
=
boxes
[:,
1
]
+
hs
/
2.
keep
=
np
.
where
((
ws_orig_scale
>=
min_size
)
&
(
hs_orig_scale
>=
min_size
)
&
(
x_ctr
<
im_info
[
1
])
&
(
y_ctr
<
im_info
[
0
]))[
0
]
else
:
keep
=
np
.
where
((
ws
>=
min_size
)
&
(
hs
>=
min_size
))[
0
]
return
keep
return
keep
def
iou
(
box_a
,
box_b
):
def
iou
(
box_a
,
box_b
,
pixel_offset
=
True
):
"""
"""
Apply intersection-over-union overlap between box_a and box_b
Apply intersection-over-union overlap between box_a and box_b
"""
"""
...
@@ -209,9 +218,9 @@ def iou(box_a, box_b):
...
@@ -209,9 +218,9 @@ def iou(box_a, box_b):
ymin_b
=
min
(
box_b
[
1
],
box_b
[
3
])
ymin_b
=
min
(
box_b
[
1
],
box_b
[
3
])
xmax_b
=
max
(
box_b
[
0
],
box_b
[
2
])
xmax_b
=
max
(
box_b
[
0
],
box_b
[
2
])
ymax_b
=
max
(
box_b
[
1
],
box_b
[
3
])
ymax_b
=
max
(
box_b
[
1
],
box_b
[
3
])
offset
=
1
if
pixel_offset
else
0
area_a
=
(
ymax_a
-
ymin_a
+
1
)
*
(
xmax_a
-
xmin_a
+
1
)
area_a
=
(
ymax_a
-
ymin_a
+
offset
)
*
(
xmax_a
-
xmin_a
+
offset
)
area_b
=
(
ymax_b
-
ymin_b
+
1
)
*
(
xmax_b
-
xmin_b
+
1
)
area_b
=
(
ymax_b
-
ymin_b
+
offset
)
*
(
xmax_b
-
xmin_b
+
offset
)
if
area_a
<=
0
and
area_b
<=
0
:
if
area_a
<=
0
and
area_b
<=
0
:
return
0.0
return
0.0
...
@@ -220,14 +229,14 @@ def iou(box_a, box_b):
...
@@ -220,14 +229,14 @@ def iou(box_a, box_b):
xb
=
min
(
xmax_a
,
xmax_b
)
xb
=
min
(
xmax_a
,
xmax_b
)
yb
=
min
(
ymax_a
,
ymax_b
)
yb
=
min
(
ymax_a
,
ymax_b
)
inter_area
=
max
(
xb
-
xa
+
1
,
0.0
)
*
max
(
yb
-
ya
+
1
,
0.0
)
inter_area
=
max
(
xb
-
xa
+
offset
,
0.0
)
*
max
(
yb
-
ya
+
offset
,
0.0
)
iou_ratio
=
inter_area
/
(
area_a
+
area_b
-
inter_area
)
iou_ratio
=
inter_area
/
(
area_a
+
area_b
-
inter_area
)
return
iou_ratio
return
iou_ratio
def
nms
(
boxes
,
scores
,
nms_threshold
,
eta
=
1.0
):
def
nms
(
boxes
,
scores
,
nms_threshold
,
eta
=
1.0
,
pixel_offset
=
True
):
"""Apply non-maximum suppression at test time to avoid detecting too many
"""Apply non-maximum suppression at test time to avoid detecting too many
overlapping bounding boxes for a given object.
overlapping bounding boxes for a given object.
Args:
Args:
...
@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0):
...
@@ -252,7 +261,9 @@ def nms(boxes, scores, nms_threshold, eta=1.0):
for
k
in
range
(
len
(
selected_indices
)):
for
k
in
range
(
len
(
selected_indices
)):
if
keep
:
if
keep
:
kept_idx
=
selected_indices
[
k
]
kept_idx
=
selected_indices
[
k
]
overlap
=
iou
(
boxes
[
idx
],
boxes
[
kept_idx
])
overlap
=
iou
(
boxes
[
idx
],
boxes
[
kept_idx
],
pixel_offset
=
pixel_offset
)
keep
=
True
if
overlap
<=
adaptive_threshold
else
False
keep
=
True
if
overlap
<=
adaptive_threshold
else
False
else
:
else
:
break
break
...
...
python/paddle/fluid/tests/unittests/test_generate_proposals_v2_op.py
浏览文件 @
5b267474
...
@@ -21,7 +21,6 @@ import math
...
@@ -21,7 +21,6 @@ import math
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
op_test
import
OpTest
from
op_test
import
OpTest
from
test_multiclass_nms_op
import
nms
from
test_anchor_generator_op
import
anchor_generator_in_python
from
test_anchor_generator_op
import
anchor_generator_in_python
import
copy
import
copy
from
test_generate_proposals_op
import
clip_tiled_boxes
,
box_coder
,
nms
from
test_generate_proposals_op
import
clip_tiled_boxes
,
box_coder
,
nms
...
@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
...
@@ -29,7 +28,7 @@ from test_generate_proposals_op import clip_tiled_boxes, box_coder, nms
def
generate_proposals_v2_in_python
(
scores
,
bbox_deltas
,
im_shape
,
anchors
,
def
generate_proposals_v2_in_python
(
scores
,
bbox_deltas
,
im_shape
,
anchors
,
variances
,
pre_nms_topN
,
post_nms_topN
,
variances
,
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
):
nms_thresh
,
min_size
,
eta
,
pixel_offset
):
all_anchors
=
anchors
.
reshape
(
-
1
,
4
)
all_anchors
=
anchors
.
reshape
(
-
1
,
4
)
rois
=
np
.
empty
((
0
,
5
),
dtype
=
np
.
float32
)
rois
=
np
.
empty
((
0
,
5
),
dtype
=
np
.
float32
)
roi_probs
=
np
.
empty
((
0
,
1
),
dtype
=
np
.
float32
)
roi_probs
=
np
.
empty
((
0
,
1
),
dtype
=
np
.
float32
)
...
@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
...
@@ -42,7 +41,8 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
img_i_boxes
,
img_i_probs
=
proposal_for_one_image
(
img_i_boxes
,
img_i_probs
=
proposal_for_one_image
(
im_shape
[
img_idx
,
:],
all_anchors
,
variances
,
im_shape
[
img_idx
,
:],
all_anchors
,
variances
,
bbox_deltas
[
img_idx
,
:,
:,
:],
scores
[
img_idx
,
:,
:,
:],
bbox_deltas
[
img_idx
,
:,
:,
:],
scores
[
img_idx
,
:,
:,
:],
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
)
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
,
pixel_offset
)
rois_num
.
append
(
img_i_probs
.
shape
[
0
])
rois_num
.
append
(
img_i_probs
.
shape
[
0
])
rpn_rois
.
append
(
img_i_boxes
)
rpn_rois
.
append
(
img_i_boxes
)
rpn_roi_probs
.
append
(
img_i_probs
)
rpn_roi_probs
.
append
(
img_i_probs
)
...
@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
...
@@ -52,7 +52,7 @@ def generate_proposals_v2_in_python(scores, bbox_deltas, im_shape, anchors,
def
proposal_for_one_image
(
im_shape
,
all_anchors
,
variances
,
bbox_deltas
,
def
proposal_for_one_image
(
im_shape
,
all_anchors
,
variances
,
bbox_deltas
,
scores
,
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
scores
,
pre_nms_topN
,
post_nms_topN
,
nms_thresh
,
min_size
,
eta
):
min_size
,
eta
,
pixel_offset
):
# Transpose and reshape predicted bbox transformations to get them
# Transpose and reshape predicted bbox transformations to get them
# into the same order as the anchors:
# into the same order as the anchors:
# - bbox deltas will be (4 * A, H, W) format from conv output
# - bbox deltas will be (4 * A, H, W) format from conv output
...
@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
...
@@ -83,12 +83,12 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
scores
=
scores
[
order
,
:]
scores
=
scores
[
order
,
:]
bbox_deltas
=
bbox_deltas
[
order
,
:]
bbox_deltas
=
bbox_deltas
[
order
,
:]
all_anchors
=
all_anchors
[
order
,
:]
all_anchors
=
all_anchors
[
order
,
:]
proposals
=
box_coder
(
all_anchors
,
bbox_deltas
,
variances
)
proposals
=
box_coder
(
all_anchors
,
bbox_deltas
,
variances
,
pixel_offset
)
# clip proposals to image (may result in proposals with zero area
# clip proposals to image (may result in proposals with zero area
# that will be removed in the next step)
# that will be removed in the next step)
proposals
=
clip_tiled_boxes
(
proposals
,
im_shape
)
proposals
=
clip_tiled_boxes
(
proposals
,
im_shape
,
pixel_offset
)
# remove predicted boxes with height or width < min_size
# remove predicted boxes with height or width < min_size
keep
=
filter_boxes
(
proposals
,
min_size
,
im_shape
)
keep
=
filter_boxes
(
proposals
,
min_size
,
im_shape
,
pixel_offset
)
if
len
(
keep
)
==
0
:
if
len
(
keep
)
==
0
:
proposals
=
np
.
zeros
((
1
,
4
)).
astype
(
'float32'
)
proposals
=
np
.
zeros
((
1
,
4
)).
astype
(
'float32'
)
scores
=
np
.
zeros
((
1
,
1
)).
astype
(
'float32'
)
scores
=
np
.
zeros
((
1
,
1
)).
astype
(
'float32'
)
...
@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
...
@@ -103,7 +103,8 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
keep
=
nms
(
boxes
=
proposals
,
keep
=
nms
(
boxes
=
proposals
,
scores
=
scores
,
scores
=
scores
,
nms_threshold
=
nms_thresh
,
nms_threshold
=
nms_thresh
,
eta
=
eta
)
eta
=
eta
,
pixel_offset
=
pixel_offset
)
if
post_nms_topN
>
0
and
post_nms_topN
<
len
(
keep
):
if
post_nms_topN
>
0
and
post_nms_topN
<
len
(
keep
):
keep
=
keep
[:
post_nms_topN
]
keep
=
keep
[:
post_nms_topN
]
proposals
=
proposals
[
keep
,
:]
proposals
=
proposals
[
keep
,
:]
...
@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
...
@@ -112,17 +113,21 @@ def proposal_for_one_image(im_shape, all_anchors, variances, bbox_deltas,
return
proposals
,
scores
return
proposals
,
scores
def
filter_boxes
(
boxes
,
min_size
,
im_shape
):
def
filter_boxes
(
boxes
,
min_size
,
im_shape
,
pixel_offset
=
True
):
"""Only keep boxes with both sides >= min_size and center within the image.
"""Only keep boxes with both sides >= min_size and center within the image.
"""
"""
# Scale min_size to match image scale
# Scale min_size to match image scale
min_size
=
max
(
min_size
,
1.0
)
min_size
=
max
(
min_size
,
1.0
)
ws
=
boxes
[:,
2
]
-
boxes
[:,
0
]
+
1
offset
=
1
if
pixel_offset
else
0
hs
=
boxes
[:,
3
]
-
boxes
[:,
1
]
+
1
ws
=
boxes
[:,
2
]
-
boxes
[:,
0
]
+
offset
x_ctr
=
boxes
[:,
0
]
+
ws
/
2.
hs
=
boxes
[:,
3
]
-
boxes
[:,
1
]
+
offset
y_ctr
=
boxes
[:,
1
]
+
hs
/
2.
if
pixel_offset
:
keep
=
np
.
where
((
ws
>=
min_size
)
&
(
hs
>=
min_size
)
&
(
x_ctr
<
im_shape
[
1
])
x_ctr
=
boxes
[:,
0
]
+
ws
/
2.
&
(
y_ctr
<
im_shape
[
0
]))[
0
]
y_ctr
=
boxes
[:,
1
]
+
hs
/
2.
keep
=
np
.
where
((
ws
>=
min_size
)
&
(
hs
>=
min_size
)
&
(
x_ctr
<
im_shape
[
1
])
&
(
y_ctr
<
im_shape
[
0
]))[
0
]
else
:
keep
=
np
.
where
((
ws
>=
min_size
)
&
(
hs
>=
min_size
))[
0
]
return
keep
return
keep
...
@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest):
...
@@ -144,7 +149,8 @@ class TestGenerateProposalsV2Op(OpTest):
'post_nms_topN'
:
self
.
post_nms_topN
,
'post_nms_topN'
:
self
.
post_nms_topN
,
'nms_thresh'
:
self
.
nms_thresh
,
'nms_thresh'
:
self
.
nms_thresh
,
'min_size'
:
self
.
min_size
,
'min_size'
:
self
.
min_size
,
'eta'
:
self
.
eta
'eta'
:
self
.
eta
,
'pixel_offset'
:
self
.
pixel_offset
,
}
}
self
.
outputs
=
{
self
.
outputs
=
{
...
@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest):
...
@@ -165,6 +171,7 @@ class TestGenerateProposalsV2Op(OpTest):
self
.
nms_thresh
=
0.7
self
.
nms_thresh
=
0.7
self
.
min_size
=
3.0
self
.
min_size
=
3.0
self
.
eta
=
1.
self
.
eta
=
1.
self
.
pixel_offset
=
True
def
init_test_input
(
self
):
def
init_test_input
(
self
):
batch_size
=
1
batch_size
=
1
...
@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest):
...
@@ -191,7 +198,7 @@ class TestGenerateProposalsV2Op(OpTest):
self
.
rpn_rois
,
self
.
rpn_roi_probs
,
self
.
rois_num
=
generate_proposals_v2_in_python
(
self
.
rpn_rois
,
self
.
rpn_roi_probs
,
self
.
rois_num
=
generate_proposals_v2_in_python
(
self
.
scores
,
self
.
bbox_deltas
,
self
.
im_shape
,
self
.
anchors
,
self
.
scores
,
self
.
bbox_deltas
,
self
.
im_shape
,
self
.
anchors
,
self
.
variances
,
self
.
pre_nms_topN
,
self
.
post_nms_topN
,
self
.
variances
,
self
.
pre_nms_topN
,
self
.
post_nms_topN
,
self
.
nms_thresh
,
self
.
min_size
,
self
.
eta
)
self
.
nms_thresh
,
self
.
min_size
,
self
.
eta
,
self
.
pixel_offset
)
class
TestGenerateProposalsV2OutLodOp
(
TestGenerateProposalsV2Op
):
class
TestGenerateProposalsV2OutLodOp
(
TestGenerateProposalsV2Op
):
...
@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
...
@@ -231,6 +238,17 @@ class TestGenerateProposalsV2OpNoBoxLeft(TestGenerateProposalsV2Op):
self
.
nms_thresh
=
0.7
self
.
nms_thresh
=
0.7
self
.
min_size
=
1000.0
self
.
min_size
=
1000.0
self
.
eta
=
1.
self
.
eta
=
1.
self
.
pixel_offset
=
True
class
TestGenerateProposalsV2OpNoOffset
(
TestGenerateProposalsV2Op
):
def
init_test_params
(
self
):
self
.
pre_nms_topN
=
12000
# train 12000, test 2000
self
.
post_nms_topN
=
5000
# train 6000, test 1000
self
.
nms_thresh
=
0.7
self
.
min_size
=
3.0
self
.
eta
=
1.
self
.
pixel_offset
=
False
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_roi_align_op.py
浏览文件 @
5b267474
...
@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest):
...
@@ -35,7 +35,8 @@ class TestROIAlignOp(OpTest):
'spatial_scale'
:
self
.
spatial_scale
,
'spatial_scale'
:
self
.
spatial_scale
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_width'
:
self
.
pooled_width
,
'pooled_width'
:
self
.
pooled_width
,
'sampling_ratio'
:
self
.
sampling_ratio
'sampling_ratio'
:
self
.
sampling_ratio
,
'aligned'
:
self
.
aligned
,
}
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
...
@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest):
...
@@ -53,6 +54,7 @@ class TestROIAlignOp(OpTest):
self
.
pooled_height
=
2
self
.
pooled_height
=
2
self
.
pooled_width
=
2
self
.
pooled_width
=
2
self
.
sampling_ratio
=
-
1
self
.
sampling_ratio
=
-
1
self
.
aligned
=
False
self
.
x
=
np
.
random
.
random
(
self
.
x_dim
).
astype
(
'float64'
)
self
.
x
=
np
.
random
.
random
(
self
.
x_dim
).
astype
(
'float64'
)
...
@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest):
...
@@ -115,16 +117,21 @@ class TestROIAlignOp(OpTest):
(
self
.
rois_num
,
self
.
channels
,
self
.
pooled_height
,
(
self
.
rois_num
,
self
.
channels
,
self
.
pooled_height
,
self
.
pooled_width
)).
astype
(
'float64'
)
self
.
pooled_width
)).
astype
(
'float64'
)
offset
=
0.5
if
self
.
aligned
else
0.
for
i
in
range
(
self
.
rois_num
):
for
i
in
range
(
self
.
rois_num
):
roi
=
self
.
rois
[
i
]
roi
=
self
.
rois
[
i
]
roi_batch_id
=
int
(
roi
[
0
])
roi_batch_id
=
int
(
roi
[
0
])
x_i
=
self
.
x
[
roi_batch_id
]
x_i
=
self
.
x
[
roi_batch_id
]
roi_xmin
=
roi
[
1
]
*
self
.
spatial_scale
roi_xmin
=
roi
[
1
]
*
self
.
spatial_scale
-
offset
roi_ymin
=
roi
[
2
]
*
self
.
spatial_scale
roi_ymin
=
roi
[
2
]
*
self
.
spatial_scale
-
offset
roi_xmax
=
roi
[
3
]
*
self
.
spatial_scale
roi_xmax
=
roi
[
3
]
*
self
.
spatial_scale
-
offset
roi_ymax
=
roi
[
4
]
*
self
.
spatial_scale
roi_ymax
=
roi
[
4
]
*
self
.
spatial_scale
-
offset
roi_width
=
max
(
roi_xmax
-
roi_xmin
,
1
)
roi_height
=
max
(
roi_ymax
-
roi_ymin
,
1
)
roi_width
=
roi_xmax
-
roi_xmin
roi_height
=
roi_ymax
-
roi_ymin
if
not
self
.
aligned
:
roi_width
=
max
(
roi_width
,
1
)
roi_height
=
max
(
roi_height
,
1
)
bin_size_h
=
float
(
roi_height
)
/
float
(
self
.
pooled_height
)
bin_size_h
=
float
(
roi_height
)
/
float
(
self
.
pooled_height
)
bin_size_w
=
float
(
roi_width
)
/
float
(
self
.
pooled_width
)
bin_size_w
=
float
(
roi_width
)
/
float
(
self
.
pooled_width
)
roi_bin_grid_h
=
self
.
sampling_ratio
if
self
.
sampling_ratio
>
0
else
\
roi_bin_grid_h
=
self
.
sampling_ratio
if
self
.
sampling_ratio
>
0
else
\
...
@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp):
...
@@ -192,11 +199,31 @@ class TestROIAlignInLodOp(TestROIAlignOp):
'spatial_scale'
:
self
.
spatial_scale
,
'spatial_scale'
:
self
.
spatial_scale
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_height'
:
self
.
pooled_height
,
'pooled_width'
:
self
.
pooled_width
,
'pooled_width'
:
self
.
pooled_width
,
'sampling_ratio'
:
self
.
sampling_ratio
'sampling_ratio'
:
self
.
sampling_ratio
,
'aligned'
:
self
.
aligned
}
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
self
.
outputs
=
{
'Out'
:
self
.
out_data
}
class
TestROIAlignOpWithAligned
(
TestROIAlignOp
):
def
init_test_case
(
self
):
self
.
batch_size
=
3
self
.
channels
=
3
self
.
height
=
8
self
.
width
=
6
# n, c, h, w
self
.
x_dim
=
(
self
.
batch_size
,
self
.
channels
,
self
.
height
,
self
.
width
)
self
.
spatial_scale
=
1.0
/
2.0
self
.
pooled_height
=
2
self
.
pooled_width
=
2
self
.
sampling_ratio
=
-
1
self
.
aligned
=
True
self
.
x
=
np
.
random
.
random
(
self
.
x_dim
).
astype
(
'float64'
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录