Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f068e08d
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f068e08d
编写于
9月 28, 2021
作者:
F
Feng Ni
提交者:
GitHub
9月 28, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add roi_align (#35102)
* add roi_align in vision/ops.py
上级
6b587e93
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
267 addition
and
0 deletion
+267
-0
python/paddle/tests/test_ops_roi_align.py
python/paddle/tests/test_ops_roi_align.py
+108
-0
python/paddle/vision/ops.py
python/paddle/vision/ops.py
+159
-0
未找到文件。
python/paddle/tests/test_ops_roi_align.py
0 → 100644
浏览文件 @
f068e08d
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
import
paddle
from
paddle.vision.ops
import
roi_align
,
RoIAlign
class
TestRoIAlign
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
data
=
np
.
random
.
rand
(
1
,
256
,
32
,
32
).
astype
(
'float32'
)
boxes
=
np
.
random
.
rand
(
3
,
4
)
boxes
[:,
2
]
+=
boxes
[:,
0
]
+
3
boxes
[:,
3
]
+=
boxes
[:,
1
]
+
4
self
.
boxes
=
boxes
.
astype
(
'float32'
)
self
.
boxes_num
=
np
.
array
([
3
],
dtype
=
np
.
int32
)
def
roi_align_functional
(
self
,
output_size
):
if
isinstance
(
output_size
,
int
):
output_shape
=
(
3
,
256
,
output_size
,
output_size
)
else
:
output_shape
=
(
3
,
256
,
output_size
[
0
],
output_size
[
1
])
if
paddle
.
in_dynamic_mode
():
data
=
paddle
.
to_tensor
(
self
.
data
)
boxes
=
paddle
.
to_tensor
(
self
.
boxes
)
boxes_num
=
paddle
.
to_tensor
(
self
.
boxes_num
)
align_out
=
roi_align
(
data
,
boxes
,
boxes_num
=
boxes_num
,
output_size
=
output_size
)
np
.
testing
.
assert_equal
(
align_out
.
shape
,
output_shape
)
else
:
data
=
paddle
.
static
.
data
(
shape
=
self
.
data
.
shape
,
dtype
=
self
.
data
.
dtype
,
name
=
'data'
)
boxes
=
paddle
.
static
.
data
(
shape
=
self
.
boxes
.
shape
,
dtype
=
self
.
boxes
.
dtype
,
name
=
'boxes'
)
boxes_num
=
paddle
.
static
.
data
(
shape
=
self
.
boxes_num
.
shape
,
dtype
=
self
.
boxes_num
.
dtype
,
name
=
'boxes_num'
)
align_out
=
roi_align
(
data
,
boxes
,
boxes_num
=
boxes_num
,
output_size
=
output_size
)
place
=
paddle
.
CPUPlace
()
exe
=
paddle
.
static
.
Executor
(
place
)
align_out
=
exe
.
run
(
paddle
.
static
.
default_main_program
(),
feed
=
{
'data'
:
self
.
data
,
'boxes'
:
self
.
boxes
,
'boxes_num'
:
self
.
boxes_num
},
fetch_list
=
[
align_out
])
np
.
testing
.
assert_equal
(
align_out
[
0
].
shape
,
output_shape
)
def
test_roi_align_functional_dynamic
(
self
):
self
.
roi_align_functional
(
3
)
self
.
roi_align_functional
(
output_size
=
(
3
,
4
))
def
test_roi_align_functional_static
(
self
):
paddle
.
enable_static
()
self
.
roi_align_functional
(
3
)
paddle
.
disable_static
()
def
test_RoIAlign
(
self
):
roi_align_c
=
RoIAlign
(
output_size
=
(
4
,
3
))
data
=
paddle
.
to_tensor
(
self
.
data
)
boxes
=
paddle
.
to_tensor
(
self
.
boxes
)
boxes_num
=
paddle
.
to_tensor
(
self
.
boxes_num
)
align_out
=
roi_align_c
(
data
,
boxes
,
boxes_num
)
np
.
testing
.
assert_equal
(
align_out
.
shape
,
(
3
,
256
,
4
,
3
))
def
test_value
(
self
,
):
data
=
np
.
array
([
i
for
i
in
range
(
1
,
17
)]).
reshape
(
1
,
1
,
4
,
4
).
astype
(
np
.
float32
)
boxes
=
np
.
array
(
[[
1.
,
1.
,
2.
,
2.
],
[
1.5
,
1.5
,
3.
,
3.
]]).
astype
(
np
.
float32
)
boxes_num
=
np
.
array
([
2
]).
astype
(
np
.
int32
)
output
=
np
.
array
([[[[
6.
]]],
[[[
9.75
]]]],
dtype
=
np
.
float32
)
data
=
paddle
.
to_tensor
(
data
)
boxes
=
paddle
.
to_tensor
(
boxes
)
boxes_num
=
paddle
.
to_tensor
(
boxes_num
)
roi_align_c
=
RoIAlign
(
output_size
=
1
)
align_out
=
roi_align_c
(
data
,
boxes
,
boxes_num
)
np
.
testing
.
assert_almost_equal
(
align_out
.
numpy
(),
output
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/vision/ops.py
浏览文件 @
f068e08d
...
...
@@ -34,6 +34,8 @@ __all__ = [ #noqa
'RoIPool'
,
'psroi_pool'
,
'PSRoIPool'
,
'roi_align'
,
'RoIAlign'
,
]
...
...
@@ -1138,3 +1140,160 @@ class RoIPool(Layer):
def
extra_repr
(
self
):
main_str
=
'output_size={_output_size}, spatial_scale={_spatial_scale}'
return
main_str
.
format
(
**
self
.
__dict__
)
def
roi_align
(
x
,
boxes
,
boxes_num
,
output_size
,
spatial_scale
=
1.0
,
sampling_ratio
=-
1
,
aligned
=
True
,
name
=
None
):
"""
This operator implements the roi_align layer.
Region of Interest (RoI) Align operator (also known as RoI Align) is to
perform bilinear interpolation on inputs of nonuniform sizes to obtain
fixed-size feature maps (e.g. 7*7), as described in Mask R-CNN.
Dividing each region proposal into equal-sized sections with the pooled_width
and pooled_height. Location remains the origin result.
In each ROI bin, the value of the four regularly sampled locations are
computed directly through bilinear interpolation. The output is the mean of
four locations. Thus avoid the misaligned problem.
Args:
x (Tensor): Input feature, 4D-Tensor with the shape of [N,C,H,W],
where N is the batch size, C is the input channel, H is Height,
W is weight. The data type is float32 or float64.
boxes (Tensor): Boxes (RoIs, Regions of Interest) to pool over. It
should be a 2-D Tensor of shape (num_boxes, 4). The data type is
float32 or float64. Given as [[x1, y1, x2, y2], ...], (x1, y1) is
the top left coordinates, and (x2, y2) is the bottom right coordinates.
boxes_num (Tensor): The number of boxes contained in each picture in
the batch, the data type is int32.
output_size (int or Tuple[int, int]): The pooled output size(h, w), data
type is int32. If int, h and w are both equal to output_size.
spatial_scale (float32): Multiplicative spatial scale factor to translate
ROI coords from their input scale to the scale used when pooling.
Default: 1.0
sampling_ratio (int32): number of sampling points in the interpolation
grid used to compute the output value of each pooled output bin.
If > 0, then exactly ``sampling_ratio x sampling_ratio`` sampling
points per bin are used.
If <= 0, then an adaptive number of grid points are used (computed
as ``ceil(roi_width / output_width)``, and likewise for height).
Default: -1
aligned (bool): If False, use the legacy implementation. If True, pixel
shift the box coordinates it by -0.5 for a better alignment with the
two neighboring pixel indices. This version is used in Detectron2.
Default: True
name(str, optional): For detailed information, please refer to :
ref:`api_guide_Name`. Usually name is no need to set and None by
default.
Returns:
Tensor: The output of ROIAlignOp is a 4-D tensor with shape (num_boxes,
channels, pooled_h, pooled_w). The data type is float32 or float64.
Examples:
.. code-block:: python
import paddle
from paddle.vision.ops import roi_align
data = paddle.rand([1, 256, 32, 32])
boxes = paddle.rand([3, 4])
boxes[:, 2] += boxes[:, 0] + 3
boxes[:, 3] += boxes[:, 1] + 4
boxes_num = paddle.to_tensor([3]).astype('int32')
align_out = roi_align(data, boxes, boxes_num, output_size=3)
assert align_out.shape == [3, 256, 3, 3]
"""
check_type
(
output_size
,
'output_size'
,
(
int
,
tuple
),
'roi_align'
)
if
isinstance
(
output_size
,
int
):
output_size
=
(
output_size
,
output_size
)
pooled_height
,
pooled_width
=
output_size
if
in_dygraph_mode
():
assert
boxes_num
is
not
None
,
"boxes_num should not be None in dygraph mode."
align_out
=
core
.
ops
.
roi_align
(
x
,
boxes
,
boxes_num
,
"pooled_height"
,
pooled_height
,
"pooled_width"
,
pooled_width
,
"spatial_scale"
,
spatial_scale
,
"sampling_ratio"
,
sampling_ratio
,
"aligned"
,
aligned
)
return
align_out
else
:
check_variable_and_dtype
(
x
,
'x'
,
[
'float32'
,
'float64'
],
'roi_align'
)
check_variable_and_dtype
(
boxes
,
'boxes'
,
[
'float32'
,
'float64'
],
'roi_align'
)
helper
=
LayerHelper
(
'roi_align'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
align_out
=
helper
.
create_variable_for_type_inference
(
dtype
)
inputs
=
{
"X"
:
x
,
"ROIs"
:
boxes
,
}
if
boxes_num
is
not
None
:
inputs
[
'RoisNum'
]
=
boxes_num
helper
.
append_op
(
type
=
"roi_align"
,
inputs
=
inputs
,
outputs
=
{
"Out"
:
align_out
},
attrs
=
{
"pooled_height"
:
pooled_height
,
"pooled_width"
:
pooled_width
,
"spatial_scale"
:
spatial_scale
,
"sampling_ratio"
:
sampling_ratio
,
"aligned"
:
aligned
,
})
return
align_out
class
RoIAlign
(
Layer
):
"""
This interface is used to construct a callable object of the `RoIAlign` class.
Please refer to :ref:`api_paddle_vision_ops_roi_align`.
Args:
output_size (int or tuple[int, int]): The pooled output size(h, w),
data type is int32. If int, h and w are both equal to output_size.
spatial_scale (float32, optional): Multiplicative spatial scale factor
to translate ROI coords from their input scale to the scale used
when pooling. Default: 1.0
Returns:
align_out (Tensor): The output of ROIAlign operator is a 4-D tensor with
shape (num_boxes, channels, pooled_h, pooled_w).
Examples:
.. code-block:: python
import paddle
from paddle.vision.ops import RoIAlign
data = paddle.rand([1, 256, 32, 32])
boxes = paddle.rand([3, 4])
boxes[:, 2] += boxes[:, 0] + 3
boxes[:, 3] += boxes[:, 1] + 4
boxes_num = paddle.to_tensor([3]).astype('int32')
roi_align = RoIAlign(output_size=(4, 3))
align_out = roi_align(data, boxes, boxes_num)
assert align_out.shape == [3, 256, 4, 3]
"""
def
__init__
(
self
,
output_size
,
spatial_scale
=
1.0
):
super
(
RoIAlign
,
self
).
__init__
()
self
.
_output_size
=
output_size
self
.
_spatial_scale
=
spatial_scale
def
forward
(
self
,
x
,
boxes
,
boxes_num
,
aligned
=
True
):
return
roi_align
(
x
=
x
,
boxes
=
boxes
,
boxes_num
=
boxes_num
,
output_size
=
self
.
_output_size
,
spatial_scale
=
self
.
_spatial_scale
,
aligned
=
aligned
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录