Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
f42a12da
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f42a12da
编写于
9月 21, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
差异文件
Merge remote-tracking branch 'ups/develop' into remove/kwargs
fix conflicts
上级
5ee7dcba
dbf07982
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
1487 addition
and
32 deletion
+1487
-32
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-1
paddle/fluid/operators/detection/CMakeLists.txt
paddle/fluid/operators/detection/CMakeLists.txt
+1
-0
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
...fluid/operators/detection/roi_perspective_transform_op.cc
+587
-0
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
...fluid/operators/detection/roi_perspective_transform_op.cu
+523
-0
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+1
-1
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+1
-19
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+49
-0
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+8
-11
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+10
-0
python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py
...luid/tests/unittests/test_roi_perspective_transform_op.py
+306
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
f42a12da
...
...
@@ -73,7 +73,6 @@ paddle.fluid.io.load_params ArgSpec(args=['executor', 'dirname', 'main_program',
paddle.fluid.io.load_persistables ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.io.save_inference_model ArgSpec(args=['dirname', 'feeded_var_names', 'target_vars', 'executor', 'main_program', 'model_filename', 'params_filename', 'export_for_deployment'], varargs=None, keywords=None, defaults=(None, None, None, True))
paddle.fluid.io.load_inference_model ArgSpec(args=['dirname', 'executor', 'model_filename', 'params_filename', 'pserver_endpoints'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.io.get_inference_program ArgSpec(args=['target_vars', 'main_program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.initializer.ConstantInitializer.__init__ ArgSpec(args=['self', 'value', 'force_cpu'], varargs=None, keywords=None, defaults=(0.0, False))
paddle.fluid.initializer.UniformInitializer.__init__ ArgSpec(args=['self', 'low', 'high', 'seed'], varargs=None, keywords=None, defaults=(-1.0, 1.0, 0))
paddle.fluid.initializer.NormalInitializer.__init__ ArgSpec(args=['self', 'loc', 'scale', 'seed'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0))
...
...
@@ -296,6 +295,7 @@ paddle.fluid.layers.ssd_loss ArgSpec(args=['location', 'confidence', 'gt_box', '
paddle.fluid.layers.detection_map ArgSpec(args=['detect_res', 'label', 'class_num', 'background_label', 'overlap_threshold', 'evaluate_difficult', 'has_state', 'input_states', 'out_states', 'ap_version'], varargs=None, keywords=None, defaults=(0, 0.3, True, None, None, None, 'integral'))
paddle.fluid.layers.rpn_target_assign ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'is_crowd', 'im_info', 'rpn_batch_size_per_im', 'rpn_straddle_thresh', 'rpn_fg_fraction', 'rpn_positive_overlap', 'rpn_negative_overlap', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.0, 0.5, 0.7, 0.3, True))
paddle.fluid.layers.anchor_generator ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None))
paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,))
paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True))
paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None))
paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
...
...
paddle/fluid/operators/detection/CMakeLists.txt
浏览文件 @
f42a12da
...
...
@@ -31,5 +31,6 @@ polygon_box_transform_op.cu)
detection_library
(
rpn_target_assign_op SRCS rpn_target_assign_op.cc
)
detection_library
(
generate_proposal_labels_op SRCS generate_proposal_labels_op.cc
)
detection_library
(
generate_proposals_op SRCS generate_proposals_op.cc
)
detection_library
(
roi_perspective_transform_op SRCS roi_perspective_transform_op.cc roi_perspective_transform_op.cu
)
#Export local libraries to parent
set
(
DETECTION_LIBRARY
${
LOCAL_DETECTION_LIBS
}
PARENT_SCOPE
)
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
0 → 100644
浏览文件 @
f42a12da
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
static
constexpr
int
kROISize
=
4
;
template
<
typename
T
>
bool
GT_E
(
T
a
,
T
b
)
{
return
(
a
>
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
}
template
<
typename
T
>
bool
LT_E
(
T
a
,
T
b
)
{
return
(
a
<
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
}
template
<
typename
T
>
bool
GT
(
T
a
,
T
b
)
{
return
(
a
-
b
)
>
1e-4
;
}
/*
*check if (x, y) is in the boundary of roi
*/
template
<
typename
T
>
bool
in_quad
(
T
x
,
T
y
,
T
roi_x
[],
T
roi_y
[])
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
T
xs
=
roi_x
[
i
];
T
ys
=
roi_y
[
i
];
T
xe
=
roi_x
[(
i
+
1
)
%
4
];
T
ye
=
roi_y
[(
i
+
1
)
%
4
];
if
(
fabs
(
ys
-
ye
)
<
1e-4
)
{
if
(
fabs
(
y
-
ys
)
<
1e-4
&&
fabs
(
y
-
ye
)
<
1e-4
&&
GT_E
<
T
>
(
x
,
std
::
min
(
xs
,
xe
))
&&
LT_E
<
T
>
(
x
,
std
::
max
(
xs
,
xe
)))
{
return
true
;
}
}
else
{
T
intersec_x
=
(
y
-
ys
)
*
(
xe
-
xs
)
/
(
ye
-
ys
)
+
xs
;
if
(
fabs
(
intersec_x
-
x
)
<
1e-4
&&
GT_E
<
T
>
(
y
,
std
::
min
(
ys
,
ye
))
&&
LT_E
<
T
>
(
y
,
std
::
max
(
ys
,
ye
)))
{
return
true
;
}
}
}
int
n_cross
=
0
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
T
xs
=
roi_x
[
i
];
T
ys
=
roi_y
[
i
];
T
xe
=
roi_x
[(
i
+
1
)
%
4
];
T
ye
=
roi_y
[(
i
+
1
)
%
4
];
if
(
fabs
(
ys
-
ye
)
<
1e-4
)
{
continue
;
}
if
(
LT_E
<
T
>
(
y
,
std
::
min
(
ys
,
ye
))
||
GT
<
T
>
(
y
,
std
::
max
(
ys
,
ye
)))
{
continue
;
}
T
intersec_x
=
(
y
-
ys
)
*
(
xe
-
xs
)
/
(
ye
-
ys
)
+
xs
;
if
(
fabs
(
intersec_x
-
x
)
<
1e-4
)
{
return
true
;
}
if
(
GT
<
T
>
(
intersec_x
,
x
))
{
n_cross
++
;
}
}
return
(
n_cross
%
2
==
1
);
}
/**
* Get the matrix of perspective transform.
*
* dx1 = x1 - x2
* dx2 = x3 - x2
* dx3 = x0 - x1 + x2 - x3
* dy1 = y1 - y2
* dy2 = y3 - y2
* dy3 = y0 - y1 + y2 - y3
*
* a11 = (x1 - x0 + a31 * (w - 1) * x1) / (w - 1)
* a12 = (x3 - x0 + a32 * (h - 1) * x3) / (h - 1)
* a13 = x0
* a21 = (y1 - y0 + a31 * (w - 1) * y1) / (w - 1)
* a22 = (y3 - y0 + a32 * (h - 1) * y3) / (h - 1)
* a23 = y0
* a31 = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / (w - 1)
* a32 = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / (h - 1)
* a33 = 1
*
*/
template
<
typename
T
>
void
get_transform_matrix
(
const
int
transformed_width
,
const
int
transformed_height
,
T
roi_x
[],
T
roi_y
[],
T
matrix
[])
{
T
x0
=
roi_x
[
0
];
T
x1
=
roi_x
[
1
];
T
x2
=
roi_x
[
2
];
T
x3
=
roi_x
[
3
];
T
y0
=
roi_y
[
0
];
T
y1
=
roi_y
[
1
];
T
y2
=
roi_y
[
2
];
T
y3
=
roi_y
[
3
];
// Estimate the height and width of RoI
T
len1
=
sqrt
((
x0
-
x1
)
*
(
x0
-
x1
)
+
(
y0
-
y1
)
*
(
y0
-
y1
));
T
len2
=
sqrt
((
x1
-
x2
)
*
(
x1
-
x2
)
+
(
y1
-
y2
)
*
(
y1
-
y2
));
T
len3
=
sqrt
((
x2
-
x3
)
*
(
x2
-
x3
)
+
(
y2
-
y3
)
*
(
y2
-
y3
));
T
len4
=
sqrt
((
x3
-
x0
)
*
(
x3
-
x0
)
+
(
y3
-
y0
)
*
(
y3
-
y0
));
T
estimated_height
=
(
len2
+
len4
)
/
2.0
;
T
estimated_width
=
(
len1
+
len3
)
/
2.0
;
// Get the normalized height and normalized width
int
normalized_height
=
transformed_height
;
int
normalized_width
=
std
::
round
(
estimated_width
*
(
normalized_height
-
1
)
/
estimated_height
)
+
1
;
normalized_width
=
std
::
min
(
normalized_width
,
transformed_width
);
T
dx1
=
x1
-
x2
;
T
dx2
=
x3
-
x2
;
T
dx3
=
x0
-
x1
+
x2
-
x3
;
T
dy1
=
y1
-
y2
;
T
dy2
=
y3
-
y2
;
T
dy3
=
y0
-
y1
+
y2
-
y3
;
matrix
[
6
]
=
(
dx3
*
dy2
-
dx2
*
dy3
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_width
-
1
);
matrix
[
7
]
=
(
dx1
*
dy3
-
dx3
*
dy1
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_height
-
1
);
matrix
[
8
]
=
1
;
matrix
[
3
]
=
(
y1
-
y0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
y1
)
/
(
normalized_width
-
1
);
matrix
[
4
]
=
(
y3
-
y0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
y3
)
/
(
normalized_height
-
1
);
matrix
[
5
]
=
y0
;
matrix
[
0
]
=
(
x1
-
x0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
x1
)
/
(
normalized_width
-
1
);
matrix
[
1
]
=
(
x3
-
x0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
x3
)
/
(
normalized_height
-
1
);
matrix
[
2
]
=
x0
;
}
/**
* Get the source coordinates in the input feature map.
*
* (u, v, w)^matrix = matrix * (out_w, out_h, 1)^matrix
*
* in_w = u / w
* in_h = v / w
*
*/
template
<
typename
T
>
void
get_source_coords
(
T
matrix
[],
int
out_w
,
int
out_h
,
T
*
in_w
,
T
*
in_h
)
{
T
u
=
matrix
[
0
]
*
out_w
+
matrix
[
1
]
*
out_h
+
matrix
[
2
];
T
v
=
matrix
[
3
]
*
out_w
+
matrix
[
4
]
*
out_h
+
matrix
[
5
];
T
w
=
matrix
[
6
]
*
out_w
+
matrix
[
7
]
*
out_h
+
matrix
[
8
];
in_w
[
0
]
=
u
/
w
;
in_h
[
0
]
=
v
/
w
;
}
/**
* Perform bilinear interpolation in the input feature map.
*/
template
<
typename
T
>
void
bilinear_interpolate
(
const
T
*
in_data
,
const
int
channels
,
const
int
width
,
const
int
height
,
int
in_n
,
int
in_c
,
T
in_w
,
T
in_h
,
T
*
val
)
{
// Deal with cases that source coords are out of feature map boundary
if
(
GT
<
T
>
(
-
0.5
,
in_w
)
||
GT
<
T
>
(
in_w
,
width
-
0.5
)
||
GT
<
T
>
(
-
0.5
,
in_h
)
||
GT
<
T
>
(
in_h
,
height
-
0.5
))
{
// empty
val
[
0
]
=
0.0
;
return
;
}
if
(
GT
<
T
>
(
0
,
in_w
))
{
in_w
=
0
;
}
if
(
GT
<
T
>
(
0
,
in_h
))
{
in_h
=
0
;
}
int
in_w_floor
=
floor
(
in_w
);
int
in_h_floor
=
floor
(
in_h
);
int
in_w_ceil
;
int
in_h_ceil
;
if
(
GT_E
<
T
>
(
in_w_floor
,
width
-
1
))
{
in_w_ceil
=
in_w_floor
=
width
-
1
;
in_w
=
static_cast
<
T
>
(
in_w_floor
);
}
else
{
in_w_ceil
=
in_w_floor
+
1
;
}
if
(
GT_E
<
T
>
(
in_h_floor
,
height
-
1
))
{
in_h_ceil
=
in_h_floor
=
height
-
1
;
in_h
=
static_cast
<
T
>
(
in_h_floor
);
}
else
{
in_h_ceil
=
in_h_floor
+
1
;
}
T
w_floor
=
in_w
-
in_w_floor
;
T
h_floor
=
in_h
-
in_h_floor
;
T
w_ceil
=
1
-
w_floor
;
T
h_ceil
=
1
-
h_floor
;
const
T
*
data
=
in_data
+
(
in_n
*
channels
+
in_c
)
*
height
*
width
;
// Do bilinear interpolation
T
v1
=
data
[
in_h_floor
*
width
+
in_w_floor
];
T
v2
=
data
[
in_h_ceil
*
width
+
in_w_floor
];
T
v3
=
data
[
in_h_ceil
*
width
+
in_w_ceil
];
T
v4
=
data
[
in_h_floor
*
width
+
in_w_ceil
];
T
w1
=
w_ceil
*
h_ceil
;
T
w2
=
w_ceil
*
h_floor
;
T
w3
=
w_floor
*
h_floor
;
T
w4
=
w_floor
*
h_ceil
;
val
[
0
]
=
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
;
}
template
<
typename
T
>
class
CPUROIPerspectiveTransformOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"ROIs"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
transformed_height
=
ctx
.
Attr
<
int
>
(
"transformed_height"
);
auto
transformed_width
=
ctx
.
Attr
<
int
>
(
"transformed_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
in_dims
=
in
->
dims
();
int
channels
=
in_dims
[
1
];
int
in_height
=
in_dims
[
2
];
int
in_width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
const
T
*
input_data
=
in
->
data
<
T
>
();
framework
::
Tensor
roi2image
;
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
auto
lod
=
rois
->
lod
().
back
();
for
(
int
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
int
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
}
}
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
rois_data
=
rois
->
data
<
T
>
();
for
(
int
n
=
0
;
n
<
rois_num
;
++
n
)
{
const
T
*
n_rois
=
rois_data
+
n
*
8
;
T
roi_x
[
4
];
T
roi_y
[
4
];
for
(
int
k
=
0
;
k
<
4
;
++
k
)
{
roi_x
[
k
]
=
n_rois
[
2
*
k
]
*
spatial_scale
;
roi_y
[
k
]
=
n_rois
[
2
*
k
+
1
]
*
spatial_scale
;
}
int
image_id
=
roi2image_data
[
n
];
// Get transform matrix
T
transform_matrix
[
9
];
get_transform_matrix
<
T
>
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
,
transform_matrix
);
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
int
out_h
=
0
;
out_h
<
transformed_height
;
++
out_h
)
{
for
(
int
out_w
=
0
;
out_w
<
transformed_width
;
++
out_w
)
{
int
out_index
=
n
*
channels
*
transformed_height
*
transformed_width
+
c
*
transformed_height
*
transformed_width
+
out_h
*
transformed_width
+
out_w
;
T
in_w
,
in_h
;
get_source_coords
<
T
>
(
transform_matrix
,
out_w
,
out_h
,
&
in_w
,
&
in_h
);
if
(
in_quad
<
T
>
(
in_w
,
in_h
,
roi_x
,
roi_y
))
{
if
(
GT
<
T
>
(
-
0.5
,
in_w
)
||
GT
<
T
>
(
in_w
,
static_cast
<
T
>
(
in_width
-
0.5
))
||
GT
<
T
>
(
-
0.5
,
in_h
)
||
GT
<
T
>
(
in_h
,
static_cast
<
T
>
(
in_height
-
0.5
)))
{
output_data
[
out_index
]
=
0.0
;
}
else
{
bilinear_interpolate
(
input_data
,
channels
,
in_width
,
in_height
,
image_id
,
c
,
in_w
,
in_h
,
output_data
+
out_index
);
}
}
else
{
output_data
[
out_index
]
=
0.0
;
}
}
}
}
}
}
};
template
<
typename
T
>
T
get_feature_gradient
(
T
xs
,
T
ys
,
int
w
,
int
h
,
const
int
width
,
const
int
height
)
{
if
(
GT
<
T
>
(
-
0.5
,
xs
)
||
GT
<
T
>
(
xs
,
width
-
0.5
)
||
GT
<
T
>
(
-
0.5
,
ys
)
||
GT
<
T
>
(
ys
,
height
-
0.5
))
{
return
0
;
}
if
(
GT
<
T
>
(
0
,
xs
))
{
xs
=
0
;
}
if
(
GT
<
T
>
(
0
,
ys
))
{
ys
=
0
;
}
int
xs_floor
=
floor
(
xs
);
int
ys_floor
=
floor
(
ys
);
int
xs_ceil
;
int
ys_ceil
;
if
(
GT_E
(
xs_floor
,
width
-
1
))
{
xs_ceil
=
xs_floor
=
width
-
1
;
xs
=
static_cast
<
T
>
(
xs_floor
);
}
else
{
xs_ceil
=
xs_floor
+
1
;
}
if
(
GT_E
(
ys_floor
,
height
-
1
))
{
ys_ceil
=
ys_floor
=
height
-
1
;
ys
=
static_cast
<
T
>
(
ys_floor
);
}
else
{
ys_ceil
=
ys_floor
+
1
;
}
T
weight
=
0
;
if
(
w
==
xs_floor
)
{
if
(
h
==
ys_floor
)
{
weight
=
(
w
+
1
-
xs
)
*
(
h
+
1
-
ys
);
}
else
if
(
h
==
ys_ceil
)
{
weight
=
(
w
+
1
-
xs
)
*
(
ys
+
1
-
h
);
}
}
else
if
(
w
==
xs_ceil
)
{
if
(
h
==
ys_floor
)
{
weight
=
(
xs
+
1
-
w
)
*
(
h
+
1
-
ys
);
}
else
if
(
h
==
ys_ceil
)
{
weight
=
(
xs
+
1
-
w
)
*
(
ys
+
1
-
h
);
}
}
return
weight
;
}
template
<
typename
T
>
class
CPUROIPerspectiveTransformGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"ROIs"
);
auto
*
out_grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
in_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
transformed_height
=
ctx
.
Attr
<
int
>
(
"transformed_height"
);
auto
transformed_width
=
ctx
.
Attr
<
int
>
(
"transformed_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
int
in_height
=
in_dims
[
2
];
int
in_width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
framework
::
Tensor
roi2image
;
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
ctx
.
GetPlace
());
auto
lod
=
rois
->
lod
().
back
();
for
(
int
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
int
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
}
}
for
(
int
n
=
0
;
n
<
batch_size
;
++
n
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
int
in_h
=
0
;
in_h
<
in_height
;
++
in_h
)
{
for
(
int
in_w
=
0
;
in_w
<
in_width
;
++
in_w
)
{
T
gradient
=
0.0
;
for
(
int
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
T
roi_x
[
4
];
T
roi_y
[
4
];
for
(
int
k
=
0
;
k
<
4
;
++
k
)
{
roi_x
[
k
]
=
rois
[
2
*
k
]
*
spatial_scale
;
roi_y
[
k
]
=
rois
[
2
*
k
+
1
]
*
spatial_scale
;
}
// Get transform matrix
T
matrix
[
9
];
get_transform_matrix
<
T
>
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
,
matrix
);
const
T
*
out_grad_ptr
=
out_grad_data
+
(
roi_idx
*
channels
+
c
)
*
transformed_height
*
transformed_width
;
for
(
int
out_h
=
0
;
out_h
<
transformed_height
;
++
out_h
)
{
for
(
int
out_w
=
0
;
out_w
<
transformed_width
;
++
out_w
)
{
T
src_w
;
T
src_h
;
get_source_coords
<
T
>
(
matrix
,
out_w
,
out_h
,
&
src_w
,
&
src_h
);
if
(
in_quad
<
T
>
(
src_w
,
src_h
,
roi_x
,
roi_y
))
{
if
(
GT
<
T
>
(
-
0.5
,
src_w
)
||
GT
<
T
>
(
src_w
,
static_cast
<
T
>
(
in_width
-
0.5
))
||
GT
<
T
>
(
-
0.5
,
src_h
)
||
GT
<
T
>
(
src_h
,
static_cast
<
T
>
(
in_height
-
0.5
)))
{
continue
;
}
T
weight
=
get_feature_gradient
<
T
>
(
src_w
,
src_h
,
in_w
,
in_h
,
in_width
,
in_height
);
gradient
+=
out_grad_ptr
[
out_h
*
transformed_width
+
out_w
]
*
weight
;
}
}
}
}
int
out_idx
=
(
n
*
channels
+
c
)
*
in_height
*
in_width
+
in_h
*
in_width
+
in_w
;
in_grad_data
[
out_idx
]
=
gradient
;
}
}
}
}
}
};
class
ROIPerspectiveTransformOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of ROIPerspectiveTransformOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"ROIs"
),
"Input(ROIs) of ROIPerspectiveTransformOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of ROIPerspectiveTransformOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
rois_dims
=
ctx
->
GetInputDim
(
"ROIs"
);
PADDLE_ENFORCE
(
input_dims
.
size
()
==
4
,
"The format of input tensor is NCHW."
);
PADDLE_ENFORCE
(
rois_dims
.
size
()
==
2
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]"
);
PADDLE_ENFORCE
(
rois_dims
[
1
]
==
8
,
"ROIs should be a 2-D LoDTensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]."
);
int
transformed_height
=
ctx
->
Attrs
().
Get
<
int
>
(
"transformed_height"
);
int
transformed_width
=
ctx
->
Attrs
().
Get
<
int
>
(
"transformed_width"
);
float
spatial_scale
=
ctx
->
Attrs
().
Get
<
float
>
(
"spatial_scale"
);
PADDLE_ENFORCE_GT
(
transformed_height
,
0
,
"The transformed output height must greater than 0"
);
PADDLE_ENFORCE_GT
(
transformed_width
,
0
,
"The transformed output width must greater than 0"
);
PADDLE_ENFORCE_GT
(
spatial_scale
,
0.0
f
,
"The spatial scale must greater than 0"
);
std
::
vector
<
int64_t
>
out_dims_v
({
rois_dims
[
0
],
// num_rois
input_dims
[
1
],
// channels
static_cast
<
int64_t
>
(
transformed_height
),
static_cast
<
int64_t
>
(
transformed_width
)});
auto
out_dims
=
framework
::
make_ddim
(
out_dims_v
);
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
class
ROIPerspectiveTransformGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"The gradient of Out should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutputs
(
framework
::
GradVarName
(
"X"
)),
"The gradient of X should not be null."
);
ctx
->
SetOutputsDim
(
framework
::
GradVarName
(
"X"
),
ctx
->
GetInputsDim
(
"X"
));
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
class
ROIPerspectiveTransformOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor), "
"the input of ROIPerspectiveTransformOp. "
"The format of input tensor is NCHW. Where N is batch size, "
"C is the number of input channels, "
"H is the height of the feature, and "
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
"(LoDTensor), "
"ROIs (Regions of Interest) to be transformed. "
"should be a 2-D LoDTensor of shape (num_rois, 8)"
"given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the top right coordinates, and"
"(x3, y3) is the bottom right coordinates, and"
"(x4, y4) is the bottom left coordinates."
);
AddOutput
(
"Out"
,
"(Tensor), "
"The output of ROIPerspectiveTransformOp is a 4-D tensor with shape "
"(num_rois, channels, transformed_h, transformed_w)."
);
AddAttr
<
float
>
(
"spatial_scale"
,
"(float, default 1.0), "
"Spatial scale factor to scale ROI coords."
)
.
SetDefault
(
1.0
);
AddAttr
<
int
>
(
"transformed_height"
,
"(int, default 1), "
"The height of transformed output."
)
.
SetDefault
(
1
);
AddAttr
<
int
>
(
"transformed_width"
,
"(int, default 1), "
"The width of transformed output."
)
.
SetDefault
(
1
);
AddComment
(
R"DOC(
**ROIPerspectiveTransform Operator**
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
roi_perspective_transform
,
ops
::
ROIPerspectiveTransformOp
,
ops
::
ROIPerspectiveTransformOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
roi_perspective_transform_grad
,
ops
::
ROIPerspectiveTransformGradOp
);
REGISTER_OP_CPU_KERNEL
(
roi_perspective_transform
,
ops
::
CPUROIPerspectiveTransformOpKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
roi_perspective_transform_grad
,
ops
::
CPUROIPerspectiveTransformGradOpKernel
<
float
>
);
paddle/fluid/operators/detection/roi_perspective_transform_op.cu
0 → 100644
浏览文件 @
f42a12da
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace
paddle
{
namespace
operators
{
// CUDA: index helpers
#define idx4_4(index, d1, d2, d3, d4) (index % d4)
#define idx4_3(index, d1, d2, d3, d4) ((index / d4) % d3)
#define idx4_2(index, d1, d2, d3, d4) ((index / d4 / d3) % d2)
#define idx4_1(index, d1, d2, d3, d4) ((index / d4 / d3 / d2) % d1)
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
template
<
typename
T
>
__device__
bool
GT_E
(
T
a
,
T
b
)
{
return
(
a
>
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
}
template
<
typename
T
>
__device__
bool
LT_E
(
T
a
,
T
b
)
{
return
(
a
<
b
)
||
fabs
(
a
-
b
)
<
1e-4
;
}
template
<
typename
T
>
__device__
bool
GT
(
T
a
,
T
b
)
{
return
(
a
-
b
)
>
1e-4
;
}
template
<
typename
T
>
__device__
T
max
(
T
a
,
T
b
)
{
return
a
>
b
?
a
:
b
;
}
template
<
typename
T
>
__device__
T
min
(
T
a
,
T
b
)
{
return
a
<
b
?
a
:
b
;
}
/*
* check if (x, y) is in the boundary of roi
*/
template
<
typename
T
>
__device__
bool
in_quad
(
T
x
,
T
y
,
T
roi_x
[],
T
roi_y
[])
{
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
T
start_w
=
roi_x
[
i
];
T
start_h
=
roi_y
[
i
];
T
end_w
=
roi_x
[(
i
+
1
)
%
4
];
T
end_h
=
roi_y
[(
i
+
1
)
%
4
];
if
(
fabs
(
start_h
-
end_h
)
<
1e-4
)
{
if
(
fabs
(
y
-
start_h
)
<
1e-4
&&
fabs
(
y
-
end_h
)
<
1e-4
&&
GT_E
<
T
>
(
x
,
min
<
T
>
(
start_w
,
end_w
))
&&
LT_E
<
T
>
(
x
,
max
<
T
>
(
start_w
,
end_w
)))
{
return
true
;
}
}
else
{
T
intersec_x
=
(
y
-
start_h
)
*
(
end_w
-
start_w
)
/
(
end_h
-
start_h
)
+
start_w
;
if
(
fabs
(
intersec_x
-
x
)
<
1e-4
&&
GT_E
(
y
,
min
<
T
>
(
start_h
,
end_h
))
&&
LT_E
<
T
>
(
y
,
max
<
T
>
(
start_h
,
end_h
)))
{
return
true
;
}
}
}
int
n_cross
=
0
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
T
start_w
=
roi_x
[
i
];
T
start_h
=
roi_y
[
i
];
T
end_w
=
roi_x
[(
i
+
1
)
%
4
];
T
end_h
=
roi_y
[(
i
+
1
)
%
4
];
if
(
fabs
(
start_h
-
end_h
)
<
1e-4
)
{
continue
;
}
if
(
LT_E
<
T
>
(
y
,
min
<
T
>
(
start_h
,
end_h
))
||
GT
<
T
>
(
y
,
max
<
T
>
(
start_h
,
end_h
)))
{
continue
;
}
T
intersec_x
=
(
y
-
start_h
)
*
(
end_w
-
start_w
)
/
(
end_h
-
start_h
)
+
start_w
;
if
(
fabs
(
intersec_x
-
x
)
<
1e-4
)
{
return
true
;
}
if
(
GT
<
T
>
(
intersec_x
,
x
))
{
n_cross
++
;
}
}
return
(
n_cross
%
2
==
1
);
}
/**
* Perform bilinear interpolation in the input feature map.
*/
template
<
typename
T
>
__device__
void
bilinear_interpolate
(
const
T
*
in_data
,
const
int
channels
,
const
int
width
,
const
int
height
,
int
in_n
,
int
in_c
,
T
in_w
,
T
in_h
,
T
*
val
)
{
// Deal with cases that source coords are out of feature map boundary
if
(
GT
<
T
>
(
-
0.5
,
in_w
)
||
GT
<
T
>
(
in_w
,
width
-
0.5
)
||
GT
<
T
>
(
-
0.5
,
in_h
)
||
GT
<
T
>
(
in_h
,
height
-
0.5
))
{
val
[
0
]
=
0.0
;
return
;
}
if
(
GT
<
T
>
(
0
,
in_w
))
{
in_w
=
0
;
}
if
(
GT
<
T
>
(
0
,
in_h
))
{
in_h
=
0
;
}
int
in_w_floor
=
floor
(
in_w
);
int
in_h_floor
=
floor
(
in_h
);
int
in_w_ceil
;
int
in_h_ceil
;
if
(
GT_E
<
T
>
(
in_w_floor
,
width
-
1
))
{
in_w_ceil
=
in_w_floor
=
width
-
1
;
in_w
=
static_cast
<
T
>
(
in_w_floor
);
}
else
{
in_w_ceil
=
in_w_floor
+
1
;
}
if
(
GT_E
<
T
>
(
in_h_floor
,
height
-
1
))
{
in_h_ceil
=
in_h_floor
=
height
-
1
;
in_h
=
static_cast
<
T
>
(
in_h_floor
);
}
else
{
in_h_ceil
=
in_h_floor
+
1
;
}
T
w_floor
=
in_w
-
in_w_floor
;
T
h_floor
=
in_h
-
in_h_floor
;
T
w_ceil
=
1
-
w_floor
;
T
h_ceil
=
1
-
h_floor
;
const
T
*
data
=
in_data
+
(
in_n
*
channels
+
in_c
)
*
height
*
width
;
// Do bilinear interpolation
T
v1
=
data
[
in_h_floor
*
width
+
in_w_floor
];
T
v2
=
data
[
in_h_ceil
*
width
+
in_w_floor
];
T
v3
=
data
[
in_h_ceil
*
width
+
in_w_ceil
];
T
v4
=
data
[
in_h_floor
*
width
+
in_w_ceil
];
T
w1
=
w_ceil
*
h_ceil
;
T
w2
=
w_ceil
*
h_floor
;
T
w3
=
w_floor
*
h_floor
;
T
w4
=
w_floor
*
h_ceil
;
val
[
0
]
=
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
;
}
/**
* Get the source coordinates in the input feature map.
*
* (u, v, w)^matrix = T * (out_w, out_h, 1)^matrix
*
* in_w = u / w
* in_h = v / w
*
*/
template
<
typename
T
>
__device__
void
get_source_coords
(
T
matrix
[],
int
out_w
,
int
out_h
,
T
*
in_w
,
T
*
in_h
)
{
T
u
=
matrix
[
0
]
*
out_w
+
matrix
[
1
]
*
out_h
+
matrix
[
2
];
T
v
=
matrix
[
3
]
*
out_w
+
matrix
[
4
]
*
out_h
+
matrix
[
5
];
T
w
=
matrix
[
6
]
*
out_w
+
matrix
[
7
]
*
out_h
+
matrix
[
8
];
in_w
[
0
]
=
u
/
w
;
in_h
[
0
]
=
v
/
w
;
}
/**
* Get the matrix of perspective transform.
*
* dx1 = x1 - x2
* dx2 = x3 - x2
* dx3 = x0 - x1 + x2 - x3
* dy1 = y1 - y2
* dy2 = y3 - y2
* dy3 = y0 - y1 + y2 - y3
*
* a11 = (x1 - x0 + a31 * (w - 1) * x1) / (w - 1)
* a12 = (x3 - x0 + a32 * (h - 1) * x3) / (h - 1)
* a13 = x0
* a21 = (y1 - y0 + a31 * (w - 1) * y1) / (w - 1)
* a22 = (y3 - y0 + a32 * (h - 1) * y3) / (h - 1)
* a23 = y0
* a31 = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / (w - 1)
* a32 = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / (h - 1)
* a33 = 1
*
*/
template
<
typename
T
>
__device__
void
get_transform_matrix
(
const
int
transformed_width
,
const
int
transformed_height
,
T
roi_x
[],
T
roi_y
[],
T
matrix
[])
{
T
x0
=
roi_x
[
0
];
T
x1
=
roi_x
[
1
];
T
x2
=
roi_x
[
2
];
T
x3
=
roi_x
[
3
];
T
y0
=
roi_y
[
0
];
T
y1
=
roi_y
[
1
];
T
y2
=
roi_y
[
2
];
T
y3
=
roi_y
[
3
];
// Estimate the height and width of RoI
T
len1
=
sqrt
((
x0
-
x1
)
*
(
x0
-
x1
)
+
(
y0
-
y1
)
*
(
y0
-
y1
));
T
len2
=
sqrt
((
x1
-
x2
)
*
(
x1
-
x2
)
+
(
y1
-
y2
)
*
(
y1
-
y2
));
T
len3
=
sqrt
((
x2
-
x3
)
*
(
x2
-
x3
)
+
(
y2
-
y3
)
*
(
y2
-
y3
));
T
len4
=
sqrt
((
x3
-
x0
)
*
(
x3
-
x0
)
+
(
y3
-
y0
)
*
(
y3
-
y0
));
T
estimated_height
=
(
len2
+
len4
)
/
2.0
;
T
estimated_width
=
(
len1
+
len3
)
/
2.0
;
// Get the normalized height and normalized width
int
normalized_height
=
transformed_height
;
int
normalized_width
=
round
(
estimated_width
*
(
normalized_height
-
1
)
/
estimated_height
)
+
1
;
normalized_width
=
min
(
normalized_width
,
transformed_width
);
T
dx1
=
x1
-
x2
;
T
dx2
=
x3
-
x2
;
T
dx3
=
x0
-
x1
+
x2
-
x3
;
T
dy1
=
y1
-
y2
;
T
dy2
=
y3
-
y2
;
T
dy3
=
y0
-
y1
+
y2
-
y3
;
matrix
[
6
]
=
(
dx3
*
dy2
-
dx2
*
dy3
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_width
-
1
);
matrix
[
7
]
=
(
dx1
*
dy3
-
dx3
*
dy1
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_height
-
1
);
matrix
[
8
]
=
1
;
matrix
[
3
]
=
(
y1
-
y0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
y1
)
/
(
normalized_width
-
1
);
matrix
[
4
]
=
(
y3
-
y0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
y3
)
/
(
normalized_height
-
1
);
matrix
[
5
]
=
y0
;
matrix
[
0
]
=
(
x1
-
x0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
x1
)
/
(
normalized_width
-
1
);
matrix
[
1
]
=
(
x3
-
x0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
x3
)
/
(
normalized_height
-
1
);
matrix
[
2
]
=
x0
;
}
template
<
typename
T
>
__global__
void
RoiTransformKernel
(
const
float
*
input_data
,
const
float
*
rois_data
,
const
int
*
roi2image_data
,
int
num_rois
,
int
in_height
,
int
in_width
,
int
channels
,
int
transformed_height
,
int
transformed_width
,
float
spatial_scale
,
T
*
output_data
)
{
int
output_size
=
num_rois
*
transformed_height
*
transformed_width
*
channels
;
CUDA_1D_KERNEL_LOOP
(
index
,
output_size
)
{
// (n, c, out_h, out_w) is an element in the transformed output
int
out_w
=
idx4_4
(
index
,
num_rois
,
channels
,
transformed_height
,
transformed_width
);
int
out_h
=
idx4_3
(
index
,
num_rois
,
channels
,
transformed_height
,
transformed_width
);
int
c
=
idx4_2
(
index
,
num_rois
,
channels
,
transformed_height
,
transformed_width
);
int
n
=
idx4_1
(
index
,
num_rois
,
channels
,
transformed_height
,
transformed_width
);
auto
bottom_rois
=
rois_data
+
n
*
8
;
int
roi_batch_ind
=
bottom_rois
[
0
];
T
roi_x
[
4
];
T
roi_y
[
4
];
for
(
int
k
=
0
;
k
<
4
;
++
k
)
{
roi_x
[
k
]
=
bottom_rois
[
2
*
k
]
*
spatial_scale
;
roi_y
[
k
]
=
bottom_rois
[
2
*
k
+
1
]
*
spatial_scale
;
}
// Get transform matrix
T
matrix
[
9
];
get_transform_matrix
<
T
>
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
,
matrix
);
// Get source coords
T
in_w
;
T
in_h
;
get_source_coords
<
T
>
(
matrix
,
out_w
,
out_h
,
&
in_w
,
&
in_h
);
if
(
in_quad
<
T
>
(
in_w
,
in_h
,
roi_x
,
roi_y
))
{
if
(
GT
<
T
>
(
-
0.5
,
in_w
)
||
GT
<
T
>
(
in_w
,
static_cast
<
T
>
(
in_width
-
0.5
))
||
GT
<
T
>
(
-
0.5
,
in_h
)
||
GT
<
T
>
(
in_h
,
static_cast
<
T
>
(
in_height
-
0.5
)))
{
// Skip if source coords is not in input image
output_data
[
index
]
=
0.0
;
}
else
{
// Perform bilinear interpolation
int
in_n
=
roi2image_data
[
n
];
bilinear_interpolate
<
T
>
(
input_data
,
channels
,
in_width
,
in_height
,
in_n
,
c
,
in_w
,
in_h
,
output_data
+
index
);
}
}
else
{
// Skip if source coords is not in quad
output_data
[
index
]
=
0.0
;
}
}
}
template
<
typename
T
>
class
CUDAROIPerspectiveTransformOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"ROIs"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
auto
transformed_height
=
ctx
.
Attr
<
int
>
(
"transformed_height"
);
auto
transformed_width
=
ctx
.
Attr
<
int
>
(
"transformed_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
int
in_height
=
in_dims
[
2
];
int
in_width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
const
T
*
input_data
=
in
->
data
<
T
>
();
T
*
output_data
=
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
rois_data
=
rois
->
data
<
T
>
();
framework
::
Tensor
roi2image
;
framework
::
Tensor
roi2image_dev
;
roi2image
.
Resize
({
rois_num
});
int
*
roi2image_data
=
roi2image
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
auto
lod
=
rois
->
lod
().
back
();
for
(
int
i
=
0
;
i
<
lod
.
size
()
-
1
;
++
i
)
{
for
(
int
j
=
lod
[
i
];
j
<
lod
[
i
+
1
];
++
j
)
{
roi2image_data
[
j
]
=
i
;
}
}
TensorCopySync
(
roi2image
,
ctx
.
GetPlace
(),
&
roi2image_dev
);
int
out_size
=
rois_num
*
transformed_height
*
transformed_width
*
channels
;
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
int
block
=
512
;
int
grid
=
(
out_size
+
block
-
1
)
/
block
;
RoiTransformKernel
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
input_data
,
rois_data
,
roi2image_dev
.
data
<
int
>
(),
rois_num
,
in_height
,
in_width
,
channels
,
transformed_height
,
transformed_width
,
spatial_scale
,
output_data
);
}
};
template
<
typename
T
>
__device__
T
get_feature_gradient
(
T
xs
,
T
ys
,
int
w
,
int
h
,
const
int
width
,
const
int
height
)
{
if
(
GT
<
T
>
(
-
0.5
,
xs
)
||
GT
<
T
>
(
xs
,
width
-
0.5
)
||
GT
<
T
>
(
-
0.5
,
ys
)
||
GT
<
T
>
(
ys
,
height
-
0.5
))
{
return
0
;
}
if
(
GT
<
T
>
(
0
,
xs
))
{
xs
=
0
;
}
if
(
GT
<
T
>
(
0
,
ys
))
{
ys
=
0
;
}
int
xs_floor
=
floor
(
xs
);
int
ys_floor
=
floor
(
ys
);
int
xs_ceil
;
int
ys_ceil
;
if
(
GT_E
<
T
>
(
xs_floor
,
width
-
1
))
{
xs_ceil
=
xs_floor
=
width
-
1
;
xs
=
static_cast
<
T
>
(
xs_floor
);
}
else
{
xs_ceil
=
xs_floor
+
1
;
}
if
(
GT_E
(
ys_floor
,
height
-
1
))
{
ys_ceil
=
ys_floor
=
height
-
1
;
ys
=
static_cast
<
T
>
(
ys_floor
);
}
else
{
ys_ceil
=
ys_floor
+
1
;
}
T
weight
=
0
;
if
(
w
==
xs_floor
)
{
if
(
h
==
ys_floor
)
{
weight
=
(
w
+
1
-
xs
)
*
(
h
+
1
-
ys
);
}
else
if
(
h
==
ys_ceil
)
{
weight
=
(
w
+
1
-
xs
)
*
(
ys
+
1
-
h
);
}
}
else
if
(
w
==
xs_ceil
)
{
if
(
h
==
ys_floor
)
{
weight
=
(
xs
+
1
-
w
)
*
(
h
+
1
-
ys
);
}
else
if
(
h
==
ys_ceil
)
{
weight
=
(
xs
+
1
-
w
)
*
(
ys
+
1
-
h
);
}
}
return
weight
;
}
template
<
typename
T
>
__global__
void
RoiTransformGradKernel
(
const
size_t
*
lod
,
const
T
*
rois_data
,
int
batch_size
,
int
num_rois
,
int
in_height
,
int
in_width
,
int
channels
,
int
transformed_height
,
int
transformed_width
,
float
spatial_scale
,
const
T
*
out_grad_data
,
T
*
in_grad_data
)
{
int
input_size
=
batch_size
*
in_height
*
in_width
*
channels
;
CUDA_1D_KERNEL_LOOP
(
index
,
input_size
)
{
// (n, c, h, w) coords in input
int
in_w
=
idx4_4
(
index
,
batch_size
,
channels
,
in_height
,
in_width
);
int
in_h
=
idx4_3
(
index
,
batch_size
,
channels
,
in_height
,
in_width
);
int
c
=
idx4_2
(
index
,
batch_size
,
channels
,
in_height
,
in_width
);
int
n
=
idx4_1
(
index
,
batch_size
,
channels
,
in_height
,
in_width
);
T
gradient
=
0.0
;
// Accumulate gradient over all RoIs that interpolated this element
for
(
int
roi_idx
=
lod
[
n
];
roi_idx
<
lod
[
n
+
1
];
++
roi_idx
)
{
const
T
*
rois
=
rois_data
+
roi_idx
*
8
;
T
roi_x
[
4
];
T
roi_y
[
4
];
for
(
int
k
=
0
;
k
<
4
;
++
k
)
{
roi_x
[
k
]
=
rois
[
2
*
k
]
*
spatial_scale
;
roi_y
[
k
]
=
rois
[
2
*
k
+
1
]
*
spatial_scale
;
}
// Get transform matrix
T
matrix
[
9
];
get_transform_matrix
<
T
>
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
,
matrix
);
const
T
*
out_grad_ptr
=
out_grad_data
+
(
roi_idx
*
channels
+
c
)
*
transformed_height
*
transformed_width
;
for
(
int
out_h
=
0
;
out_h
<
transformed_height
;
++
out_h
)
{
for
(
int
out_w
=
0
;
out_w
<
transformed_width
;
++
out_w
)
{
T
src_w
;
T
src_h
;
get_source_coords
<
T
>
(
matrix
,
out_w
,
out_h
,
&
src_w
,
&
src_h
);
if
(
in_quad
<
T
>
(
src_w
,
src_h
,
roi_x
,
roi_y
))
{
if
(
GT
<
T
>
(
-
0.5
,
src_w
)
||
GT
<
T
>
(
src_w
,
static_cast
<
T
>
(
in_width
-
0.5
))
||
GT
<
T
>
(
-
0.5
,
src_h
)
||
GT
<
T
>
(
src_h
,
static_cast
<
T
>
(
in_height
-
0.5
)))
{
continue
;
}
T
weight
=
get_feature_gradient
<
T
>
(
src_w
,
src_h
,
in_w
,
in_h
,
in_width
,
in_height
);
gradient
+=
out_grad_ptr
[
out_h
*
transformed_width
+
out_w
]
*
weight
;
}
}
}
}
in_grad_data
[
index
]
=
gradient
;
}
}
template
<
typename
T
>
class
CUDAROIPerspectiveTransformGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
rois
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"ROIs"
);
auto
*
out_grad
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
in_grad
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
transformed_height
=
ctx
.
Attr
<
int
>
(
"transformed_height"
);
auto
transformed_width
=
ctx
.
Attr
<
int
>
(
"transformed_width"
);
auto
spatial_scale
=
ctx
.
Attr
<
float
>
(
"spatial_scale"
);
auto
in_dims
=
in
->
dims
();
int
batch_size
=
in_dims
[
0
];
int
channels
=
in_dims
[
1
];
int
in_height
=
in_dims
[
2
];
int
in_width
=
in_dims
[
3
];
int
rois_num
=
rois
->
dims
()[
0
];
T
*
in_grad_data
=
in_grad
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
T
*
out_grad_data
=
out_grad
->
data
<
T
>
();
const
T
*
rois_data
=
rois
->
data
<
T
>
();
auto
lod
=
rois
->
lod
().
back
();
auto
lod_data
=
lod
.
CUDAData
(
ctx
.
GetPlace
());
int
in_size
=
in
->
numel
();
auto
stream
=
ctx
.
cuda_device_context
().
stream
();
int
block
=
512
;
int
grid
=
(
in_size
+
block
-
1
)
/
block
;
RoiTransformGradKernel
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
lod_data
,
rois_data
,
batch_size
,
rois_num
,
in_height
,
in_width
,
channels
,
transformed_height
,
transformed_width
,
spatial_scale
,
out_grad_data
,
in_grad_data
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
roi_perspective_transform
,
ops
::
CUDAROIPerspectiveTransformOpKernel
<
float
>
);
REGISTER_OP_CUDA_KERNEL
(
roi_perspective_transform_grad
,
ops
::
CUDAROIPerspectiveTransformGradOpKernel
<
float
>
);
python/paddle/fluid/__init__.py
浏览文件 @
f42a12da
...
...
@@ -46,7 +46,7 @@ from . import transpiler
from
.param_attr
import
ParamAttr
,
WeightNormParamAttr
from
.data_feeder
import
DataFeeder
from
.core
import
LoDTensor
,
LoDTensorArray
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
from
.transpiler
import
DistributeTranspiler
,
InferenceTranspiler
,
\
from
.transpiler
import
DistributeTranspiler
,
\
memory_optimize
,
release_memory
,
DistributeTranspilerConfig
from
.lod_tensor
import
create_lod_tensor
,
create_random_int_lodtensor
from
.
import
clip
...
...
python/paddle/fluid/io.py
浏览文件 @
f42a12da
...
...
@@ -27,8 +27,7 @@ from . import core
__all__
=
[
'save_vars'
,
'save_params'
,
'save_persistables'
,
'load_vars'
,
'load_params'
,
'load_persistables'
,
'save_inference_model'
,
'load_inference_model'
,
'get_inference_program'
'load_persistables'
,
'save_inference_model'
,
'load_inference_model'
]
...
...
@@ -504,23 +503,6 @@ def load_persistables(executor, dirname, main_program=None, filename=None):
filename
=
filename
)
def
get_inference_program
(
target_vars
,
main_program
=
None
):
if
main_program
is
None
:
main_program
=
default_main_program
()
if
not
isinstance
(
target_vars
,
list
):
target_vars
=
[
target_vars
]
vars
=
[]
for
var
in
target_vars
:
if
isinstance
(
var
,
Evaluator
):
vars
.
extend
(
var
.
states
)
vars
.
extend
(
var
.
metrics
)
else
:
vars
.
append
(
var
)
pruned_program
=
main_program
.
_prune
(
targets
=
vars
)
inference_program
=
pruned_program
.
_inference_optimize
()
return
inference_program
def
prepend_feed_ops
(
inference_program
,
feed_target_names
,
feed_holder_name
=
'feed'
):
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
f42a12da
...
...
@@ -39,6 +39,7 @@ __all__ = [
'detection_map'
,
'rpn_target_assign'
,
'anchor_generator'
,
'roi_perspective_transform'
,
'generate_proposal_labels'
,
'generate_proposals'
,
]
...
...
@@ -1262,6 +1263,54 @@ def anchor_generator(input,
return
anchor
,
var
def
roi_perspective_transform
(
input
,
rois
,
transformed_height
,
transformed_width
,
spatial_scale
=
1.0
):
"""
ROI perspective transform op.
Args:
input (Variable): The input of ROIPerspectiveTransformOp. The format of
input tensor is NCHW. Where N is batch size, C is the
number of input channels, H is the height of the feature,
and W is the width of the feature.
rois (Variable): ROIs (Regions of Interest) to be transformed. It should be
a 2-D LoDTensor of shape (num_rois, 8). Given as
[[x1, y1, x2, y2, x3, y3, x4, y4], ...], (x1, y1) is the
top left coordinates, and (x2, y2) is the top right
coordinates, and (x3, y3) is the bottom right coordinates,
and (x4, y4) is the bottom left coordinates.
transformed_height (integer): The height of transformed output.
transformed_height (integer): The width of transformed output.
spatial_scale (float): Spatial scale factor to scale ROI coords. Default: 1.0
Returns:
Variable: The output of ROIPerspectiveTransformOp which is a 4-D tensor with shape
(num_rois, channels, transformed_h, transformed_w).
Examples:
.. code-block:: python
out = fluid.layers.roi_perspective_transform(input, rois, 7, 7, 1.0)
"""
helper
=
LayerHelper
(
'roi_perspective_transform'
,
**
locals
())
dtype
=
helper
.
input_dtype
()
out
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
type
=
"roi_perspective_transform"
,
inputs
=
{
"X"
:
input
,
"ROIs"
:
rois
},
outputs
=
{
"Out"
:
out
},
attrs
=
{
"transformed_height"
:
transformed_height
,
"transformed_width"
:
transformed_width
,
"spatial_scale"
:
spatial_scale
})
return
out
def
generate_proposal_labels
(
rpn_rois
,
gt_classes
,
is_crowd
,
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
f42a12da
...
...
@@ -437,13 +437,8 @@ def split_data(data, num_part):
]
def
test_context
(
t
rain_prog
m
,
avg_cost
,
train_exe
,
dev_count
,
data_input_names
,
def
test_context
(
t
est_progra
m
,
avg_cost
,
train_exe
,
dev_count
,
data_input_names
,
sum_cost
,
token_num
):
# Context to do validation.
test_program
=
train_progm
.
clone
()
with
fluid
.
program_guard
(
test_program
):
test_program
=
fluid
.
io
.
get_inference_program
([
avg_cost
])
val_data
=
DataReader
(
src_vocab_fpath
=
TrainTaskConfig
.
src_vocab_fpath
,
trg_vocab_fpath
=
TrainTaskConfig
.
trg_vocab_fpath
,
...
...
@@ -505,7 +500,7 @@ def test_context(train_progm, avg_cost, train_exe, dev_count, data_input_names,
def
train_loop
(
exe
,
train_progm
,
dev_count
,
sum_cost
,
avg_cost
,
lr_scheduler
,
token_num
,
predict
):
token_num
,
predict
,
test_program
):
# Initialize the parameters.
if
TrainTaskConfig
.
ckpt_path
:
lr_scheduler
.
current_steps
=
TrainTaskConfig
.
start_step
...
...
@@ -554,7 +549,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
-
1
]
+
label_data_input_fields
if
TrainTaskConfig
.
val_file_pattern
is
not
None
:
test
=
test_context
(
t
rain_prog
m
,
avg_cost
,
train_exe
,
dev_count
,
test
=
test_context
(
t
est_progra
m
,
avg_cost
,
train_exe
,
dev_count
,
data_input_names
,
sum_cost
,
token_num
)
# the best cross-entropy value with label smoothing
...
...
@@ -1647,6 +1642,8 @@ def get_model(is_dist, is_async):
local_lr_scheduler
=
LearningRateScheduler
(
ModelHyperParams
.
d_model
,
TrainTaskConfig
.
warmup_steps
,
TrainTaskConfig
.
learning_rate
)
# Context to do validation.
test_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
if
not
is_dist
:
optimizer
=
fluid
.
optimizer
.
Adam
(
...
...
@@ -1671,7 +1668,7 @@ def get_model(is_dist, is_async):
epsilon
=
TrainTaskConfig
.
eps
)
optimizer
.
minimize
(
sum_cost
)
return
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
return
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
,
test_program
def
update_args
():
...
...
@@ -1705,7 +1702,7 @@ class DistTransformer2x2(TestDistRunnerBase):
def
run_trainer
(
self
,
use_cuda
,
args
):
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
TrainTaskConfig
.
use_gpu
=
use_cuda
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
=
get_model
(
sum_cost
,
avg_cost
,
predict
,
token_num
,
local_lr_scheduler
,
test_program
=
get_model
(
args
.
is_dist
,
not
args
.
sync_mode
)
if
args
.
is_dist
:
...
...
@@ -1726,7 +1723,7 @@ class DistTransformer2x2(TestDistRunnerBase):
TrainTaskConfig
.
local
=
not
args
.
is_dist
train_loop
(
startup_exe
,
trainer_prog
,
1
,
sum_cost
,
avg_cost
,
local_lr_scheduler
,
token_num
,
predict
)
local_lr_scheduler
,
token_num
,
predict
,
test_program
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
f42a12da
...
...
@@ -725,6 +725,16 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_roi_perspective_transform
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"x"
,
shape
=
[
256
,
30
,
30
],
dtype
=
"float32"
)
rois
=
layers
.
data
(
name
=
"rois"
,
shape
=
[
8
],
dtype
=
"float32"
,
lod_level
=
1
)
output
=
layers
.
roi_perspective_transform
(
x
,
rois
,
7
,
7
,
0.6
)
self
.
assertIsNotNone
(
output
)
print
(
str
(
program
))
def
test_sequence_enumerate
(
self
):
program
=
Program
()
with
program_guard
(
program
):
...
...
python/paddle/fluid/tests/unittests/test_roi_perspective_transform_op.py
0 → 100644
浏览文件 @
f42a12da
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License")
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUWARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
math
import
sys
import
paddle.compat
as
cpt
from
op_test
import
OpTest
from
math
import
sqrt
from
math
import
floor
def
gt_e
(
a
,
b
):
return
a
>
b
or
abs
(
a
-
b
)
<
1e-4
def
gt
(
a
,
b
):
return
(
a
-
b
)
>
1e-4
def
lt_e
(
a
,
b
):
return
a
<
b
or
abs
(
a
-
b
)
<
1e-4
def
in_quad
(
x
,
y
,
roi_x
,
roi_y
):
# check if (x, y) is in the boundary of roi
for
i
in
range
(
4
):
xs
=
roi_x
[
i
]
ys
=
roi_y
[
i
]
xe
=
roi_x
[(
i
+
1
)
%
4
]
ye
=
roi_y
[(
i
+
1
)
%
4
]
if
abs
(
ys
-
ye
)
<
1e-4
:
if
abs
(
y
-
ys
)
<
1e-4
and
abs
(
y
-
ye
)
<
1e-4
and
gt_e
(
x
,
min
(
xs
,
xe
))
and
lt_e
(
x
,
max
(
xs
,
xe
)):
return
True
else
:
intersec_x
=
(
y
-
ys
)
*
(
xe
-
xs
)
/
(
ye
-
ys
)
+
xs
if
abs
(
intersec_x
-
x
)
<
1e-4
and
gt_e
(
y
,
min
(
ys
,
ye
))
and
lt_e
(
y
,
max
(
ys
,
ye
)):
return
True
n_cross
=
0
for
i
in
range
(
4
):
xs
=
roi_x
[
i
]
ys
=
roi_y
[
i
]
xe
=
roi_x
[(
i
+
1
)
%
4
]
ye
=
roi_y
[(
i
+
1
)
%
4
]
if
abs
(
ys
-
ye
)
<
1e-4
:
continue
if
lt_e
(
y
,
min
(
ys
,
ye
))
or
gt
(
y
,
max
(
ys
,
ye
)):
continue
intersec_x
=
(
y
-
ys
)
*
(
xe
-
xs
)
/
(
ye
-
ys
)
+
xs
if
abs
(
intersec_x
-
x
)
<
1e-4
:
return
True
if
gt
(
intersec_x
,
x
):
n_cross
+=
1
return
(
n_cross
%
2
==
1
)
def
get_transform_matrix
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
):
x0
=
roi_x
[
0
]
x1
=
roi_x
[
1
]
x2
=
roi_x
[
2
]
x3
=
roi_x
[
3
]
y0
=
roi_y
[
0
]
y1
=
roi_y
[
1
]
y2
=
roi_y
[
2
]
y3
=
roi_y
[
3
]
len1
=
sqrt
((
x0
-
x1
)
*
(
x0
-
x1
)
+
(
y0
-
y1
)
*
(
y0
-
y1
))
len2
=
sqrt
((
x1
-
x2
)
*
(
x1
-
x2
)
+
(
y1
-
y2
)
*
(
y1
-
y2
))
len3
=
sqrt
((
x2
-
x3
)
*
(
x2
-
x3
)
+
(
y2
-
y3
)
*
(
y2
-
y3
))
len4
=
sqrt
((
x3
-
x0
)
*
(
x3
-
x0
)
+
(
y3
-
y0
)
*
(
y3
-
y0
))
estimated_height
=
(
len2
+
len4
)
/
2.0
estimated_width
=
(
len1
+
len3
)
/
2.0
normalized_height
=
transformed_height
normalized_width
=
round
(
estimated_width
*
(
normalized_height
-
1
)
/
estimated_height
)
+
1
normalized_width
=
min
(
normalized_width
,
transformed_width
)
dx1
=
x1
-
x2
dx2
=
x3
-
x2
dx3
=
x0
-
x1
+
x2
-
x3
dy1
=
y1
-
y2
dy2
=
y3
-
y2
dy3
=
y0
-
y1
+
y2
-
y3
matrix
=
np
.
zeros
([
9
])
matrix
[
6
]
=
(
dx3
*
dy2
-
dx2
*
dy3
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_width
-
1
)
matrix
[
7
]
=
(
dx1
*
dy3
-
dx3
*
dy1
)
/
(
dx1
*
dy2
-
dx2
*
dy1
)
/
(
normalized_height
-
1
)
matrix
[
8
]
=
1
matrix
[
3
]
=
(
y1
-
y0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
y1
)
/
(
normalized_width
-
1
)
matrix
[
4
]
=
(
y3
-
y0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
y3
)
/
(
normalized_height
-
1
)
matrix
[
5
]
=
y0
matrix
[
0
]
=
(
x1
-
x0
+
matrix
[
6
]
*
(
normalized_width
-
1
)
*
x1
)
/
(
normalized_width
-
1
)
matrix
[
1
]
=
(
x3
-
x0
+
matrix
[
7
]
*
(
normalized_height
-
1
)
*
x3
)
/
(
normalized_height
-
1
)
matrix
[
2
]
=
x0
return
matrix
def
get_source_coords
(
matrix
,
out_w
,
out_h
):
u
=
matrix
[
0
]
*
out_w
+
matrix
[
1
]
*
out_h
+
matrix
[
2
]
v
=
matrix
[
3
]
*
out_w
+
matrix
[
4
]
*
out_h
+
matrix
[
5
]
w
=
matrix
[
6
]
*
out_w
+
matrix
[
7
]
*
out_h
+
matrix
[
8
]
in_w
=
u
/
w
in_h
=
v
/
w
return
in_w
,
in_h
def
bilinear_interpolate
(
in_data
,
in_n
,
in_c
,
in_w
,
in_h
):
batch_size
=
in_data
.
shape
[
0
]
channels
=
in_data
.
shape
[
1
]
height
=
in_data
.
shape
[
2
]
width
=
in_data
.
shape
[
3
]
if
gt
(
-
0.5
,
in_w
)
or
gt
(
in_w
,
width
-
0.5
)
or
gt
(
-
0.5
,
in_h
)
or
gt
(
in_h
,
height
-
0.5
):
return
0.0
if
gt
(
0
,
in_w
):
in_w
=
0
if
gt
(
0
,
in_h
):
in_h
=
0
in_w_floor
=
floor
(
in_w
)
in_h_floor
=
floor
(
in_h
)
if
gt_e
(
in_w_floor
,
width
-
1
):
in_w_ceil
=
width
-
1
in_w_floor
=
width
-
1
in_w
=
in_w_floor
else
:
in_w_ceil
=
in_w_floor
+
1
if
gt_e
(
in_h_floor
,
height
-
1
):
in_h_ceil
=
height
-
1
in_h_floor
=
height
-
1
in_h
=
in_h_floor
else
:
in_h_ceil
=
in_h_floor
+
1
w_floor
=
in_w
-
in_w_floor
h_floor
=
in_h
-
in_h_floor
w_ceil
=
1
-
w_floor
h_ceil
=
1
-
h_floor
v1
=
in_data
[
in_n
][
in_c
][
int
(
in_h_floor
)][
int
(
in_w_floor
)]
v2
=
in_data
[
in_n
][
in_c
][
int
(
in_h_ceil
)][
int
(
in_w_floor
)]
v3
=
in_data
[
in_n
][
in_c
][
int
(
in_h_ceil
)][
int
(
in_w_ceil
)]
v4
=
in_data
[
in_n
][
in_c
][
int
(
in_h_floor
)][
int
(
in_w_ceil
)]
w1
=
w_ceil
*
h_ceil
w2
=
w_ceil
*
h_floor
w3
=
w_floor
*
h_floor
w4
=
w_floor
*
h_ceil
val
=
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
return
val
def
lod_convert
(
lod
):
ret
=
[
0
]
for
count
in
lod
:
ret
.
append
(
ret
[
-
1
]
+
count
)
return
ret
def
roi_transform
(
in_data
,
rois
,
rois_lod
,
transformed_height
,
transformed_width
,
spatial_scale
):
channels
=
in_data
.
shape
[
1
]
in_height
=
in_data
.
shape
[
2
]
in_width
=
in_data
.
shape
[
3
]
rois_num
=
rois
.
shape
[
0
]
roi2image
=
[
0
]
*
rois_num
rois_lod
=
lod_convert
(
rois_lod
[
0
])
for
i
in
range
(
len
(
rois_lod
)
-
1
):
for
j
in
range
(
rois_lod
[
i
],
rois_lod
[
i
+
1
]):
roi2image
[
j
]
=
i
out
=
np
.
zeros
([
rois_num
,
channels
,
transformed_height
,
transformed_width
])
for
n
in
range
(
rois_num
):
roi_x
=
[]
roi_y
=
[]
for
k
in
range
(
4
):
roi_x
.
append
(
rois
[
n
][
2
*
k
]
*
spatial_scale
)
roi_y
.
append
(
rois
[
n
][
2
*
k
+
1
]
*
spatial_scale
)
image_id
=
roi2image
[
n
]
transform_matrix
=
get_transform_matrix
(
transformed_width
,
transformed_height
,
roi_x
,
roi_y
)
for
c
in
range
(
channels
):
for
out_h
in
range
(
transformed_height
):
for
out_w
in
range
(
transformed_width
):
in_w
,
in_h
=
get_source_coords
(
transform_matrix
,
out_w
,
out_h
)
if
in_quad
(
in_w
,
in_h
,
roi_x
,
roi_y
)
and
gt_e
(
in_w
,
-
0.5
)
and
lt_e
(
in_w
,
in_width
-
0.5
)
and
gt_e
(
in_h
,
-
0.5
)
and
lt_e
(
in_h
,
in_height
-
0.5
):
out
[
n
][
c
][
out_h
][
out_w
]
=
bilinear_interpolate
(
in_data
,
image_id
,
c
,
in_w
,
in_h
)
else
:
out
[
n
][
c
][
out_h
][
out_w
]
=
0.0
return
out
.
astype
(
"float32"
)
class
TestROIPoolOp
(
OpTest
):
def
set_data
(
self
):
self
.
init_test_case
()
self
.
make_rois
()
self
.
inputs
=
{
'X'
:
self
.
x
,
'ROIs'
:
(
self
.
rois
,
self
.
rois_lod
)}
self
.
attrs
=
{
'spatial_scale'
:
self
.
spatial_scale
,
'transformed_height'
:
self
.
transformed_height
,
'transformed_width'
:
self
.
transformed_width
}
out
=
roi_transform
(
self
.
x
,
self
.
rois
,
self
.
rois_lod
,
self
.
transformed_height
,
self
.
transformed_width
,
self
.
spatial_scale
)
self
.
outputs
=
{
'Out'
:
out
}
def
init_test_case
(
self
):
self
.
batch_size
=
2
self
.
channels
=
2
self
.
height
=
8
self
.
width
=
8
# n, c, h, w
self
.
x_dim
=
(
self
.
batch_size
,
self
.
channels
,
self
.
height
,
self
.
width
)
self
.
spatial_scale
=
1.0
/
2.0
self
.
transformed_height
=
2
self
.
transformed_width
=
3
self
.
x
=
np
.
random
.
random
(
self
.
x_dim
).
astype
(
'float32'
)
def
make_rois
(
self
):
rois
=
[]
self
.
rois_lod
=
[[]]
for
bno
in
range
(
self
.
batch_size
):
self
.
rois_lod
[
0
].
append
(
bno
+
1
)
for
i
in
range
(
bno
+
1
):
x1
=
np
.
random
.
randint
(
0
,
self
.
width
//
self
.
spatial_scale
-
self
.
transformed_width
)
y1
=
np
.
random
.
randint
(
0
,
self
.
height
//
self
.
spatial_scale
-
self
.
transformed_height
)
x2
=
np
.
random
.
randint
(
x1
+
self
.
transformed_width
,
self
.
width
//
self
.
spatial_scale
)
y2
=
np
.
random
.
randint
(
0
,
self
.
height
//
self
.
spatial_scale
-
self
.
transformed_height
)
x3
=
np
.
random
.
randint
(
x1
+
self
.
transformed_width
,
self
.
width
//
self
.
spatial_scale
)
y3
=
np
.
random
.
randint
(
y1
+
self
.
transformed_height
,
self
.
height
//
self
.
spatial_scale
)
x4
=
np
.
random
.
randint
(
0
,
self
.
width
//
self
.
spatial_scale
-
self
.
transformed_width
)
y4
=
np
.
random
.
randint
(
y1
+
self
.
transformed_height
,
self
.
height
//
self
.
spatial_scale
)
roi
=
[
x1
,
y1
,
x2
,
y2
,
x3
,
y3
,
x4
,
y4
]
rois
.
append
(
roi
)
self
.
rois_num
=
len
(
rois
)
self
.
rois
=
np
.
array
(
rois
).
astype
(
"float32"
)
def
setUp
(
self
):
self
.
op_type
=
"roi_perspective_transform"
self
.
set_data
()
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录