提交 24d1c44a 编写于 作者: L LielinJiang 提交者: whs

Fix roi_perspective_transform_op bug (#18522)

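* fix transform matrix bug: write one 9-element transform matrix per ROI, so the output shape is (num_rois, 9) instead of a single shared (9,) buffer, test=develop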

* modify API.spec (update the docstring hash of paddle.fluid.layers.roi_perspective_transform)
上级 88b52a27
...@@ -382,7 +382,7 @@ paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', ...@@ -382,7 +382,7 @@ paddle.fluid.layers.rpn_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits',
paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595')) paddle.fluid.layers.retinanet_target_assign (ArgSpec(args=['bbox_pred', 'cls_logits', 'anchor_box', 'anchor_var', 'gt_boxes', 'gt_labels', 'is_crowd', 'im_info', 'num_classes', 'positive_overlap', 'negative_overlap'], varargs=None, keywords=None, defaults=(1, 0.5, 0.4)), ('document', 'fa1d1c9d5e0111684c0db705f86a2595'))
paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d')) paddle.fluid.layers.sigmoid_focal_loss (ArgSpec(args=['x', 'label', 'fg_num', 'gamma', 'alpha'], varargs=None, keywords=None, defaults=(2, 0.25)), ('document', 'aeac6aae100173b3fc7f102cf3023a3d'))
paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '0aaacaf9858b8270a8ab5b0aacdd94b7')) paddle.fluid.layers.anchor_generator (ArgSpec(args=['input', 'anchor_sizes', 'aspect_ratios', 'variance', 'stride', 'offset', 'name'], varargs=None, keywords=None, defaults=(None, None, [0.1, 0.1, 0.2, 0.2], None, 0.5, None)), ('document', '0aaacaf9858b8270a8ab5b0aacdd94b7'))
paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', '54e3bf70e3bdbd58b3b9b65b3c69a854')) paddle.fluid.layers.roi_perspective_transform (ArgSpec(args=['input', 'rois', 'transformed_height', 'transformed_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1.0,)), ('document', 'a82016342789ba9d85737e405f824ff1'))
paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', '69def376b42ef0681d0cc7f53a2dac4b')) paddle.fluid.layers.generate_proposal_labels (ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random', 'is_cls_agnostic', 'is_cascade_rcnn'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True, False, False)), ('document', '69def376b42ef0681d0cc7f53a2dac4b'))
paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1')) paddle.fluid.layers.generate_proposals (ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)), ('document', 'b7d707822b6af2a586bce608040235b1'))
paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef')) paddle.fluid.layers.generate_mask_labels (ArgSpec(args=['im_info', 'gt_classes', 'is_crowd', 'gt_segms', 'rois', 'labels_int32', 'num_classes', 'resolution'], varargs=None, keywords=None, defaults=None), ('document', 'b319b10ddaf17fb4ddf03518685a17ef'))
......
...@@ -272,6 +272,9 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { ...@@ -272,6 +272,9 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
T* output_data = out->mutable_data<T>(ctx.GetPlace()); T* output_data = out->mutable_data<T>(ctx.GetPlace());
const T* rois_data = rois->data<T>(); const T* rois_data = rois->data<T>();
T* transform_matrix =
out_transform_matrix->mutable_data<T>({rois_num, 9}, ctx.GetPlace());
for (int n = 0; n < rois_num; ++n) { for (int n = 0; n < rois_num; ++n) {
const T* n_rois = rois_data + n * 8; const T* n_rois = rois_data + n * 8;
T roi_x[4]; T roi_x[4];
...@@ -282,11 +285,12 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { ...@@ -282,11 +285,12 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
} }
int image_id = roi2image_data[n]; int image_id = roi2image_data[n];
// Get transform matrix // Get transform matrix
T* transform_matrix = T matrix[9];
out_transform_matrix->mutable_data<T>({9}, ctx.GetPlace());
get_transform_matrix<T>(transformed_width, transformed_height, roi_x, get_transform_matrix<T>(transformed_width, transformed_height, roi_x,
roi_y, transform_matrix); roi_y, matrix);
for (int i = 0; i < 9; i++) {
transform_matrix[n * 9 + i] = matrix[i];
}
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
for (int out_h = 0; out_h < transformed_height; ++out_h) { for (int out_h = 0; out_h < transformed_height; ++out_h) {
for (int out_w = 0; out_w < transformed_width; ++out_w) { for (int out_w = 0; out_w < transformed_width; ++out_w) {
...@@ -295,7 +299,7 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { ...@@ -295,7 +299,7 @@ class CPUROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
c * transformed_height * transformed_width + c * transformed_height * transformed_width +
out_h * transformed_width + out_w; out_h * transformed_width + out_w;
T in_w, in_h; T in_w, in_h;
get_source_coords<T>(transform_matrix, out_w, out_h, &in_w, &in_h); get_source_coords<T>(matrix, out_w, out_h, &in_w, &in_h);
if (in_quad<T>(in_w, in_h, roi_x, roi_y)) { if (in_quad<T>(in_w, in_h, roi_x, roi_y)) {
if (GT<T>(-0.5, in_w) || if (GT<T>(-0.5, in_w) ||
GT<T>(in_w, static_cast<T>(in_width - 0.5)) || GT<T>(in_w, static_cast<T>(in_width - 0.5)) ||
...@@ -507,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel { ...@@ -507,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
static_cast<int64_t>(transformed_width)}); static_cast<int64_t>(transformed_width)});
auto mask_dims = framework::make_ddim(mask_dims_v); auto mask_dims = framework::make_ddim(mask_dims_v);
std::vector<int64_t> matrix_dims_v(9); std::vector<int64_t> matrix_dims_v({rois_dims[0], 9});
auto matrix_dims = framework::make_ddim(matrix_dims_v); auto matrix_dims = framework::make_ddim(matrix_dims_v);
ctx->SetOutputDim("Out", out_dims); ctx->SetOutputDim("Out", out_dims);
...@@ -580,7 +584,7 @@ class ROIPerspectiveTransformOpMaker ...@@ -580,7 +584,7 @@ class ROIPerspectiveTransformOpMaker
"(Tensor), " "(Tensor), "
"The output transform matrix of ROIPerspectiveTransformOp is a " "The output transform matrix of ROIPerspectiveTransformOp is a "
"1-D tensor with shape " "1-D tensor with shape "
"(9,)."); "(num_rois, 9).");
AddOutput("Out2InIdx", AddOutput("Out2InIdx",
"(Tensor), " "(Tensor), "
"An intermediate tensor used to map indexes of input feature map " "An intermediate tensor used to map indexes of input feature map "
......
...@@ -274,11 +274,14 @@ __device__ void get_transform_matrix(const int transformed_width, ...@@ -274,11 +274,14 @@ __device__ void get_transform_matrix(const int transformed_width,
} }
template <typename T> template <typename T>
__global__ void RoiTransformKernel( __global__ void RoiTransformKernel(const float* input_data,
const float* input_data, const float* rois_data, const int* roi2image_data, const float* rois_data,
int num_rois, int in_height, int in_width, int channels, const int* roi2image_data, int num_rois,
int transformed_height, int transformed_width, float spatial_scale, int in_height, int in_width, int channels,
T* output_data, int* out2in_idx, T* out2in_w, int* mask, T* matrix) { int transformed_height,
int transformed_width, float spatial_scale,
T* output_data, int* out2in_idx, T* out2in_w,
int* mask, T* transform_matrix) {
int output_size = int output_size =
num_rois * transformed_height * transformed_width * channels; num_rois * transformed_height * transformed_width * channels;
...@@ -303,9 +306,12 @@ __global__ void RoiTransformKernel( ...@@ -303,9 +306,12 @@ __global__ void RoiTransformKernel(
} }
// Get transform matrix // Get transform matrix
T matrix[9];
get_transform_matrix<T>(transformed_width, transformed_height, roi_x, roi_y, get_transform_matrix<T>(transformed_width, transformed_height, roi_x, roi_y,
matrix); matrix);
for (int i = 0; i < 9; i++) {
transform_matrix[n * 9 + i] = matrix[i];
}
// Get source coords // Get source coords
T in_w; T in_w;
T in_h; T in_h;
...@@ -389,7 +395,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel<T> { ...@@ -389,7 +395,8 @@ class CUDAROIPerspectiveTransformOpKernel : public framework::OpKernel<T> {
int grid = (out_size + block - 1) / block; int grid = (out_size + block - 1) / block;
// Get transform matrix // Get transform matrix
T* matrix = out_transform_matrix->mutable_data<T>({9}, ctx.GetPlace()); T* matrix =
out_transform_matrix->mutable_data<T>({rois_num, 9}, ctx.GetPlace());
RoiTransformKernel<T><<<grid, block, 0, stream>>>( RoiTransformKernel<T><<<grid, block, 0, stream>>>(
input_data, rois_data, roi2image_dev.data<int>(), rois_num, in_height, input_data, rois_data, roi2image_dev.data<int>(), rois_num, in_height,
......
...@@ -2108,7 +2108,7 @@ def roi_perspective_transform(input, ...@@ -2108,7 +2108,7 @@ def roi_perspective_transform(input,
(num_rois, 1, transformed_h, transformed_w). (num_rois, 1, transformed_h, transformed_w).
transform_matrix: The transform matrix of ROIPerspectiveTransformOp which is transform_matrix: The transform matrix of ROIPerspectiveTransformOp which is
a 1-D tensor with shape (9,). a 2-D tensor with shape (num_rois, 9).
Examples: Examples:
.. code-block:: python .. code-block:: python
......
...@@ -200,6 +200,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, ...@@ -200,6 +200,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height,
out = np.zeros([rois_num, channels, transformed_height, transformed_width]) out = np.zeros([rois_num, channels, transformed_height, transformed_width])
mask = np.zeros( mask = np.zeros(
[rois_num, 1, transformed_height, transformed_width]).astype('int') [rois_num, 1, transformed_height, transformed_width]).astype('int')
matrix = np.zeros([rois_num, 9], dtype=in_data.dtype)
for n in range(rois_num): for n in range(rois_num):
roi_x = [] roi_x = []
roi_y = [] roi_y = []
...@@ -209,7 +210,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, ...@@ -209,7 +210,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height,
image_id = roi2image[n] image_id = roi2image[n]
transform_matrix = get_transform_matrix( transform_matrix = get_transform_matrix(
transformed_width, transformed_height, roi_x, roi_y) transformed_width, transformed_height, roi_x, roi_y)
matrix[n] = transform_matrix
for c in range(channels): for c in range(channels):
for out_h in range(transformed_height): for out_h in range(transformed_height):
for out_w in range(transformed_width): for out_w in range(transformed_width):
...@@ -224,7 +225,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height, ...@@ -224,7 +225,7 @@ def roi_transform(in_data, rois, rois_lod, transformed_height,
else: else:
out[n][c][out_h][out_w] = 0.0 out[n][c][out_h][out_w] = 0.0
mask[n][0][out_h][out_w] = 0 mask[n][0][out_h][out_w] = 0
return out.astype("float32"), mask, transform_matrix return out.astype("float32"), mask, matrix
class TestROIPoolOp(OpTest): class TestROIPoolOp(OpTest):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册