diff --git a/paddle/fluid/operators/psroi_pool_op.cc b/paddle/fluid/operators/psroi_pool_op.cc index d3faa2c8460f21749d08b45e656ce6d704bc0aa3..da637dfeb237dd4f17816e784882720dc2f2ff64 100644 --- a/paddle/fluid/operators/psroi_pool_op.cc +++ b/paddle/fluid/operators/psroi_pool_op.cc @@ -25,22 +25,26 @@ class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "Tensor, " + "(Tensor), " "the input of PSROIPoolOp. " "The format of input tensor is NCHW. Where N is the batch size, " "C is the number of input channels, " "H is the height of the input feature map, and " "W is the width. The data type can be float32 or float64"); AddInput("ROIs", - "LoDTensor, " + "(LoDTensor), " "ROIs (Regions of Interest) to pool over. " "should be a 2-D LoDTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]. " "where (x1, y1) is the top left coordinates, and " "(x2, y2) is the bottom right coordinates. " "The roi batch index can be calculated from LoD."); + AddInput("RoisNum", + "(Tensor), " + "The number of RoIs in each image.") + .AsDispensable(); AddOutput("Out", - "Tensor, " + "(Tensor), " "the output of PSROIPoolOp is a 4-D Tensor with shape " "(num_rois, output_channels, pooled_h, pooled_w). " "The data type is the same as `x` "); @@ -65,8 +69,6 @@ class PSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "the pooled output width.") .SetDefault(1); AddComment(R"Doc( -**PSROIPool Operator,** `rois` **of this op should be a LoDTensor** - Position sensitive region of interest pooling (also known as PSROIPooling) is to perform position-sensitive average pooling on regions of interest specified by input, takes as input N position-sensitive score maps and a list of num_rois regions of interest. 
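Note on the pooling the operator doc above describes: each output location (c, ph, pw) of a ROI averages its own position-sensitive slice of the input channels. A minimal NumPy sketch of one bin, mirroring the calc_psroi_pool reference helper added to the unit test later in this patch (names such as pooled_height, bin_size_h and c_in follow that helper):

import numpy as np

def psroi_pool_one_bin(x_i, c, ph, pw, roi_start_h, roi_start_w,
                       bin_size_h, bin_size_w, pooled_height, pooled_width):
    # x_i is one image's feature map with shape (C, H, W).
    # Output channel c at pooled cell (ph, pw) reads from its own input channel slice.
    c_in = (c * pooled_height + ph) * pooled_width + pw
    hstart = int(np.floor(ph * bin_size_h + roi_start_h))
    wstart = int(np.floor(pw * bin_size_w + roi_start_w))
    hend = int(np.ceil((ph + 1) * bin_size_h + roi_start_h))
    wend = int(np.ceil((pw + 1) * bin_size_w + roi_start_w))
    # Clip the bin to the feature map; empty bins pool to 0.
    hstart = min(max(hstart, 0), x_i.shape[1])
    hend = min(max(hend, 0), x_i.shape[1])
    wstart = min(max(wstart, 0), x_i.shape[2])
    wend = min(max(wend, 0), x_i.shape[2])
    if hend <= hstart or wend <= wstart:
        return 0.
    bin_vals = x_i[c_in, hstart:hend, wstart:wend]
    return bin_vals.sum() / bin_vals.size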
@@ -106,7 +108,14 @@ class PSROIPoolOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "ROIs should be a 2-D LoDTensor of shape (num_rois, 4) " "given as [(x1, y1, x2, y2), ...]")); - + if (ctx->HasInput("RoisNum")) { + auto rois_num_dims = ctx->GetInputDim("RoisNum"); + PADDLE_ENFORCE_EQ(rois_num_dims.size(), 1, + platform::errors::InvalidArgument( + "The second dimension of RoisNum should " + "be 1, but received dimension is %d", + rois_num_dims.size())); + } int pooled_height = ctx->Attrs().Get("pooled_height"); int pooled_width = ctx->Attrs().Get("pooled_width"); int output_channels = ctx->Attrs().Get("output_channels"); @@ -184,6 +193,7 @@ class PSROIPoolGradMaker : public framework::SingleGradOpMaker { op->SetType("psroi_pool_grad"); op->SetInput("X", this->Input("X")); op->SetInput("ROIs", this->Input("ROIs")); + op->SetInput("RoisNum", this->Input("RoisNum")); op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); op->SetAttrMap(this->Attrs()); diff --git a/paddle/fluid/operators/psroi_pool_op.cu b/paddle/fluid/operators/psroi_pool_op.cu index 748b6036008f1355bff2a46c6f19fe9f7e7a4dbd..f69edfc1fcfec947e4e8a33bd3af45b36865f2d3 100644 --- a/paddle/fluid/operators/psroi_pool_op.cu +++ b/paddle/fluid/operators/psroi_pool_op.cu @@ -185,34 +185,67 @@ class GPUPSROIPoolOpKernel : public framework::OpKernel { int rois_num = rois->dims()[0]; if (rois_num == 0) return; - - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ(rois_batch_size, batch_size, - platform::errors::InvalidArgument( - "The batch size of input(ROIs) and input(X) must be " - "the same but received batch size of input(ROIs) and " - "input(X) is %d and %d respectively.", - rois_batch_size, batch_size)); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod, - platform::errors::InvalidArgument( - "The number of rois from input(ROIs) and its LOD " - "must be the same. 
Received rois %d of input(ROIs) " - "but the number of rois %d from its LOD is %d", - rois_num, rois_num_with_lod)); - - // set rois batch id + int rois_batch_size; framework::Tensor rois_batch_id_list; rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(platform::CPUPlace()); - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; + + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + auto* rois_num_data = rois_num_t->data(); + PADDLE_ENFORCE_EQ( + rois_batch_size, batch_size, + platform::errors::InvalidArgument( + "The batch size of input(ROIs) and input(X) must be " + "the same but received batch size of input(ROIs) and " + "input(X) is %d and %d respectively.", + rois_batch_size, batch_size)); + std::vector rois_num_list(rois_batch_size); + memory::Copy(platform::CPUPlace(), rois_num_list.data(), + BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()), + rois_num_data, sizeof(int) * rois_batch_size, 0); + int rois_num_count = 0; + for (int i = 0; i < rois_batch_size; ++i) { + rois_num_count += rois_num_list[i]; + } + PADDLE_ENFORCE_EQ( + rois_num_count, rois_num, + platform::errors::InvalidArgument( + "the rois_num from input and RoisNum must be the same")); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_list[n]; ++i) { + rois_batch_id_data[i] = n; + } + start += rois_num_list[n]; + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + PADDLE_ENFORCE_EQ( + rois_batch_size, batch_size, + platform::errors::InvalidArgument( + "The batch size of input(ROIs) and input(X) must be " + "the same but received batch size of input(ROIs) and " + "input(X) is %d and %d respectively.", + rois_batch_size, batch_size)); + int rois_num_with_lod = rois_lod[rois_batch_size]; + PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod, + platform::errors::InvalidArgument( + "The number of rois from input(ROIs) and its LOD " + "must be the same. 
Received rois %d of input(ROIs) " + "but the number of rois %d from its LOD is %d", + rois_num, rois_num_with_lod)); + + // set rois batch id + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + rois_batch_id_data[i] = n; + } } } - framework::Tensor rois_batch_id_list_gpu; framework::TensorCopy(rois_batch_id_list, ctx.GetPlace(), ctx.device_context(), &rois_batch_id_list_gpu); @@ -257,14 +290,30 @@ class GPUPSROIPoolGradOpKernel : public framework::OpKernel { rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(platform::CPUPlace()); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; + int rois_batch_size; + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + std::vector rois_num_list(rois_batch_size); + memory::Copy(platform::CPUPlace(), rois_num_list.data(), + BOOST_GET_CONST(platform::CUDAPlace, ctx.GetPlace()), + rois_num_t->data(), sizeof(int) * rois_batch_size, 0); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_list[n]; ++i) { + rois_batch_id_data[i] = n; + } + start += rois_num_list[n]; + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + rois_batch_id_data[i] = n; + } } } - framework::Tensor rois_batch_id_list_gpu; framework::TensorCopy(rois_batch_id_list, ctx.GetPlace(), ctx.device_context(), &rois_batch_id_list_gpu); diff --git a/paddle/fluid/operators/psroi_pool_op.h b/paddle/fluid/operators/psroi_pool_op.h index 4f4cb24844b8c24283af73c9c4201a9205a5fd04..4d7e9ce295fc866c59ebfdfc9661cc0318d98812 100644 --- a/paddle/fluid/operators/psroi_pool_op.h +++ b/paddle/fluid/operators/psroi_pool_op.h @@ -40,6 +40,13 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel { int width = in_dims[3]; int rois_num = rois->dims()[0]; + PADDLE_ENFORCE_EQ(input_channels, + output_channels * pooled_height * pooled_width, + platform::errors::InvalidArgument( + "the channels of input " + "X should equal the product of " + "output_channels x pooled_height x pooled_width")); + auto in_stride = framework::stride(in_dims); auto out_stride = framework::stride(out->dims()); @@ -49,32 +56,52 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel { rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(ctx.GetPlace()); - - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - PADDLE_ENFORCE_EQ( - rois_batch_size, batch_size, - platform::errors::InvalidArgument("the rois_batch_size and input(X) " - "batch_size should be the same.")); - int rois_num_with_lod = rois_lod[rois_batch_size]; - PADDLE_ENFORCE_EQ(rois_num_with_lod, rois_num, - platform::errors::InvalidArgument( - "the rois_num from input and lod must be the same")); - - PADDLE_ENFORCE_EQ(input_channels, - output_channels * pooled_height * pooled_width, - platform::errors::InvalidArgument( - "the channels of input " - "X should equal the product of " - "output_channels x pooled_height x pooled_width")); - - // calculate batch id index for each roi according to LoD - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < 
rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; + int rois_batch_size; + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + auto* rois_num_data = rois_num_t->data(); + PADDLE_ENFORCE_EQ( + rois_batch_size, batch_size, + platform::errors::InvalidArgument( + "The batch size of rois and the batch size of images " + " must be the same. But received the batch size of rois is %d, " + "and the batch size of images is %d", + rois_batch_size, batch_size)); + int rois_num_count = 0; + for (int i = 0; i < rois_batch_size; ++i) { + rois_num_count += rois_num_data[i]; + } + PADDLE_ENFORCE_EQ( + rois_num_count, rois_num, + platform::errors::InvalidArgument( + "the rois_num from input and RoisNum must be the same")); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { + rois_batch_id_data[i] = n; + } + start += rois_num_data[n]; + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + PADDLE_ENFORCE_EQ( + rois_batch_size, batch_size, + platform::errors::InvalidArgument("the rois_batch_size and input(X) " + "batch_size should be the same.")); + int rois_num_with_lod = rois_lod[rois_batch_size]; + PADDLE_ENFORCE_EQ( + rois_num_with_lod, rois_num, + platform::errors::InvalidArgument( + "the rois_num from input and lod must be the same")); + // calculate batch id index for each roi according to LoD + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + rois_batch_id_data[i] = n; + } } } - T* output_data = out->mutable_data(ctx.GetPlace()); const T* input_rois = rois->data(); @@ -93,7 +120,6 @@ class CPUPSROIPoolOpKernel : public framework::OpKernel { static_cast(round(offset_input_rois[2]) + 1.) * spatial_scale; T roi_end_h = static_cast(round(offset_input_rois[3]) + 1.) 
* spatial_scale; - // Force too small rois to be 1 x 1 T roi_height = std::max(roi_end_h - roi_start_h, (T)0.1); // avoid 0 T roi_width = std::max(roi_end_w - roi_start_w, (T)0.1); @@ -172,15 +198,28 @@ class CPUPSROIPoolGradOpKernel : public framework::OpKernel { rois_batch_id_list.Resize({rois_num}); int* rois_batch_id_data = rois_batch_id_list.mutable_data(ctx.GetPlace()); - auto rois_lod = rois->lod().back(); - int rois_batch_size = rois_lod.size() - 1; - // calculate batch id index for each roi according to LoD - for (int n = 0; n < rois_batch_size; ++n) { - for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { - rois_batch_id_data[i] = n; + int rois_batch_size; + if (ctx.HasInput("RoisNum")) { + auto* rois_num_t = ctx.Input("RoisNum"); + rois_batch_size = rois_num_t->numel(); + auto* rois_num_data = rois_num_t->data(); + int start = 0; + for (int n = 0; n < rois_batch_size; ++n) { + for (int i = start; i < start + rois_num_data[n]; ++i) { + rois_batch_id_data[i] = n; + } + start += rois_num_data[n]; + } + } else { + auto rois_lod = rois->lod().back(); + rois_batch_size = rois_lod.size() - 1; + // calculate batch id index for each roi according to LoD + for (int n = 0; n < rois_batch_size; ++n) { + for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) { + rois_batch_id_data[i] = n; + } } } - const T* input_rois = rois->data(); const T* output_grad_data = output_grad->data(); T* input_grad_data = input_grad->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index 32e14dafb644bfbd0b905f6a02b50f5c7f3b001f..01d101909b549ba0d977b08d5ab8bd1bb81b5aca 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -54,6 +54,7 @@ std::map> op_ins_map = { {"gather", {"X", "Index", "Axis"}}, {"roi_pool", {"X", "ROIs", "RoisNum"}}, {"roi_align", {"X", "ROIs", "RoisNum"}}, + {"psroi_pool", {"X", "ROIs", "RoisNum"}}, {"collect_fpn_proposals", {"MultiLevelRois", "MultiLevelScores", "MultiLevelRoIsNum"}}, {"distribute_fpn_proposals", {"FpnRois", "RoisNum"}}, diff --git a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py index 066bcf48612c592ab8f8bc773584821f467e25ce..95b8c5c3c0a941499fd2f99bd0674d088ce344b5 100644 --- a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py @@ -14,18 +14,89 @@ from __future__ import print_function +import paddle import math import numpy as np import unittest from op_test import OpTest +def calc_psroi_pool(x, rois, rois_num_per_img, output_channels, spatial_scale, + pooled_height, pooled_width): + """ + Psroi_pool implemented by Numpy. + x: 4-D as (N, C, H, W), + rois: 2-D as [[x1, y1, x2, y2], ...], + rois_num_per_img: 1-D as [nums_of_batch_0, nums_of_batch_1, ...] + """ + output_shape = (len(rois), output_channels, pooled_height, pooled_width) + out_data = np.zeros(output_shape) + batch_id = 0 + rois_num_id = 0 + rois_num_left = rois_num_per_img[rois_num_id] + for i in range(len(rois)): + roi = rois[i] + roi_batch_id = batch_id + rois_num_left -= 1 + if rois_num_left == 0: + rois_num_id += 1 + if rois_num_id < len(rois_num_per_img): + rois_num_left = rois_num_per_img[rois_num_id] + batch_id += 1 + roi_start_w = round(roi[0]) * spatial_scale + roi_start_h = round(roi[1]) * spatial_scale + roi_end_w = (round(roi[2]) + 1.) * spatial_scale + roi_end_h = (round(roi[3]) + 1.) 
* spatial_scale + + roi_height = max(roi_end_h - roi_start_h, 0.1) + roi_width = max(roi_end_w - roi_start_w, 0.1) + + bin_size_h = roi_height / float(pooled_height) + bin_size_w = roi_width / float(pooled_width) + + x_i = x[roi_batch_id] + + for c in range(output_channels): + for ph in range(pooled_height): + for pw in range(pooled_width): + hstart = int( + math.floor(float(ph) * bin_size_h + roi_start_h)) + wstart = int( + math.floor(float(pw) * bin_size_w + roi_start_w)) + hend = int( + math.ceil(float(ph + 1) * bin_size_h + roi_start_h)) + wend = int( + math.ceil(float(pw + 1) * bin_size_w + roi_start_w)) + hstart = min(max(hstart, 0), x.shape[2]) + hend = min(max(hend, 0), x.shape[2]) + wstart = min(max(wstart, 0), x.shape[3]) + wend = min(max(wend, 0), x.shape[3]) + + c_in = (c * pooled_height + ph) * pooled_width + pw + is_empty = (hend <= hstart) or (wend <= wstart) + out_sum = 0. + for ih in range(hstart, hend): + for iw in range(wstart, wend): + out_sum += x_i[c_in, ih, iw] + bin_area = (hend - hstart) * (wend - wstart) + out_data[i, c, ph, pw] = 0. if is_empty else ( + out_sum / float(bin_area)) + return out_data + + class TestPSROIPoolOp(OpTest): def set_data(self): + paddle.enable_static() self.init_test_case() self.make_rois() - self.calc_psroi_pool() - self.inputs = {'X': self.x, 'ROIs': (self.rois[:, 1:5], self.rois_lod)} + self.outs = calc_psroi_pool(self.x, self.boxes, self.boxes_num, + self.output_channels, self.spatial_scale, + self.pooled_height, + self.pooled_width).astype('float64') + self.inputs = { + 'X': self.x, + 'ROIs': (self.rois_with_batch_id[:, 1:5], self.rois_lod) + } self.attrs = { 'output_channels': self.output_channels, 'spatial_scale': self.spatial_scale, @@ -67,57 +138,10 @@ class TestPSROIPoolOp(OpTest): roi = [bno, x1, y1, x2, y2] rois.append(roi) self.rois_num = len(rois) - self.rois = np.array(rois).astype('float64') - - def calc_psroi_pool(self): - output_shape = (self.rois_num, self.output_channels, self.pooled_height, - self.pooled_width) - out_data = np.zeros(output_shape) - for i in range(self.rois_num): - roi = self.rois[i] - roi_batch_id = int(roi[0]) - roi_start_w = round(roi[1]) * self.spatial_scale - roi_start_h = round(roi[2]) * self.spatial_scale - roi_end_w = (round(roi[3]) + 1.) * self.spatial_scale - roi_end_h = (round(roi[4]) + 1.) * self.spatial_scale - - roi_height = max(roi_end_h - roi_start_h, 0.1) - roi_width = max(roi_end_w - roi_start_w, 0.1) - - bin_size_h = roi_height / float(self.pooled_height) - bin_size_w = roi_width / float(self.pooled_width) - - x_i = self.x[roi_batch_id] - - for c in range(self.output_channels): - for ph in range(self.pooled_height): - for pw in range(self.pooled_width): - hstart = int( - math.floor(float(ph) * bin_size_h + roi_start_h)) - wstart = int( - math.floor(float(pw) * bin_size_w + roi_start_w)) - hend = int( - math.ceil( - float(ph + 1) * bin_size_h + roi_start_h)) - wend = int( - math.ceil( - float(pw + 1) * bin_size_w + roi_start_w)) - hstart = min(max(hstart, 0), self.height) - hend = min(max(hend, 0), self.height) - wstart = min(max(wstart, 0), self.width) - wend = min(max(wend, 0), self.width) - - c_in = (c * self.pooled_height + ph - ) * self.pooled_width + pw - is_empty = (hend <= hstart) or (wend <= wstart) - out_sum = 0. - for ih in range(hstart, hend): - for iw in range(wstart, wend): - out_sum += x_i[c_in, ih, iw] - bin_area = (hend - hstart) * (wend - wstart) - out_data[i, c, ph, pw] = 0. 
if is_empty else ( - out_sum / float(bin_area)) - self.outs = out_data.astype('float64') + self.rois_with_batch_id = np.array(rois).astype('float64') + self.boxes = self.rois_with_batch_id[:, 1:] + self.boxes_num = np.array( + [bno + 1 for bno in range(self.batch_size)]).astype('int32') def setUp(self): self.op_type = 'psroi_pool' @@ -130,5 +154,175 @@ class TestPSROIPoolOp(OpTest): self.check_grad(['X'], 'Out') +class TestPSROIPoolDynamicFunctionAPI(unittest.TestCase): + def setUp(self): + self.x = np.random.random([2, 490, 28, 28]).astype(np.float32) + self.boxes = np.array( + [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]]).astype(np.float32) + self.boxes_num = np.array([1, 2]).astype(np.int32) + + def test_output_size(self): + def test_output_size_is_int(): + output_size = 7 + out = paddle.vision.ops.psroi_pool( + paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num), output_size).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, + 1.0, 7, 7) + self.assertTrue(np.allclose(out, expect_out)) + + def test_output_size_is_tuple(): + output_size = (7, 7) + out = paddle.vision.ops.psroi_pool( + paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num), output_size).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, + 1.0, 7, 7) + self.assertTrue(np.allclose(out, expect_out)) + + def test_dytype_is_float64(): + output_size = (7, 7) + out = paddle.vision.ops.psroi_pool( + paddle.to_tensor(self.x, 'float64'), + paddle.to_tensor(self.boxes, 'float64'), + paddle.to_tensor(self.boxes_num, 'int32'), output_size).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, + 1.0, 7, 7) + self.assertTrue(np.allclose(out, expect_out)) + + places = ['cpu'] + if paddle.fluid.core.is_compiled_with_cuda(): + places.append('gpu') + for place in places: + paddle.set_device(place) + test_output_size_is_int() + test_output_size_is_tuple() + test_dytype_is_float64() + + +class TestPSROIPoolDynamicClassAPI(unittest.TestCase): + def setUp(self): + self.x = np.random.random([2, 128, 32, 32]).astype(np.float32) + self.boxes = np.array([[3, 5, 6, 13], [7, 4, 22, 18], [4, 5, 7, 10], + [5, 3, 25, 21]]).astype(np.float32) + self.boxes_num = np.array([2, 2]).astype(np.int32) + + def test_output_size(self): + def test_output_size_is_int(): + psroi_module = paddle.vision.ops.PSRoIPool(8, 1.1) + out = psroi_module( + paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num)).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, + 1.1, 8, 8) + self.assertTrue(np.allclose(out, expect_out)) + + def test_output_size_is_tuple(): + psroi_pool_module = paddle.vision.ops.PSRoIPool(8, 1.1) + out = psroi_pool_module( + paddle.to_tensor(self.x), + paddle.to_tensor(self.boxes), + paddle.to_tensor(self.boxes_num)).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, + 1.1, 8, 8) + self.assertTrue(np.allclose(out, expect_out)) + + def test_dytype_is_float64(): + psroi_pool_module = paddle.vision.ops.PSRoIPool(8, 1.1) + out = psroi_pool_module( + paddle.to_tensor(self.x, 'float64'), + paddle.to_tensor(self.boxes, 'float64'), + paddle.to_tensor(self.boxes_num, 'int32')).numpy() + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 2, + 1.1, 8, 8) + self.assertTrue(np.allclose(out, expect_out)) + + paddle.disable_static() + places = ['cpu'] + if paddle.fluid.core.is_compiled_with_cuda(): + 
places.append('gpu') + for place in places: + paddle.set_device(place) + test_output_size_is_int() + test_output_size_is_tuple() + test_dytype_is_float64() + + +class TestPSROIPoolBoxesNumError(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x = paddle.uniform([2, 490, 28, 28], dtype='float32') + self.boxes = paddle.to_tensor( + [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], 'float32') + + def test_errors(self): + def test_boxes_num_nums_error(): + boxes_num = paddle.to_tensor([1, 5], 'int32') + out = paddle.vision.ops.psroi_pool( + self.x, self.boxes, boxes_num, output_size=7) + + self.assertRaises(ValueError, test_boxes_num_nums_error) + + def test_boxes_num_length_error(): + boxes_num = paddle.to_tensor([1, 1, 1], 'int32') + out = paddle.vision.ops.psroi_pool( + self.x, self.boxes, boxes_num, output_size=7) + + self.assertRaises(ValueError, test_boxes_num_length_error) + + +class TestPSROIPoolChannelError(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x = paddle.uniform([2, 490, 28, 28], dtype='float32') + self.boxes = paddle.to_tensor( + [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], 'float32') + self.output_size = 4 + + def test_errors(self): + def test_channel_error(): + boxes_num = paddle.to_tensor([2, 1], 'int32') + out = paddle.vision.ops.psroi_pool(self.x, self.boxes, boxes_num, + self.output_size) + + self.assertRaises(ValueError, test_channel_error) + + +class TestPSROIPoolStaticAPI(unittest.TestCase): + def setUp(self): + paddle.enable_static() + self.x_placeholder = paddle.static.data( + name='x', shape=[2, 490, 28, 28]) + self.x = np.random.random([2, 490, 28, 28]).astype(np.float32) + self.boxes_placeholder = paddle.static.data( + name='boxes', shape=[3, 4], lod_level=1) + self.boxes = np.array( + [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]]).astype(np.float32) + self.boxes_num = np.array([1, 2]).astype(np.int32) + + def test_function_in_static(self): + output_size = 7 + out = paddle.vision.ops.psroi_pool(self.x_placeholder, + self.boxes_placeholder, + self.boxes_num, output_size) + expect_out = calc_psroi_pool(self.x, self.boxes, self.boxes_num, 10, + 1.0, 7, 7) + places = [paddle.CPUPlace()] + if paddle.fluid.core.is_compiled_with_cuda(): + places.append(paddle.CUDAPlace(0)) + for place in places: + exe = paddle.static.Executor(place) + boxes_lod_data = paddle.fluid.create_lod_tensor(self.boxes, + [[1, 2]], place) + out_res = exe.run(paddle.static.default_main_program(), + feed={'x': self.x, + 'boxes': boxes_lod_data}, + fetch_list=[out.name]) + self.assertTrue(np.allclose(out_res, expect_out)) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index d5e73f977b563403fd9222913b50c298db037f84..5f02b805a3ed31f0c44595e490057069f1da33f9 100644 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -29,7 +29,9 @@ __all__ = [ #noqa 'deform_conv2d', 'DeformConv2D', 'read_file', - 'decode_jpeg' + 'decode_jpeg', + 'psroi_pool', + 'PSRoIPool', ] @@ -900,3 +902,114 @@ def decode_jpeg(x, mode='unchanged', name=None): type="decode_jpeg", inputs=inputs, attrs=attrs, outputs={"Out": out}) return out + + +def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): + """ + Position sensitive region of interest pooling (also known as PSROIPooling) is to perform + position-sensitive average pooling on regions of interest specified by input. It performs + on inputs of nonuniform sizes to obtain fixed-size feature maps. 
+ + PSROIPooling is proposed by R-FCN. Please refer to https://arxiv.org/abs/1605.06409 for more details. + + Args: + x (Tensor): Input features with shape (N, C, H, W). The data type can be float32 or float64. + boxes (Tensor): Box coordinates of ROIs (Regions of Interest) to pool over. It should be + a 2-D Tensor with shape (num_rois, 4). Given as [[x1, y1, x2, y2], ...], + (x1, y1) is the top left coordinates, and (x2, y2) is the bottom + right coordinates. + boxes_num (Tensor): The number of boxes contained in each picture in the batch. + output_size (int|Tuple(int, int)) The pooled output size(H, W), data type + is int32. If int, H and W are both equal to output_size. + spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their + input scale to the scale used when pooling. Default: 1.0 + name(str, optional): The default value is None. + Normally there is no need for user to set this property. + For more information, please refer to :ref:`api_guide_Name` + + Returns: + 4-D Tensor. The pooled ROIs with shape (num_rois, output_channels, pooled_h, pooled_w). + The output_channels equal to C / (pooled_h * pooled_w), where C is the channels of input. + + Examples: + .. code-block:: python + + import paddle + x = paddle.uniform([2, 490, 28, 28], dtype='float32') + boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32') + boxes_num = paddle.to_tensor([1, 2], dtype='int32') + pool_out = paddle.vision.ops.psroi_pool(x, boxes, boxes_num, 7, 1.0) + """ + + check_type(output_size, 'output_size', (int, tuple, list), 'psroi_pool') + if isinstance(output_size, int): + output_size = (output_size, output_size) + pooled_height, pooled_width = output_size + assert (len(x.shape) == 4, + "Input features with shape should be (N, C, H, W)") + output_channels = int(x.shape[1] / (pooled_height * pooled_width)) + if in_dygraph_mode(): + return core.ops.psroi_pool(x, boxes, boxes_num, "output_channels", + output_channels, "spatial_scale", + spatial_scale, "pooled_height", + pooled_height, "pooled_width", pooled_width) + + helper = LayerHelper('psroi_pool', **locals()) + dtype = helper.input_dtype() + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='psroi_pool', + inputs={'X': x, + 'ROIs': boxes}, + outputs={'Out': out}, + attrs={ + 'output_channels': output_channels, + 'spatial_scale': spatial_scale, + 'pooled_height': pooled_height, + 'pooled_width': pooled_width + }) + return out + + +class PSRoIPool(Layer): + """ + This interface is used to construct a callable object of the ``PSRoIPool`` class. Please + refer to :ref:`api_paddle_vision_ops_psroi_pool`. + + Args: + output_size (int|Tuple(int, int)) The pooled output size(H, W), data type + is int32. If int, H and W are both equal to output_size. + spatial_scale (float): Multiplicative spatial scale factor to translate ROI coords from their + input scale to the scale used when pooling. Default: 1.0. + + Shape: + - x: 4-D Tensor with shape (N, C, H, W). + - boxes: 2-D Tensor with shape (num_rois, 4). + - boxes_num: 1-D Tensor. + - output: 4-D tensor with shape (num_rois, output_channels, pooled_h, pooled_w). + The output_channels equal to C / (pooled_h * pooled_w), where C is the channels of input. + + Returns: + None + + Examples: + .. 
code-block:: python + + import paddle + + psroi_module = paddle.vision.ops.PSRoIPool(7, 1.0) + x = paddle.uniform([2, 490, 28, 28], dtype='float32') + boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32') + boxes_num = paddle.to_tensor([1, 2], dtype='int32') + pool_out = psroi_module(x, boxes, boxes_num) + + """ + + def __init__(self, output_size, spatial_scale=1.0): + super(PSRoIPool, self).__init__() + self.output_size = output_size + self.spatial_scale = spatial_scale + + def forward(self, x, boxes, boxes_num): + return psroi_pool(x, boxes, boxes_num, self.output_size, + self.spatial_scale)
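
The common thread of the kernel changes in this patch (CPU and GPU, forward and backward) is that the per-ROI batch index may now come from the dense RoisNum input instead of the LoD of ROIs. A minimal NumPy sketch of that mapping, assuming boxes_num holds one ROI count per image as in the tests above:

import numpy as np

def batch_ids_from_rois_num(rois_num):
    # Expand per-image ROI counts into one batch id per ROI,
    # mirroring the rois_batch_id_data loop in the kernels above.
    rois_num = np.asarray(rois_num, dtype=np.int32)
    batch_ids = np.empty(int(rois_num.sum()), dtype=np.int32)
    start = 0
    for n, count in enumerate(rois_num):
        batch_ids[start:start + count] = n  # ROIs [start, start + count) belong to image n
        start += count
    return batch_ids

# e.g. boxes_num = [1, 2] as in the tests gives batch ids [0, 1, 1];
# the same result can be written as np.repeat(np.arange(len(rois_num)), rois_num).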
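
The Python wrapper does not expose output_channels directly: since the operator requires C == output_channels * pooled_height * pooled_width, the wrapper infers output_channels from the input channel count, and a non-divisible C fails the channel check exercised by TestPSROIPoolChannelError. A quick sanity check under the shapes used in the docstring examples:

# x in the examples has C = 490 channels and output_size = 7.
pooled_height = pooled_width = 7
channels = 490
output_channels = channels // (pooled_height * pooled_width)  # 490 // 49 == 10
assert output_channels * pooled_height * pooled_width == channels  # satisfies the op's channel check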