diff --git a/paddle/operators/roi_pool_op.cc b/paddle/operators/roi_pool_op.cc index 7f0cacc400e5c937f0d331337e27667e689743f0..156db9358689c90293311b8f08a7576b680c9472 100755 --- a/paddle/operators/roi_pool_op.cc +++ b/paddle/operators/roi_pool_op.cc @@ -17,6 +17,10 @@ limitations under the License. */ namespace paddle { namespace operators { +using Tensor = framework::Tensor; + +static constexpr int kROISize = 5; + class ROIPoolOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -38,6 +42,9 @@ class ROIPoolOp : public framework::OperatorWithKernel { PADDLE_ENFORCE(rois_dims.size() == 2, "ROIs should be a 2-D tensor of shape (num_rois, 5)" "given as [[batch_id, x1, y1, x2, y2], …]."); + PADDLE_ENFORCE(rois_dims[1] == kROISize, + "ROIs should be a 2-D tensor of shape (num_rois, 5)" + "given as [[batch_id, x1, y1, x2, y2], …]."); int pooled_height = ctx->Attrs().Get("pooled_height"); int pooled_width = ctx->Attrs().Get("pooled_width"); @@ -150,7 +157,9 @@ REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker, roi_pool_grad, ops::ROIPoolGradOp); REGISTER_OP_CPU_KERNEL( roi_pool, - ops::CPUROIPoolOpKernel); + ops::CPUROIPoolOpKernel, + ops::CPUROIPoolOpKernel); REGISTER_OP_CPU_KERNEL( roi_pool_grad, - ops::CPUROIPoolGradOpKernel); + ops::CPUROIPoolGradOpKernel, + ops::CPUROIPoolGradOpKernel); diff --git a/paddle/operators/roi_pool_op.cu b/paddle/operators/roi_pool_op.cu index e405d9beda7555428e077d5f0702dbe9569a4109..97df45f1b5779d5e28e36814450a9577edf85135 100755 --- a/paddle/operators/roi_pool_op.cu +++ b/paddle/operators/roi_pool_op.cu @@ -18,6 +18,8 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +using Tensor = framework::Tensor; + static constexpr int kNumCUDAThreads = 512; static constexpr int kNumMaxinumNumBlocks = 4096; static constexpr int kROISize = 5; @@ -25,7 +27,7 @@ static constexpr int kROISize = 5; static inline int NumBlocks(const int N) { return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads, kNumMaxinumNumBlocks); - } +} template __global__ void GPUROIPoolForward( @@ -64,7 +66,7 @@ static inline int NumBlocks(const int N) { wend = min(max(wend + roi_start_w, 0), width); bool is_empty = (hend <= hstart) || (wend <= wstart); - T maxval = is_empty ? 0 : -std::numeric_limits::max(); + T maxval = is_empty ? 0 : -std::numeric_limits::max(); int maxidx = -1; const T* offset_input_data = input_data + (roi_batch_ind * channels + c) * height * width; @@ -143,14 +145,6 @@ class GPUROIPoolOpKernel : public framework::OpKernel { int width = in_dims[3]; size_t rois_num = rois->dims()[0]; - - out->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; - set_zero(ctx.device_context(), out, static_cast(0)); - argmax->mutable_data(ctx.GetPlace()); - math::SetConstant set_init; - set_init(ctx.device_context(), argmax, static_cast(-1)); - if (rois_num== 0) return; int output_size = out->numel(); @@ -230,7 +224,9 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( roi_pool, - ops::GPUROIPoolOpKernel); + ops::GPUROIPoolOpKernel, + ops::GPUROIPoolOpKernel); REGISTER_OP_GPU_KERNEL( roi_pool_grad, - ops::GPUROIPoolGradOpKernel); + ops::GPUROIPoolGradOpKernel, + ops::GPUROIPoolGradOpKernel); diff --git a/paddle/operators/roi_pool_op.h b/paddle/operators/roi_pool_op.h index 4eb81b527347276a17486500a34fa33abc4da091..bd7736d63125f1be57c8af5141208f66d0592adb 100755 --- a/paddle/operators/roi_pool_op.h +++ b/paddle/operators/roi_pool_op.h @@ -15,23 +15,18 @@ limitations under the License. 
*/ #pragma once #include "paddle/framework/op_registry.h" #include "paddle/operators/math/math_function.h" -#include "paddle/operators/strided_memcpy.h" namespace paddle { namespace operators { -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; -using LoD = framework::LoD; - template class CPUROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* out = ctx.Output("Out"); - auto* argmax = ctx.Output("Argmax"); + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* out = ctx.Output("Out"); + auto* argmax = ctx.Output("Argmax"); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); @@ -54,11 +49,6 @@ class CPUROIPoolOpKernel : public framework::OpKernel { T* output_data = out->mutable_data(ctx.GetPlace()); int64_t* argmax_data = argmax->mutable_data(ctx.GetPlace()); - math::SetConstant set_zero; - set_zero(ctx.device_context(), out, static_cast(0)); - math::SetConstant set_init; - set_init(ctx.device_context(), argmax, static_cast(-1)); - for (int n = 0; n < rois_num; ++n) { int roi_batch_id = rois_data[0]; PADDLE_ENFORCE_GE(roi_batch_id, 0); @@ -83,7 +73,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel { const float bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - const float* batch_data = input_data + roi_batch_id * in_stride[0]; + const T* batch_data = input_data + roi_batch_id * in_stride[0]; for (int c = 0; c < channels; ++c) { for (int ph = 0; ph < pooled_height; ++ph) { @@ -110,7 +100,8 @@ class CPUROIPoolOpKernel : public framework::OpKernel { // Define an empty pooling region to be zero bool is_empty = (hend <= hstart) || (wend <= wstart); output_data[pool_index] = - is_empty ? 0 : -std::numeric_limits::max(); + is_empty ? 
0 : -std::numeric_limits::max(); + argmax_data[pool_index] = -1; for (int h = hstart; h < hend; ++h) { for (int w = wstart; w < wend; ++w) { @@ -139,14 +130,14 @@ template class CPUROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto* in = ctx.Input("X"); - auto* rois = ctx.Input("ROIs"); - auto* argmax = ctx.Input("Argmax"); + auto* in = ctx.Input("X"); + auto* rois = ctx.Input("ROIs"); + auto* argmax = ctx.Input("Argmax"); auto* out_grad = - ctx.Input(framework::GradVarName("Out")); + ctx.Input(framework::GradVarName("Out")); auto* x_grad = - ctx.Output(framework::GradVarName("X")); + ctx.Output(framework::GradVarName("X")); auto pooled_height = ctx.Attr("pooled_height"); auto pooled_width = ctx.Attr("pooled_width"); diff --git a/python/paddle/v2/fluid/tests/test_roi_pool_op.py b/python/paddle/v2/fluid/tests/test_roi_pool_op.py index af35bcced830bc1f37b5549cf54d701d4a559c92..7cedb930ca861aed95c355931d80cb4d265c8235 100644 --- a/python/paddle/v2/fluid/tests/test_roi_pool_op.py +++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py @@ -77,7 +77,12 @@ class TestROIPoolOp(OpTest): wstart = min(max(wstart + roi_start_w, 0), self.width) wend = min(max(wend + roi_start_w, 0), self.width) - out_data[i, c, ph, pw] = 0 + is_empty = (hend <= hstart) or (wend <= wstart) + if is_empty: + out_data[i, c, ph, pw] = 0 + else: + out_data[i, c, ph, pw] = -sys.float_info.max + argmax_data[i, c, ph, pw] = -1 for h in range(hstart, hend):