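Fix some issues in the roi_pool operator: check the ROI column count, register double-precision kernels, and stop hard-coding float in the kernels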

cf5b5986 · wanghaox · ef905598 · cf5b5986 · cf5b5986 · cf5b5986
4 changed files
--- a/paddle/operators/roi_pool_op.cc
+++ b/paddle/operators/roi_pool_op.cc
@@ -17,6 +17,10 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using Tensor = framework::Tensor;
+
+static constexpr int kROISize = 5;
+
 class ROIPoolOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
@@ -38,6 +42,9 @@ class ROIPoolOp : public framework::OperatorWithKernel {
    PADDLE_ENFORCE(rois_dims.size() == 2,
                   "ROIs should be a 2-D tensor of shape (num_rois, 5)"
                   "given as [[batch_id, x1, y1, x2, y2], …].");
+    PADDLE_ENFORCE(rois_dims[1] == kROISize,
+                   "ROIs should be a 2-D tensor of shape (num_rois, 5) "
+                   "given as [[batch_id, x1, y1, x2, y2], …].");

    int pooled_height = ctx->Attrs().Get<int>("pooled_height");
    int pooled_width = ctx->Attrs().Get<int>("pooled_width");
@@ -150,7 +157,9 @@ REGISTER_OP(roi_pool, ops::ROIPoolOp, ops::ROIPoolOpMaker,
            roi_pool_grad, ops::ROIPoolGradOp);
 REGISTER_OP_CPU_KERNEL(
    roi_pool,
-    ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>);
+    ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, float>,
+    ops::CPUROIPoolOpKernel<paddle::platform::CPUPlace, double>);
 REGISTER_OP_CPU_KERNEL(
     roi_pool_grad,
-    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>);
+    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, float>,
+    ops::CPUROIPoolGradOpKernel<paddle::platform::CPUPlace, double>);
--- a/paddle/operators/roi_pool_op.cu
+++ b/paddle/operators/roi_pool_op.cu
@@ -18,6 +18,8 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using Tensor = framework::Tensor;
+
 static constexpr int kNumCUDAThreads = 512;
 static constexpr int kNumMaxinumNumBlocks = 4096;
 static constexpr int kROISize = 5;
@@ -25,7 +27,7 @@ static constexpr int kROISize = 5;
 static inline int NumBlocks(const int N) {
  return std::min((N + kNumCUDAThreads - 1) / kNumCUDAThreads,
                  kNumMaxinumNumBlocks);
-  }
+}

  template <typename T>
  __global__ void GPUROIPoolForward(
@@ -64,7 +66,7 @@ static inline int NumBlocks(const int N) {
      wend = min(max(wend + roi_start_w, 0), width);
      bool is_empty = (hend <= hstart) || (wend <= wstart);

-      T maxval = is_empty ? 0 : -std::numeric_limits<float>::max();
+      T maxval = is_empty ? 0 : -std::numeric_limits<T>::max();
      int maxidx = -1;
      const T* offset_input_data =
          input_data + (roi_batch_ind * channels + c) * height * width;
@@ -143,14 +145,6 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
    int width = in_dims[3];

    size_t rois_num = rois->dims()[0];
-
-    out->mutable_data<T>(ctx.GetPlace());
-    math::SetConstant<Place, T> set_zero;
-    set_zero(ctx.device_context(), out, static_cast<T>(0));
-    argmax->mutable_data<int64_t>(ctx.GetPlace());
-    math::SetConstant<Place, int64_t> set_init;
-    set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
-
    if (rois_num== 0) return;

    int output_size = out->numel();
@@ -230,7 +224,9 @@ class GPUROIPoolGradOpKernel : public framework::OpKernel<T> {
 namespace ops = paddle::operators;
 REGISTER_OP_GPU_KERNEL(
    roi_pool,
-    ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>);
+    ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, float>,
+    ops::GPUROIPoolOpKernel<paddle::platform::GPUPlace, double>);
 REGISTER_OP_GPU_KERNEL(
     roi_pool_grad,
-    ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>);
+    ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, float>,
+    ops::GPUROIPoolGradOpKernel<paddle::platform::GPUPlace, double>);
--- a/paddle/operators/roi_pool_op.h
+++ b/paddle/operators/roi_pool_op.h
@@ -15,23 +15,18 @@ limitations under the License. */
 #pragma once
 #include "paddle/framework/op_registry.h"
 #include "paddle/operators/math/math_function.h"
-#include "paddle/operators/strided_memcpy.h"

 namespace paddle {
 namespace operators {

-using Tensor = framework::Tensor;
-using LoDTensor = framework::LoDTensor;
-using LoD = framework::LoD;
-
 template <typename Place, typename T>
 class CPUROIPoolOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<Tensor>("X");
-    auto* rois = ctx.Input<Tensor>("ROIs");
-    auto* out = ctx.Output<Tensor>("Out");
-    auto* argmax = ctx.Output<Tensor>("Argmax");
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto* rois = ctx.Input<framework::Tensor>("ROIs");
+    auto* out = ctx.Output<framework::Tensor>("Out");
+    auto* argmax = ctx.Output<framework::Tensor>("Argmax");

    auto pooled_height = ctx.Attr<int>("pooled_height");
    auto pooled_width = ctx.Attr<int>("pooled_width");
@@ -54,11 +49,6 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
    T* output_data = out->mutable_data<T>(ctx.GetPlace());
    int64_t* argmax_data = argmax->mutable_data<int64_t>(ctx.GetPlace());

-    math::SetConstant<Place, T> set_zero;
-    set_zero(ctx.device_context(), out, static_cast<T>(0));
-    math::SetConstant<Place, int64_t> set_init;
-    set_init(ctx.device_context(), argmax, static_cast<int64_t>(-1));
-
    for (int n = 0; n < rois_num; ++n) {
      int roi_batch_id = rois_data[0];
      PADDLE_ENFORCE_GE(roi_batch_id, 0);
@@ -83,7 +73,7 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
      const float bin_size_w =
          static_cast<float>(roi_width) / static_cast<float>(pooled_width);

-      const float* batch_data = input_data + roi_batch_id * in_stride[0];
+      const T* batch_data = input_data + roi_batch_id * in_stride[0];

      for (int c = 0; c < channels; ++c) {
        for (int ph = 0; ph < pooled_height; ++ph) {
@@ -110,7 +100,8 @@ class CPUROIPoolOpKernel : public framework::OpKernel<T> {
            // Define an empty pooling region to be zero
            bool is_empty = (hend <= hstart) || (wend <= wstart);
            output_data[pool_index] =
-                is_empty ? 0 : -std::numeric_limits<float>::max();
+                is_empty ? 0 : -std::numeric_limits<T>::max();
+            argmax_data[pool_index] = -1;

            for (int h = hstart; h < hend; ++h) {
              for (int w = wstart; w < wend; ++w) {
@@ -139,14 +130,14 @@ template <typename Place, typename T>
 class CPUROIPoolGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* in = ctx.Input<Tensor>("X");
-    auto* rois = ctx.Input<Tensor>("ROIs");
-    auto* argmax = ctx.Input<Tensor>("Argmax");
+    auto* in = ctx.Input<framework::Tensor>("X");
+    auto* rois = ctx.Input<framework::Tensor>("ROIs");
+    auto* argmax = ctx.Input<framework::Tensor>("Argmax");

    auto* out_grad =
-        ctx.Input<Tensor>(framework::GradVarName("Out"));
+        ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* x_grad =
-        ctx.Output<Tensor>(framework::GradVarName("X"));
+        ctx.Output<framework::Tensor>(framework::GradVarName("X"));

    auto pooled_height = ctx.Attr<int>("pooled_height");
    auto pooled_width = ctx.Attr<int>("pooled_width");

--- a/python/paddle/v2/fluid/tests/test_roi_pool_op.py
+++ b/python/paddle/v2/fluid/tests/test_roi_pool_op.py
@@ -77,7 +77,12 @@ class TestROIPoolOp(OpTest):
                        wstart = min(max(wstart + roi_start_w, 0), self.width)
                        wend = min(max(wend + roi_start_w, 0), self.width)

-                        out_data[i, c, ph, pw] = 0
+                        is_empty = (hend <= hstart) or (wend <= wstart)
+                        if is_empty:
+                            out_data[i, c, ph, pw] = 0
+                        else:
+                            out_data[i, c, ph, pw] = -sys.float_info.max
+
                        argmax_data[i, c, ph, pw] = -1
                        
                        for h in range(hstart, hend):