Merge pull request #10700 from baiyfbupt/develop

fix roi_pool op bug

Merge pull request #10700 from baiyfbupt/develop
fix roi_pool op bug
67b8a300 · baiyf · GitHub · ebc73039 · 1d7f91e8 · 67b8a300
隐藏空白更改
内联并排

Showing with 23 addition and 17 deletion

paddle/fluid/operators/roi_pool_op.cu paddle/fluid/operators/roi_pool_op.cu +23 -17

未找到文件。
--- a/paddle/fluid/operators/roi_pool_op.cu
+++ b/paddle/fluid/operators/roi_pool_op.cu
@@ -38,10 +38,10 @@ __global__ void GPUROIPoolForward(
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (size_t i = index; i < nthreads; i += offset) {
-    int pw = index % pooled_width;
+    int pw = i % pooled_width;
-    int ph = (index / pooled_width) % pooled_height;
+    int ph = (i / pooled_width) % pooled_height;
-    int c = (index / pooled_width / pooled_height) % channels;
+    int c = (i / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels;
+    int n = i / pooled_width / pooled_height / channels;
    const int64_t* offset_input_rois = input_rois + n * kROISize;
    int roi_batch_ind = roi_batch_id_data[n];
@@ -52,14 +52,19 @@ __global__ void GPUROIPoolForward(
    int roi_width = max(roi_end_w - roi_start_w + 1, 1);
    int roi_height = max(roi_end_h - roi_start_h + 1, 1);
-    T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
-    T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
-    int hstart = static_cast<int>(floor(static_cast<T>(ph) * bin_size_h));
-    int wstart = static_cast<int>(floor(static_cast<T>(pw) * bin_size_w));
-    int hend = static_cast<int>(ceil(static_cast<T>(ph + 1) * bin_size_h));
-    int wend = static_cast<int>(ceil(static_cast<T>(pw + 1) * bin_size_w));
+    int hstart = static_cast<int>(floor(static_cast<double>(ph) *
+                                        static_cast<double>(roi_height) /
+                                        static_cast<double>(pooled_height)));
+    int wstart = static_cast<int>(floor(static_cast<double>(pw) *
+                                        static_cast<double>(roi_width) /
+                                        static_cast<double>(pooled_width)));
+    int hend = static_cast<int>(ceil(static_cast<double>(ph + 1) *
+                                     static_cast<double>(roi_height) /
+                                     static_cast<double>(pooled_height)));
+    int wend = static_cast<int>(ceil(static_cast<double>(pw + 1) *
+                                     static_cast<double>(roi_width) /
+                                     static_cast<double>(pooled_width)));
    hstart = min(max(hstart + roi_start_h, 0), height);
    hend = min(max(hend + roi_start_h, 0), height);
    wstart = min(max(wstart + roi_start_w, 0), width);
@@ -79,9 +84,9 @@ __global__ void GPUROIPoolForward(
        }
      }
    }
-    output_data[index] = maxval;
+    output_data[i] = maxval;
    if (argmax_data) {
-      argmax_data[index] = maxidx;
+      argmax_data[i] = maxidx;
    }
  }
 }
@@ -96,10 +101,10 @@ __global__ void GPUROIPoolBackward(
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  int offset = blockDim.x * gridDim.x;
  for (int i = index; i < nthreads; i += offset) {
-    int pw = index % pooled_width;
+    int pw = i % pooled_width;
-    int ph = (index / pooled_width) % pooled_height;
+    int ph = (i / pooled_width) % pooled_height;
-    int c = (index / pooled_width / pooled_height) % channels;
+    int c = (i / pooled_width / pooled_height) % channels;
-    int n = index / pooled_width / pooled_height / channels;
+    int n = i / pooled_width / pooled_height / channels;
    int roi_batch_ind = roi_batch_id_data[n];
    int input_offset = (roi_batch_ind * channels + c) * height * width;
@@ -138,6 +143,7 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
    int width = in_dims[3];
    int rois_num = rois->dims()[0];
    if (rois_num == 0) return;
    int output_size = out->numel();