Merge pull request #15356 from jerrywgz/add_clip_op

Add box clip op

Merge pull request #15356 from jerrywgz/add_clip_op
Add box clip op
1743d1a5 · jerrywgz · GitHub · 43a67a26 · 4f18a9b8 · 1743d1a5
9 changed file
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -325,6 +325,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None
 paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name', 'axis'], varargs=None, keywords=None, defaults=('encode_center_size', True, None, 0))
 paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'anchor_mask', 'class_num', 'ignore_thresh', 'downsample_ratio', 'name'], varargs=None, keywords=None, defaults=(None,))
+paddle.fluid.layers.box_clip ArgSpec(args=['input', 'im_info', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None))
 paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
 paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))

--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -31,6 +31,7 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
 polygon_box_transform_op.cu)
 detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
 detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc)
+detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
 detection_library(yolov3_loss_op SRCS yolov3_loss_op.cc)
 if(WITH_GPU)

--- a/paddle/fluid/operators/detection/bbox_util.h
+++ b/paddle/fluid/operators/detection/bbox_util.h
@@ -99,5 +99,29 @@ void BboxOverlaps(const framework::Tensor& r_boxes,
  }
 }
+template <class T>
+void ClipTiledBoxes(const platform::DeviceContext& ctx,
+                    const framework::Tensor& im_info,
+                    const framework::Tensor& input_boxes,
+                    framework::Tensor* out) {
+  T* out_data = out->mutable_data<T>(ctx.GetPlace());
+  const T* im_info_data = im_info.data<T>();
+  const T* input_boxes_data = input_boxes.data<T>();
+  T zero(0);
+  T im_w = round(im_info_data[1] / im_info_data[2]);
+  T im_h = round(im_info_data[0] / im_info_data[2]);
+  for (int64_t i = 0; i < input_boxes.numel(); ++i) {
+    if (i % 4 == 0) {
+      out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
+    } else if (i % 4 == 1) {
+      out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
+    } else if (i % 4 == 2) {
+      out_data[i] = std::max(std::min(input_boxes_data[i], im_w - 1), zero);
+    } else {
+      out_data[i] = std::max(std::min(input_boxes_data[i], im_h - 1), zero);
+    }
+  }
+}
 }  // namespace operators
 }  // namespace paddle
--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/detection/box_clip_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+namespace paddle {
+namespace operators {
+class BoxClipOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("Input"),
+                   "Input(Input) of BoxClipOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("ImInfo"),
+                   "Input(ImInfo) of BoxClipOp should not be null.");
+    auto input_box_dims = ctx->GetInputDim("Input");
+    auto im_info_dims = ctx->GetInputDim("ImInfo");
+    if (ctx->IsRuntime()) {
+      auto input_box_size = input_box_dims.size();
+      PADDLE_ENFORCE_EQ(input_box_dims[input_box_size - 1], 4,
+                        "The last dimension of Input must be 4");
+      PADDLE_ENFORCE_EQ(im_info_dims.size(), 2,
+                        "The rank of Input(Input) in BoxClipOp must be 2");
+      PADDLE_ENFORCE_EQ(im_info_dims[1], 3,
+                        "The last dimension of ImInfo must be 3");
+    }
+    ctx->ShareDim("Input", /*->*/ "Output");
+    ctx->ShareLoD("Input", /*->*/ "Output");
+  }
+};
+class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("Input",
+             "(LoDTensor) "
+             "Input is a LoDTensor with shape [..., 4] holds 4 points"
+             "in last dimension in format [xmin, ymin, xmax, ymax]");
+    AddInput("ImInfo",
+             "(Tensor) Information for image reshape is in shape (N, 3), "
+             "in format (height, width, im_scale)");
+    AddOutput("Output",
+              "(LoDTensor) "
+              "Output is a LoDTensor with the same shape as Input"
+              "and it is the result after clip");
+    AddComment(R"DOC(
+This operator clips input boxes to original input images.
+For each input box, The formula is given as follows:
+       $$xmin = \max(\min(xmin, im_w - 1), 0)$$
+       $$ymin = \max(\min(ymin, im_h - 1), 0)$$     
+       $$xmax = \max(\min(xmax, im_w - 1), 0)$$
+       $$ymax = \max(\min(ymax, im_h - 1), 0)$$
+where im_w and im_h are computed from ImInfo, the formula is given as follows:
+       $$im_w = \round(width / im_scale)$$
+       $$im_h = \round(height / im_scale)$$ 
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(box_clip, ops::BoxClipOp, ops::BoxClipOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
+REGISTER_OP_CPU_KERNEL(
+    box_clip, ops::BoxClipKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::BoxClipKernel<paddle::platform::CPUDeviceContext, double>);
--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <algorithm>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/detection/box_clip_op.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/hostdevice.h"
+namespace paddle {
+namespace operators {
+using Tensor = framework::Tensor;
+using LoDTenso = framework::LoDTensor;
+static constexpr int ImInfoSize = 3;
+template <typename T, int BlockSize>
+static __global__ void GPUBoxClip(const T *input, const size_t *lod,
+                                  const size_t width, const T *im_info,
+                                  T *output) {
+  T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] /
+                 im_info[blockIdx.x * ImInfoSize + 2]);
+  T im_h = round(im_info[blockIdx.x * ImInfoSize] /
+                 im_info[blockIdx.x * ImInfoSize + 2]);
+  for (int i = threadIdx.x; i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width;
+       i += BlockSize) {
+    int idx = lod[blockIdx.x] * width + i;
+    T im_size = (idx % 2 == 0) ? im_w : im_h;
+    output[idx] = max(min(input[idx], im_size - 1), T(0.));
+  }
+}
+template <typename DeviceContext, typename T>
+class GPUBoxClipKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext &context) const override {
+    PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()),
+                   "This kernel only runs on GPU device.");
+    auto *input = context.Input<LoDTensor>("Input");
+    auto *im_info = context.Input<Tensor>("ImInfo");
+    auto *output = context.Output<LoDTensor>("Output");
+    const int64_t num = input->dims()[0];
+    const int64_t bbox_width = input->numel() / num;
+    auto lod = input->lod();
+    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
+    auto &dev_ctx = context.template device_context<DeviceContext>();
+    auto stream = dev_ctx.stream();
+    const size_t batch_size = lod.back().size() - 1;
+    T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
+    GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
+        input->data<T>(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()),
+        bbox_width, im_info->data<T>(), output_data);
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    box_clip, ops::GPUBoxClipKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::GPUBoxClipKernel<paddle::platform::CUDADeviceContext, double>);
--- a/paddle/fluid/operators/detection/box_clip_op.h
+++ b/paddle/fluid/operators/detection/box_clip_op.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/detection/bbox_util.h"
+#include "paddle/fluid/operators/math/math_function.h"
+namespace paddle {
+namespace operators {
+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+template <typename DeviceContext, typename T>
+class BoxClipKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto* input_box = context.Input<LoDTensor>("Input");
+    auto* im_info = context.Input<LoDTensor>("ImInfo");
+    auto* output_box = context.Output<LoDTensor>("Output");
+    auto& dev_ctx =
+        context.template device_context<platform::CPUDeviceContext>();
+    output_box->mutable_data<T>(context.GetPlace());
+    if (input_box->lod().size()) {
+      PADDLE_ENFORCE_EQ(input_box->lod().size(), 1UL,
+                        "Only support 1 level of LoD.");
+    }
+    auto box_lod = input_box->lod().back();
+    int64_t n = static_cast<int64_t>(box_lod.size() - 1);
+    for (int i = 0; i < n; ++i) {
+      Tensor im_info_slice = im_info->Slice(i, i + 1);
+      Tensor box_slice = input_box->Slice(box_lod[i], box_lod[i + 1]);
+      Tensor output_slice = output_box->Slice(box_lod[i], box_lod[i + 1]);
+      ClipTiledBoxes<T>(dev_ctx, im_info_slice, box_slice, &output_slice);
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -49,6 +49,7 @@ __all__ = [
    'box_coder',
    'polygon_box_transform',
    'yolov3_loss',
+    'box_clip',
    'multiclass_nms',
 ]
@@ -2055,6 +2056,54 @@ def generate_proposals(scores,
    return rpn_rois, rpn_roi_probs
+def box_clip(input, im_info, name=None):
+    """
+    Clip the box into the size given by im_info
+    For each input box, The formula is given as follows:
+    .. code-block:: text
+        xmin = max(min(xmin, im_w - 1), 0)
+        ymin = max(min(ymin, im_h - 1), 0) 
+        xmax = max(min(xmax, im_w - 1), 0)
+        ymax = max(min(ymax, im_h - 1), 0)
+    where im_w and im_h are computed from im_info:
+    .. code-block:: text
+        im_h = round(height / scale)
+        im_w = round(weight / scale)
+    Args:
+        input(variable): The input box, the last dimension is 4.
+        im_info(variable): The information of image with shape [N, 3] with 
+                            layout (height, width, scale). height and width
+                            is the input size and scale is the ratio of input
+                            size and original size.
+        name (str): The name of this layer. It is optional.
+    Returns:
+        Variable: The cliped tensor variable.
+    Examples:
+        .. code-block:: python
+            boxes = fluid.layers.data(
+                name='data', shape=[8, 4], dtype='float32', lod_level=1)
+            im_info = fluid.layers.data(name='im_info', shape=[3])
+            out = fluid.layers.box_clip(
+                input=boxes, im_info=im_info, inplace=True)
+    """
+    helper = LayerHelper("box_clip", **locals())
+    output = helper.create_variable_for_type_inference(dtype=input.dtype)
+    inputs = {"Input": input, "ImInfo": im_info}
+    helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output})
+    return output
 def multiclass_nms(bboxes,
                   scores,
                   score_threshold,
@@ -2132,9 +2181,11 @@ def multiclass_nms(bboxes,
             (After version 1.3, when no boxes detected, the lod is changed 
             from {0} to {1}) 
    Examples:
        .. code-block:: python
            boxes = fluid.layers.data(name='bboxes', shape=[81, 4],
                                      dtype='float32', lod_level=1)
            scores = fluid.layers.data(name='scores', shape=[81],

--- a/python/paddle/fluid/tests/test_detection.py
+++ b/python/paddle/fluid/tests/test_detection.py
@@ -482,6 +482,17 @@ class TestYoloDetection(unittest.TestCase):
            self.assertIsNotNone(loss)
+class TestBoxClip(unittest.TestCase):
+    def test_box_clip(self):
+        program = Program()
+        with program_guard(program):
+            input_box = layers.data(
+                name='input_box', shape=[7, 4], dtype='float32', lod_level=1)
+            im_info = layers.data(name='im_info', shape=[3], dtype='float32')
+            out = layers.box_clip(input_box, im_info)
+            self.assertIsNotNone(out)
 class TestMulticlassNMS(unittest.TestCase):
    def test_multiclass_nms(self):
        program = Program()

--- a/python/paddle/fluid/tests/unittests/test_box_clip_op.py
+++ b/python/paddle/fluid/tests/unittests/test_box_clip_op.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import unittest
+import numpy as np
+import sys
+import math
+from op_test import OpTest
+import copy
+def box_clip(input_box, im_info, output_box):
+    im_w = round(im_info[1] / im_info[2])
+    im_h = round(im_info[0] / im_info[2])
+    output_box[:, :, 0] = np.maximum(
+        np.minimum(input_box[:, :, 0], im_w - 1), 0)
+    output_box[:, :, 1] = np.maximum(
+        np.minimum(input_box[:, :, 1], im_h - 1), 0)
+    output_box[:, :, 2] = np.maximum(
+        np.minimum(input_box[:, :, 2], im_w - 1), 0)
+    output_box[:, :, 3] = np.maximum(
+        np.minimum(input_box[:, :, 3], im_h - 1), 0)
+def batch_box_clip(input_boxes, im_info, lod):
+    n = input_boxes.shape[0]
+    m = input_boxes.shape[1]
+    output_boxes = np.zeros((n, m, 4), dtype=np.float32)
+    cur_offset = 0
+    for i in range(len(lod)):
+        box_clip(input_boxes[cur_offset:(cur_offset + lod[i]), :, :],
+                 im_info[i, :],
+                 output_boxes[cur_offset:(cur_offset + lod[i]), :, :])
+        cur_offset += lod[i]
+    return output_boxes
+class TestBoxClipOp(OpTest):
+    def test_check_output(self):
+        self.check_output()
+    def setUp(self):
+        self.op_type = "box_clip"
+        lod = [[1, 2, 3]]
+        input_boxes = np.random.random((6, 10, 4)) * 5
+        im_info = np.array([[5, 8, 1.], [6, 6, 1.], [7, 5, 1.]])
+        output_boxes = batch_box_clip(input_boxes, im_info, lod[0])
+        self.inputs = {
+            'Input': (input_boxes.astype('float32'), lod),
+            'ImInfo': im_info.astype('float32'),
+        }
+        self.outputs = {'Output': output_boxes}
+if __name__ == '__main__':
+    unittest.main()