refine code, test=develop

11f1baa4 · jerrywgz · 57e5f61e · 11f1baa4 · 11f1baa4 · 11f1baa4
3 changed file
--- a/paddle/fluid/operators/detection/box_clip_op.cc
+++ b/paddle/fluid/operators/detection/box_clip_op.cc
@@ -41,14 +41,6 @@ class BoxClipOp : public framework::OperatorWithKernel {
    ctx->ShareDim("Input", /*->*/ "Output");
    ctx->ShareLoD("Input", /*->*/ "Output");
  }
-  /*
-  protected:
-   framework::OpKernelType GetExpectedKernelType(
-       const framework::ExecutionContext& ctx) const override {
-     auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Input"));
-     return framework::OpKernelType(data_type, platform::CPUPlace());
-   }
-   */
 };
 class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -68,11 +60,17 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
    AddComment(R"DOC(
 This operator clips input boxes to original input images.
-The formula is given as follows:
+For each input box, the formula is given as follows:
-       $$height_out = \max(\min(height_loc, im_h), 0)$$
+       $$xmin = \max(\min(xmin, im_w - 1), 0)$$
-       $$width_out = \max(\min(width_loc, im_w), 0)$$     
+       $$ymin = \max(\min(ymin, im_h - 1), 0)$$     
+       $$xmax = \max(\min(xmax, im_w - 1), 0)$$
+       $$ymax = \max(\min(ymax, im_h - 1), 0)$$
+where im_w and im_h are computed from ImInfo as follows:
+       $$im_w = \mathrm{round}(width / im_scale)$$
+       $$im_h = \mathrm{round}(height / im_scale)$$
 )DOC");
  }
 };

--- a/paddle/fluid/operators/detection/box_clip_op.cu
+++ b/paddle/fluid/operators/detection/box_clip_op.cu
@@ -30,13 +30,13 @@ template <typename T, int BlockSize>
 static __global__ void GPUBoxClip(const T *input, const size_t *lod,
                                  const size_t width, const T *im_info,
                                  T *output) {
+  T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] /
+                 im_info[blockIdx.x * ImInfoSize + 2]);
+  T im_h = round(im_info[blockIdx.x * ImInfoSize] /
+                 im_info[blockIdx.x * ImInfoSize + 2]);
  for (int i = threadIdx.x; i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width;
       i += BlockSize) {
    int idx = lod[blockIdx.x] * width + i;
-    T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] /
-                   im_info[blockIdx.x * ImInfoSize + 2]);
-    T im_h = round(im_info[blockIdx.x * ImInfoSize] /
-                   im_info[blockIdx.x * ImInfoSize + 2]);
    T im_size = (idx % 2 == 0) ? im_w : im_h;
    output[idx] = max(min(input[idx], im_size - 1), T(0.));
  }
@@ -57,9 +57,9 @@ class GPUBoxClipKernel : public framework::OpKernel<T> {
    framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
    auto &dev_ctx = context.template device_context<DeviceContext>();
    auto stream = dev_ctx.stream();
-    const size_t num_lod = lod.back().size() - 1;
+    const size_t batch_size = lod.back().size() - 1;
    T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
-    GPUBoxClip<T, 512><<<num_lod, 512, 0, stream>>>(
+    GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
        input->data<T>(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()),
        bbox_width, im_info->data<T>(), output_data);
  }

--- a/python/paddle/fluid/layers/detection.py
+++ b/python/paddle/fluid/layers/detection.py
@@ -1816,26 +1816,35 @@ def generate_proposals(scores,
 def box_clip(input, im_info, inplace=False, name=None):
    """
    Clip the box into the size given by im_info
-    The formula is given as follows:
+    For each input box, the formula is given as follows:
    .. code-block:: text
-        height_out = max(min(height_loc, im_h), 0)
+        xmin = max(min(xmin, im_w - 1), 0)
-        width_out = max(min(width_loc, im_w), 0)
+        ymin = max(min(ymin, im_h - 1), 0) 
+        xmax = max(min(xmax, im_w - 1), 0)
+        ymax = max(min(ymax, im_h - 1), 0)
+    where im_w and im_h are computed from im_info:
+    .. code-block:: text
+        im_h = round(height / scale)
+        im_w = round(width / scale)
    Args:
-        input_box(variable): The input box, the last dimension is 4.
+        input(variable): The input box, the last dimension is 4.
        im_info(variable): The information of image with shape [N, 3] with 
                            layout (height, width, scale). height and width
                            are the input size, and scale is the ratio of the
                            input size to the original size.
-        inplace(bool): Must use :attr:`False` if :attr:`input_box` is used in 
+        inplace(bool): Must use :attr:`False` if :attr:`input` is used in 
                       multiple operators. If this flag is set :attr:`True`, 
-                       reuse input :attr:`input_box` to clip, which will 
+                       reuse input :attr:`input` to clip, which will 
-                       change the value of tensor variable :attr:`input_box` 
+                       change the value of tensor variable :attr:`input` 
-                       and might cause errors when :attr:`input_box` is used 
+                       and might cause errors when :attr:`input` is used 
                       in multiple operators. If :attr:`False`, preserve the 
-                       value pf :attr:`input_box` and create a new output 
+                       value of :attr:`input` and create a new output 
                       tensor variable whose data is copied from input x but 
                      clipped.
        name (str): The name of this layer. It is optional.
@@ -1850,16 +1859,13 @@ def box_clip(input, im_info, inplace=False, name=None):
                name='data', shape=[8, 4], dtype='float32', lod_level=1)
            im_info = fluid.layers.data(name='im_info', shape=[3])
            out = fluid.layers.box_clip(
-                input_box=boxes, im_info=im_info, inplace=True)
+                input=boxes, im_info=im_info, inplace=True)
    """
    helper = LayerHelper("box_clip", **locals())
-    output = helper.create_variable_for_type_inference(dtype=input.dtype)
+    output = x if inplace else helper.create_variable_for_type_inference(\
+             dtype=input.dtype)
    inputs = {"Input": input, "ImInfo": im_info}
-    helper.append_op(
+    helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output})
-        type="box_clip",
-        inputs=inputs,
-        attrs={"inplace:": inplace},
-        outputs={"Output": output})
    return output