diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 15adcdedaed6c517bfbcf7788b9e1fd429beded4..3aa766559a530bc31fbb277f2bcd474da776e63b 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -41,14 +41,6 @@ class BoxClipOp : public framework::OperatorWithKernel { ctx->ShareDim("Input", /*->*/ "Output"); ctx->ShareLoD("Input", /*->*/ "Output"); } - /* - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Input")); - return framework::OpKernelType(data_type, platform::CPUPlace()); - } - */ }; class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { @@ -68,11 +60,17 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( This operator clips input boxes to original input images. -The formula is given as follows: +For each input box, the formula is given as follows: - $$height_out = \max(\min(height_loc, im_h), 0)$$ - $$width_out = \max(\min(width_loc, im_w), 0)$$ + $$xmin = \max(\min(xmin, im_w - 1), 0)$$ + $$ymin = \max(\min(ymin, im_h - 1), 0)$$ + $$xmax = \max(\min(xmax, im_w - 1), 0)$$ + $$ymax = \max(\min(ymax, im_h - 1), 0)$$ +where im_w and im_h are computed from ImInfo, the formula is given as follows: + + $$im_w = \mathrm{round}(width / im_scale)$$ + $$im_h = \mathrm{round}(height / im_scale)$$ )DOC"); } }; diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index f10c92366de4a1306eedbc193b14dfd674560b9e..b727da5f7b736b6f22407d1dfbca708ed0cf04d9 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -30,13 +30,13 @@ template static __global__ void GPUBoxClip(const T *input, const size_t *lod, const size_t width, const T *im_info, T *output) { + T im_w = 
round(im_info[blockIdx.x * ImInfoSize + 1] / + im_info[blockIdx.x * ImInfoSize + 2]); + T im_h = round(im_info[blockIdx.x * ImInfoSize] / + im_info[blockIdx.x * ImInfoSize + 2]); for (int i = threadIdx.x; i < (lod[blockIdx.x + 1] - lod[blockIdx.x]) * width; i += BlockSize) { int idx = lod[blockIdx.x] * width + i; - T im_w = round(im_info[blockIdx.x * ImInfoSize + 1] / - im_info[blockIdx.x * ImInfoSize + 2]); - T im_h = round(im_info[blockIdx.x * ImInfoSize] / - im_info[blockIdx.x * ImInfoSize + 2]); T im_size = (idx % 2 == 0) ? im_w : im_h; output[idx] = max(min(input[idx], im_size - 1), T(0.)); } @@ -57,9 +57,9 @@ class GPUBoxClipKernel : public framework::OpKernel { framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); auto &dev_ctx = context.template device_context(); auto stream = dev_ctx.stream(); - const size_t num_lod = lod.back().size() - 1; + const size_t batch_size = lod.back().size() - 1; T *output_data = output->mutable_data(dev_ctx.GetPlace()); - GPUBoxClip<<>>( + GPUBoxClip<<>>( input->data(), abs_offset_lod[0].CUDAMutableData(dev_ctx.GetPlace()), bbox_width, im_info->data(), output_data); } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 3e2882ea3ce5d4420a5cf184485e61d9fa4f7e0d..9fc23da70ebd04bbbb77b1fdb8a44a097f89c186 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -1816,26 +1816,35 @@ def generate_proposals(scores, def box_clip(input, im_info, inplace=False, name=None): """ Clip the box into the size given by im_info - The formula is given as follows: + For each input box, The formula is given as follows: .. code-block:: text - height_out = max(min(height_loc, im_h), 0) - width_out = max(min(width_loc, im_w), 0) + xmin = max(min(xmin, im_w - 1), 0) + ymin = max(min(ymin, im_h - 1), 0) + xmax = max(min(xmax, im_w - 1), 0) + ymax = max(min(ymax, im_h - 1), 0) + + where im_w and im_h are computed from im_info: + + .. 
code-block:: text + + im_h = round(height / scale) + im_w = round(width / scale) Args: - input_box(variable): The input box, the last dimension is 4. + input(variable): The input box, the last dimension is 4. im_info(variable): The information of image with shape [N, 3] with layout (height, width, scale). height and width is the input size and scale is the ratio of input size and original size. - inplace(bool): Must use :attr:`False` if :attr:`input_box` is used in + inplace(bool): Must use :attr:`False` if :attr:`input` is used in multiple operators. If this flag is set :attr:`True`, - reuse input :attr:`input_box` to clip, which will - change the value of tensor variable :attr:`input_box` - and might cause errors when :attr:`input_box` is used + reuse input :attr:`input` to clip, which will + change the value of tensor variable :attr:`input` + and might cause errors when :attr:`input` is used in multiple operators. If :attr:`False`, preserve the - value pf :attr:`input_box` and create a new output + value of :attr:`input` and create a new output tensor variable whose data is copied from input x but cliped. name (str): The name of this layer. It is optional. @@ -1850,16 +1859,13 @@ def box_clip(input, im_info, inplace=False, name=None): name='data', shape=[8, 4], dtype='float32', lod_level=1) im_info = fluid.layers.data(name='im_info', shape=[3]) out = fluid.layers.box_clip( - input_box=boxes, im_info=im_info, inplace=True) + input=boxes, im_info=im_info, inplace=True) """ helper = LayerHelper("box_clip", **locals()) - output = helper.create_variable_for_type_inference(dtype=input.dtype) + output = input if inplace else helper.create_variable_for_type_inference(\ + dtype=input.dtype) inputs = {"Input": input, "ImInfo": im_info} - helper.append_op( - type="box_clip", - inputs=inputs, - attrs={"inplace:": inplace}, - outputs={"Output": output}) + helper.append_op(type="box_clip", inputs=inputs, outputs={"Output": output}) return output