diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 7228641caab33da82ea8f69772cefc3d15946559..6b4a6061cbb289ecc36156762d196542541d99be 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -36,13 +36,11 @@ if(WITH_XPU) detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op_xpu.cc) detection_library(prior_box_op SRCS prior_box_op.cc) - detection_library(yolo_box_op SRCS yolo_box_op.cc) detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc) else() detection_library(iou_similarity_op SRCS iou_similarity_op.cc iou_similarity_op.cu) detection_library(prior_box_op SRCS prior_box_op.cc) - detection_library(yolo_box_op SRCS yolo_box_op.cc) # detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc) endif() diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc deleted file mode 100644 index a60f42de66a68f68498a85b89af512f85a144751..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ /dev/null @@ -1,269 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -#include "paddle/fluid/framework/infershape_utils.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/phi/infermeta/binary.h" - -namespace paddle { -namespace operators { - -class YoloBoxOp : public framework::OperatorWithKernel { - public: - using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "YoloBoxOp"); - OP_INOUT_CHECK(ctx->HasInput("ImgSize"), "Input", "ImgSize", "YoloBoxOp"); - OP_INOUT_CHECK(ctx->HasOutput("Boxes"), "Output", "Boxes", "YoloBoxOp"); - OP_INOUT_CHECK(ctx->HasOutput("Scores"), "Output", "Scores", "YoloBoxOp"); - - auto dim_x = ctx->GetInputDim("X"); - auto dim_imgsize = ctx->GetInputDim("ImgSize"); - auto anchors = ctx->Attrs().Get>("anchors"); - int anchor_num = anchors.size() / 2; - auto class_num = ctx->Attrs().Get("class_num"); - auto iou_aware = ctx->Attrs().Get("iou_aware"); - auto iou_aware_factor = ctx->Attrs().Get("iou_aware_factor"); - - PADDLE_ENFORCE_EQ( - dim_x.size(), - 4, - platform::errors::InvalidArgument("Input(X) should be a 4-D tensor." - "But received X dimension(%s)", - dim_x.size())); - if (iou_aware) { - PADDLE_ENFORCE_EQ( - dim_x[1], - anchor_num * (6 + class_num), - platform::errors::InvalidArgument( - "Input(X) dim[1] should be equal to (anchor_mask_number * (6 " - "+ class_num)) while iou_aware is true." - "But received dim[1](%s) != (anchor_mask_number * " - "(6+class_num)(%s).", - dim_x[1], - anchor_num * (6 + class_num))); - PADDLE_ENFORCE_GE( - iou_aware_factor, - 0, - platform::errors::InvalidArgument( - "Attr(iou_aware_factor) should greater than or equal to 0." - "But received iou_aware_factor (%s)", - iou_aware_factor)); - PADDLE_ENFORCE_LE( - iou_aware_factor, - 1, - platform::errors::InvalidArgument( - "Attr(iou_aware_factor) should less than or equal to 1." - "But received iou_aware_factor (%s)", - iou_aware_factor)); - } else { - PADDLE_ENFORCE_EQ( - dim_x[1], - anchor_num * (5 + class_num), - platform::errors::InvalidArgument( - "Input(X) dim[1] should be equal to (anchor_mask_number * (5 " - "+ class_num))." - "But received dim[1](%s) != (anchor_mask_number * " - "(5+class_num)(%s).", - dim_x[1], - anchor_num * (5 + class_num))); - } - PADDLE_ENFORCE_EQ(dim_imgsize.size(), - 2, - platform::errors::InvalidArgument( - "Input(ImgSize) should be a 2-D tensor." - "But received Imgsize size(%s)", - dim_imgsize.size())); - if ((dim_imgsize[0] > 0 && dim_x[0] > 0) || ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ( - dim_imgsize[0], - dim_x[0], - platform::errors::InvalidArgument( - "Input(ImgSize) dim[0] and Input(X) dim[0] should be same.")); - } - PADDLE_ENFORCE_EQ( - dim_imgsize[1], - 2, - platform::errors::InvalidArgument("Input(ImgSize) dim[1] should be 2." - "But received imgsize dim[1](%s).", - dim_imgsize[1])); - PADDLE_ENFORCE_GT(anchors.size(), - 0, - platform::errors::InvalidArgument( - "Attr(anchors) length should be greater than 0." - "But received anchors length(%s).", - anchors.size())); - PADDLE_ENFORCE_EQ(anchors.size() % 2, - 0, - platform::errors::InvalidArgument( - "Attr(anchors) length should be even integer." - "But received anchors length (%s)", - anchors.size())); - PADDLE_ENFORCE_GT(class_num, - 0, - platform::errors::InvalidArgument( - "Attr(class_num) should be an integer greater than 0." - "But received class_num (%s)", - class_num)); - - int box_num; - if ((dim_x[2] > 0 && dim_x[3] > 0) || ctx->IsRuntime()) { - box_num = dim_x[2] * dim_x[3] * anchor_num; - } else { - box_num = -1; - } - std::vector dim_boxes({dim_x[0], box_num, 4}); - ctx->SetOutputDim("Boxes", phi::make_ddim(dim_boxes)); - - std::vector dim_scores({dim_x[0], box_num, class_num}); - ctx->SetOutputDim("Scores", phi::make_ddim(dim_scores)); - } - - protected: - phi::KernelKey GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { - return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.GetPlace()); - } -}; - -class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { - public: - void Make() override { - AddInput("X", - "The input tensor of YoloBox operator is a 4-D tensor with " - "shape of [N, C, H, W]. The second dimension(C) stores " - "box locations, confidence score and classification one-hot " - "keys of each anchor box. Generally, X should be the output " - "of YOLOv3 network."); - AddInput("ImgSize", - "The image size tensor of YoloBox operator, " - "This is a 2-D tensor with shape of [N, 2]. This tensor holds " - "height and width of each input image used for resizing output " - "box in input image scale."); - AddOutput("Boxes", - "The output tensor of detection boxes of YoloBox operator, " - "This is a 3-D tensor with shape of [N, M, 4], N is the " - "batch num, M is output box number, and the 3rd dimension " - "stores [xmin, ymin, xmax, ymax] coordinates of boxes."); - AddOutput("Scores", - "The output tensor of detection boxes scores of YoloBox " - "operator, This is a 3-D tensor with shape of " - "[N, M, :attr:`class_num`], N is the batch num, M is " - "output box number."); - - AddAttr("class_num", "The number of classes to predict."); - AddAttr>("anchors", - "The anchor width and height, " - "it will be parsed pair by pair.") - .SetDefault(std::vector{}); - AddAttr("downsample_ratio", - "The downsample ratio from network input to YoloBox operator " - "input, so 32, 16, 8 should be set for the first, second, " - "and thrid YoloBox operators.") - .SetDefault(32); - AddAttr("conf_thresh", - "The confidence scores threshold of detection boxes. " - "Boxes with confidence scores under threshold should " - "be ignored.") - .SetDefault(0.01); - AddAttr("clip_bbox", - "Whether clip output bonding box in Input(ImgSize) " - "boundary. Default true.") - .SetDefault(true); - AddAttr("scale_x_y", - "Scale the center point of decoded bounding " - "box. Default 1.0") - .SetDefault(1.); - AddAttr("iou_aware", "Whether use iou aware. Default false.") - .SetDefault(false); - AddAttr("iou_aware_factor", "iou aware factor. Default 0.5.") - .SetDefault(0.5); - AddComment(R"DOC( - This operator generates YOLO detection boxes from output of YOLOv3 network. - - The output of previous network is in shape [N, C, H, W], while H and W - should be the same, H and W specify the grid size, each grid point predict - given number boxes, this given number, which following will be represented as S, - is specified by the number of anchors. In the second dimension(the channel - dimension), C should be equal to S * (5 + class_num) if :attr:`iou_aware` is false, - otherwise C should be equal to S * (6 + class_num). class_num is the object - category number of source dataset(such as 80 in coco dataset), so the - second(channel) dimension, apart from 4 box location coordinates x, y, w, h, - also includes confidence score of the box and class one-hot key of each anchor - box. - - Assume the 4 location coordinates are :math:`t_x, t_y, t_w, t_h`, the box - predictions should be as follows: - - $$ - b_x = \\sigma(t_x) + c_x - $$ - $$ - b_y = \\sigma(t_y) + c_y - $$ - $$ - b_w = p_w e^{t_w} - $$ - $$ - b_h = p_h e^{t_h} - $$ - - in the equation above, :math:`c_x, c_y` is the left top corner of current grid - and :math:`p_w, p_h` is specified by anchors. - - The logistic regression value of the 5th channel of each anchor prediction boxes - represents the confidence score of each prediction box, and the logistic - regression value of the last :attr:`class_num` channels of each anchor prediction - boxes represents the classifcation scores. Boxes with confidence scores less than - :attr:`conf_thresh` should be ignored, and box final scores is the product of - confidence scores and classification scores. - - $$ - score_{pred} = score_{conf} * score_{class} - $$ - - where the confidence scores follow the formula bellow - - .. math:: - - score_{conf} = \begin{case} - obj, \text{if } iou_aware == false \\ - obj^{1 - iou_aware_factor} * iou^{iou_aware_factor}, \text{otherwise} - \end{case} - - )DOC"); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -DECLARE_INFER_SHAPE_FUNCTOR(yolo_box, - YoloBoxInferShapeFunctor, - PD_INFER_META(phi::YoloBoxInferMeta)); -REGISTER_OPERATOR( - yolo_box, - ops::YoloBoxOp, - ops::YoloBoxOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker, - YoloBoxInferShapeFunctor); - -REGISTER_OP_VERSION(yolo_box).AddCheckpoint( - R"ROC( - Upgrade yolo box to add new attribute [iou_aware, iou_aware_factor]. - )ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("iou_aware", "Whether use iou aware", false) - .NewAttr("iou_aware_factor", "iou aware factor", 0.5f)); diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml index cd499a2d049e72a87488ff8d195209613c82ff6d..47dc127162c61fc0a502f340fb96bf46f2eec47d 100755 --- a/paddle/phi/api/yaml/legacy_ops.yaml +++ b/paddle/phi/api/yaml/legacy_ops.yaml @@ -1384,15 +1384,6 @@ data_type: x backward: unpool3d_grad -- op : yolo_box - args : (Tensor x, Tensor img_size, int[] anchors, int class_num, float conf_thresh, int downsample_ratio, bool clip_bbox, float scale_x_y=1.0, bool iou_aware=false, float iou_aware_factor=0.5) - output : Tensor(boxes), Tensor(scores) - infer_meta : - func : YoloBoxInferMeta - kernel : - func : yolo_box - data_type : x - - op : yolo_loss args : (Tensor x, Tensor gt_box, Tensor gt_label, Tensor gt_score, int[] anchors, int[] anchor_mask, int class_num, float ignore_thresh, int downsample_ratio, bool use_label_smooth=true, float scale_x_y=1.0) output : Tensor(loss), Tensor(objectness_mask), Tensor(gt_match_mask) diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml index 655bfe546b6d39737cd6e215d4e20c0fd504e6cd..fecca3a4b858573b2dcea79b17b8cf2c6d296c83 100644 --- a/paddle/phi/api/yaml/op_compat.yaml +++ b/paddle/phi/api/yaml/op_compat.yaml @@ -2328,6 +2328,12 @@ extra : attrs : ['str[] skip_eager_deletion_vars = {}'] +- op : yolo_box + inputs : + {x : X, img_size : ImgSize} + outputs : + {boxes : Boxes, scores : Scores} + - op: sigmoid_cross_entropy_with_logits backward: sigmoid_cross_entropy_with_logits_grad inputs : diff --git a/paddle/phi/api/yaml/op_version.yaml b/paddle/phi/api/yaml/op_version.yaml index e8bdbb28259f9efdfbc867dbba81767fdb5bf171..91ab3dfb1eb05762d068f79367680f5fc7bdc410 100644 --- a/paddle/phi/api/yaml/op_version.yaml +++ b/paddle/phi/api/yaml/op_version.yaml @@ -250,3 +250,14 @@ - add_attr : axis comment : The axis to apply unique. If None, the input will be flattened. default : std::vector{} + +- op : yolo_box + version : + - checkpoint : Upgrade yolo box to add new attribute [iou_aware, iou_aware_factor]. + action : + - add_attr : iou_aware + comment : Whether use iou aware. + default : "false" + - add_attr : iou_aware_factor + comment : iou aware factor. + default : 0.5f diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml index 2b2f30f1c63516d222667a2fec86e3ca524317ac..c839a8cfeefcad8be3c121f6a1c0afa2f5871454 100644 --- a/paddle/phi/api/yaml/ops.yaml +++ b/paddle/phi/api/yaml/ops.yaml @@ -2028,3 +2028,12 @@ kernel : func : where backward : where_grad + +- op : yolo_box + args : (Tensor x, Tensor img_size, int[] anchors={}, int class_num = 1, float conf_thresh = 0.01, int downsample_ratio = 32, bool clip_bbox = true, float scale_x_y=1.0, bool iou_aware=false, float iou_aware_factor=0.5) + output : Tensor(boxes), Tensor(scores) + infer_meta : + func : YoloBoxInferMeta + kernel : + func : yolo_box + data_type : x diff --git a/paddle/phi/ops/compat/yolo_box_sig.cc b/paddle/phi/ops/compat/yolo_box_sig.cc deleted file mode 100644 index bb39e72a64f5075908ca9b28d5f685fb0d6b6c9f..0000000000000000000000000000000000000000 --- a/paddle/phi/ops/compat/yolo_box_sig.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/core/compat/op_utils.h" - -namespace phi { - -KernelSignature YoloBoxOpArgumentMapping(const ArgumentMappingContext& ctx) { - return KernelSignature("yolo_box", - {"X", "ImgSize"}, - {"anchors", - "class_num", - "conf_thresh", - "downsample_ratio", - "clip_bbox", - "scale_x_y", - "iou_aware", - "iou_aware_factor"}, - {"Boxes", "Scores"}); -} - -} // namespace phi - -PD_REGISTER_ARG_MAPPING_FN(yolo_box, phi::YoloBoxOpArgumentMapping);