diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index 6d8dac38f7f485de06907e9759bc0f48136fcee8..e0d7e25d944cf2321799da4c73de9f74d9fd287d 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -74,9 +74,8 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "The input tensor of YoloBox operator, " - "This is a 4-D tensor with shape of [N, C, H, W]. " - "H and W should be same, and the second dimension(C) stores " + "The input tensor of YoloBox operator is a 4-D tensor with " + "shape of [N, C, H, W]. The second dimension(C) stores " "box locations, confidence score and classification one-hot " "keys of each anchor box. Generally, X should be the output " "of YOLOv3 network."); @@ -91,10 +90,10 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { "batch num, M is output box number, and the 3rd dimension " "stores [xmin, ymin, xmax, ymax] coordinates of boxes."); AddOutput("Scores", - "The output tensor ofdetection boxes scores of YoloBox " - "operator, This is a 3-D tensor with shape of [N, M, C], " - "N is the batch num, M is output box number, C is the " - "class number."); + "The output tensor of detection boxes scores of YoloBox " + "operator, This is a 3-D tensor with shape of " + "[N, M, :attr:`class_num`], N is the batch num, M is " + "output box number."); AddAttr("class_num", "The number of classes to predict."); AddAttr>("anchors", @@ -112,7 +111,7 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { "be ignored.") .SetDefault(0.01); AddComment(R"DOC( - This operator generate YOLO detection boxes from output of YOLOv3 network. + This operator generates YOLO detection boxes from output of YOLOv3 network. The output of previous network is in shape [N, C, H, W], while H and W should be the same, H and W specify the grid size, each grid point predict @@ -150,6 +149,10 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker { :attr:`conf_thresh` should be ignored, and box final scores is the product of confidence scores and classification scores. + $$ + score_{pred} = score_{conf} * score_{class} + $$ + )DOC"); } }; diff --git a/paddle/fluid/operators/detection/yolo_box_op.cu b/paddle/fluid/operators/detection/yolo_box_op.cu index 30175be8bb139691033599f32c7a290ca78cba31..12555f5347361781858d9bbf53b0f522047ef3ac 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cu +++ b/paddle/fluid/operators/detection/yolo_box_op.cu @@ -83,12 +83,22 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel { const int an_num = anchors.size() / 2; int input_size = downsample_ratio * h; - Tensor anchors_t, cpu_anchors_t; - auto cpu_anchors_data = - cpu_anchors_t.mutable_data({an_num * 2}, platform::CPUPlace()); - std::copy(anchors.begin(), anchors.end(), cpu_anchors_data); - TensorCopySync(cpu_anchors_t, ctx.GetPlace(), &anchors_t); - auto anchors_data = anchors_t.data(); + /* Tensor anchors_t, cpu_anchors_t; */ + /* auto cpu_anchors_data = */ + /* cpu_anchors_t.mutable_data({an_num * 2}, platform::CPUPlace()); */ + /* std::copy(anchors.begin(), anchors.end(), cpu_anchors_data); */ + /* TensorCopySync(cpu_anchors_t, ctx.GetPlace(), &anchors_t); */ + /* auto anchors_data = anchors_t.data(); */ + auto& dev_ctx = ctx.cuda_device_context(); + auto& allocator = + platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx); + int bytes = sizeof(int) * anchors.size(); + auto anchors_ptr = allocator.Allocate(sizeof(int) * anchors.size()); + int* anchors_data = reinterpret_cast(anchors_ptr->ptr()); + const auto gplace = boost::get(ctx.GetPlace()); + const auto cplace = platform::CPUPlace(); + memory::Copy(gplace, anchors_data, cplace, anchors.data(), bytes, + dev_ctx.stream()); const T* input_data = input->data(); const int* imgsize_data = img_size->data(); @@ -96,7 +106,6 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel { T* scores_data = scores->mutable_data({n, box_num, class_num}, ctx.GetPlace()); math::SetConstant set_zero; - auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, boxes, static_cast(0)); set_zero(dev_ctx, scores, static_cast(0)); diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 6d82b8a12ed5cafa55534b4a7acd688f08fdebe9..56589c1728f43c26ddc81104067f89971859dcb0 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -632,8 +632,8 @@ def yolo_box(x, Returns: Variable: A 3-D tensor with shape [N, M, 4], the coordinates of boxes, - and a 3-D tensor with shape [N, M, C], the classification scores - of boxes. + and a 3-D tensor with shape [N, M, :attr:`class_num`], the classification + scores of boxes. Raises: TypeError: Input x of yolov_box must be Variable @@ -647,7 +647,7 @@ def yolo_box(x, x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') anchors = [10, 13, 16, 30, 33, 23] - loss = fluid.layers.yolov3_loss(x=x, class_num=80, anchors=anchors, + loss = fluid.layers.yolo_box(x=x, class_num=80, anchors=anchors, conf_thresh=0.01, downsample_ratio=32) """ helper = LayerHelper('yolo_box', **locals()) diff --git a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py index d4a179794c261b6e41e049c16c60eb852bb3e50f..416e6ea9f412d86db877fc36175e8b910b0613fe 100644 --- a/python/paddle/fluid/tests/unittests/test_yolo_box_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolo_box_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index 569fe63d052f9423b32eadd18c28d899a4c5ccdd..020c1139230a9177c4d7765367359d91839d7d46 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -75,8 +75,8 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs): mask_num = len(anchor_mask) class_num = attrs["class_num"] ignore_thresh = attrs['ignore_thresh'] - downsample_ratio = attrs['downsample_ratio'] - input_size = downsample_ratio * h + downsample = attrs['downsample'] + input_size = downsample * h x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2)) loss = np.zeros((n)).astype('float32') @@ -86,6 +86,10 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs): pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h + x[:, :, :, :, 5:] = np.where(x[:, :, :, :, 5:] < -0.5, x[:, :, :, :, 5:], + np.ones_like(x[:, :, :, :, 5:]) * 1.0 / + class_num) + mask_anchors = [] for m in anchor_mask: mask_anchors.append((anchors[2 * m], anchors[2 * m + 1])) @@ -172,7 +176,7 @@ class TestYolov3LossOp(OpTest): "anchor_mask": self.anchor_mask, "class_num": self.class_num, "ignore_thresh": self.ignore_thresh, - "downsample_ratio": self.downsample_ratio, + "downsample": self.downsample, } self.inputs = { @@ -204,7 +208,7 @@ class TestYolov3LossOp(OpTest): self.anchor_mask = [1, 2] self.class_num = 5 self.ignore_thresh = 0.5 - self.downsample_ratio = 32 + self.downsample = 32 self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5) self.gtbox_shape = (3, 5, 4)