Commit e4e37640 authored by dengkaipeng

use memory Copy. test=develop

Parent 626fb859
@@ -74,9 +74,8 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
     AddInput("X",
-             "The input tensor of YoloBox operator, "
-             "This is a 4-D tensor with shape of [N, C, H, W]. "
-             "H and W should be same, and the second dimension(C) stores "
+             "The input tensor of YoloBox operator is a 4-D tensor with "
+             "shape of [N, C, H, W]. The second dimension(C) stores "
              "box locations, confidence score and classification one-hot "
              "keys of each anchor box. Generally, X should be the output "
              "of YOLOv3 network.");
@@ -91,10 +90,10 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker {
               "batch num, M is output box number, and the 3rd dimension "
               "stores [xmin, ymin, xmax, ymax] coordinates of boxes.");
     AddOutput("Scores",
-              "The output tensor ofdetection boxes scores of YoloBox "
-              "operator, This is a 3-D tensor with shape of [N, M, C], "
-              "N is the batch num, M is output box number, C is the "
-              "class number.");
+              "The output tensor of detection boxes scores of YoloBox "
+              "operator, This is a 3-D tensor with shape of "
+              "[N, M, :attr:`class_num`], N is the batch num, M is "
+              "output box number.");
     AddAttr<int>("class_num", "The number of classes to predict.");
     AddAttr<std::vector<int>>("anchors",
@@ -112,7 +111,7 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker {
                   "be ignored.")
         .SetDefault(0.01);
     AddComment(R"DOC(
-This operator generate YOLO detection boxes from output of YOLOv3 network.
+This operator generates YOLO detection boxes from output of YOLOv3 network.
 The output of previous network is in shape [N, C, H, W], while H and W
 should be the same, H and W specify the grid size, each grid point predict
@@ -150,6 +149,10 @@ class YoloBoxOpMaker : public framework::OpProtoAndCheckerMaker {
 :attr:`conf_thresh` should be ignored, and box final scores is the product of
 confidence scores and classification scores.
+$$
+score_{pred} = score_{conf} * score_{class}
+$$
 )DOC");
   }
 };
...
@@ -83,12 +83,22 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel<T> {
     const int an_num = anchors.size() / 2;
     int input_size = downsample_ratio * h;
-    Tensor anchors_t, cpu_anchors_t;
-    auto cpu_anchors_data =
-        cpu_anchors_t.mutable_data<int>({an_num * 2}, platform::CPUPlace());
-    std::copy(anchors.begin(), anchors.end(), cpu_anchors_data);
-    TensorCopySync(cpu_anchors_t, ctx.GetPlace(), &anchors_t);
-    auto anchors_data = anchors_t.data<int>();
+    /* Tensor anchors_t, cpu_anchors_t; */
+    /* auto cpu_anchors_data = */
+    /*     cpu_anchors_t.mutable_data<int>({an_num * 2}, platform::CPUPlace()); */
+    /* std::copy(anchors.begin(), anchors.end(), cpu_anchors_data); */
+    /* TensorCopySync(cpu_anchors_t, ctx.GetPlace(), &anchors_t); */
+    /* auto anchors_data = anchors_t.data<int>(); */
+    auto& dev_ctx = ctx.cuda_device_context();
+    auto& allocator =
+        platform::DeviceTemporaryAllocator::Instance().Get(dev_ctx);
+    int bytes = sizeof(int) * anchors.size();
+    auto anchors_ptr = allocator.Allocate(sizeof(int) * anchors.size());
+    int* anchors_data = reinterpret_cast<int*>(anchors_ptr->ptr());
+    const auto gplace = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    const auto cplace = platform::CPUPlace();
+    memory::Copy(gplace, anchors_data, cplace, anchors.data(), bytes,
+                 dev_ctx.stream());
     const T* input_data = input->data<T>();
     const int* imgsize_data = img_size->data<int>();
@@ -96,7 +106,6 @@ class YoloBoxOpCUDAKernel : public framework::OpKernel<T> {
     T* scores_data =
         scores->mutable_data<T>({n, box_num, class_num}, ctx.GetPlace());
     math::SetConstant<platform::CUDADeviceContext, T> set_zero;
-    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
     set_zero(dev_ctx, boxes, static_cast<T>(0));
     set_zero(dev_ctx, scores, static_cast<T>(0));
...
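Note: the added block above copies the host-side anchors vector into a temporary device allocation with memory::Copy on the kernel's CUDA stream, replacing the earlier CPU tensor plus TensorCopySync round trip. For reference, a minimal standalone CUDA sketch of the same host-to-device copy pattern, using plain cudaMalloc/cudaMemcpyAsync in place of Paddle's DeviceTemporaryAllocator and memory::Copy (variable names here are illustrative, not part of the patch):

// Standalone sketch: copy a small host-side anchor list to a temporary
// device buffer asynchronously on a stream, then synchronize.
#include <cuda_runtime.h>
#include <vector>

int main() {
  std::vector<int> anchors = {10, 13, 16, 30, 33, 23};  // host anchors
  const size_t bytes = sizeof(int) * anchors.size();

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  int* anchors_dev = nullptr;
  cudaMalloc(&anchors_dev, bytes);  // temporary device buffer

  // Host-to-device copy ordered on the stream; the raw-CUDA counterpart of
  // memory::Copy(gplace, dst, cplace, src, bytes, dev_ctx.stream()).
  cudaMemcpyAsync(anchors_dev, anchors.data(), bytes,
                  cudaMemcpyHostToDevice, stream);

  // ... a kernel reading anchors_dev would be launched on the same stream ...

  cudaStreamSynchronize(stream);
  cudaFree(anchors_dev);
  cudaStreamDestroy(stream);
  return 0;
}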
@@ -632,8 +632,8 @@ def yolo_box(x,
     Returns:
         Variable: A 3-D tensor with shape [N, M, 4], the coordinates of boxes,
-        and a 3-D tensor with shape [N, M, C], the classification scores
-        of boxes.
+        and a 3-D tensor with shape [N, M, :attr:`class_num`], the classification
+        scores of boxes.
     Raises:
         TypeError: Input x of yolov_box must be Variable
@@ -647,7 +647,7 @@ def yolo_box(x,
         x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32')
         anchors = [10, 13, 16, 30, 33, 23]
-        loss = fluid.layers.yolov3_loss(x=x, class_num=80, anchors=anchors,
+        loss = fluid.layers.yolo_box(x=x, class_num=80, anchors=anchors,
                                         conf_thresh=0.01, downsample_ratio=32)
     """
     helper = LayerHelper('yolo_box', **locals())
...
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
...
@@ -75,8 +75,8 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs):
     mask_num = len(anchor_mask)
     class_num = attrs["class_num"]
     ignore_thresh = attrs['ignore_thresh']
-    downsample_ratio = attrs['downsample_ratio']
-    input_size = downsample_ratio * h
+    downsample = attrs['downsample']
+    input_size = downsample * h
     x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
     loss = np.zeros((n)).astype('float32')
@@ -86,6 +86,10 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs):
     pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w
     pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h
+    x[:, :, :, :, 5:] = np.where(x[:, :, :, :, 5:] < -0.5, x[:, :, :, :, 5:],
+                                 np.ones_like(x[:, :, :, :, 5:]) * 1.0 /
+                                 class_num)
     mask_anchors = []
     for m in anchor_mask:
         mask_anchors.append((anchors[2 * m], anchors[2 * m + 1]))
@@ -172,7 +176,7 @@ class TestYolov3LossOp(OpTest):
             "anchor_mask": self.anchor_mask,
             "class_num": self.class_num,
             "ignore_thresh": self.ignore_thresh,
-            "downsample_ratio": self.downsample_ratio,
+            "downsample": self.downsample,
         }
         self.inputs = {
@@ -204,7 +208,7 @@ class TestYolov3LossOp(OpTest):
         self.anchor_mask = [1, 2]
         self.class_num = 5
         self.ignore_thresh = 0.5
-        self.downsample_ratio = 32
+        self.downsample = 32
         self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5)
         self.gtbox_shape = (3, 5, 4)
...
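For reference, the numpy helper above decodes the raw (tx, ty) predictions into normalized box centers with cx = (grid_x + sigmoid(tx)) / w and cy = (grid_y + sigmoid(ty)) / h. A minimal C++ sketch of that step (Sigmoid, DecodeCenter, and the sample values are illustrative, not part of this patch):

#include <cmath>
#include <cstdio>

// Sigmoid, as used by the numpy reference implementation above.
static double Sigmoid(double x) { return 1.0 / (1.0 + std::exp(-x)); }

// Decode a raw (tx, ty) prediction at grid cell (gx, gy) of a w x h grid
// into a normalized box center, mirroring
//   pred_box[..., 0] = (grid_x + sigmoid(pred_box[..., 0])) / w
//   pred_box[..., 1] = (grid_y + sigmoid(pred_box[..., 1])) / h
static void DecodeCenter(double tx, double ty, int gx, int gy, int w, int h,
                         double* cx, double* cy) {
  *cx = (gx + Sigmoid(tx)) / w;
  *cy = (gy + Sigmoid(ty)) / h;
}

int main() {
  double cx = 0.0, cy = 0.0;
  DecodeCenter(0.5, -0.3, 6, 4, 13, 13, &cx, &cy);
  std::printf("normalized center = (%.4f, %.4f)\n", cx, cy);
  return 0;
}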