update according to the code review

58bfaea8 · gaoyuan · 72eccb23 · 58bfaea8 · 58bfaea8 · 58bfaea8
3 changed files
--- a/paddle/operators/box_coder_op.cc
+++ b/paddle/operators/box_coder_op.cc
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -31,23 +31,21 @@ class BoxCoderOp : public framework::OperatorWithKernel {
    auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
    auto target_box_dims = ctx->GetInputDim("TargetBox");
-    PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2UL,
+    PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2,
-                      "The shape of PriorBox is [N, 4]");
+                      "The rank of Input of PriorBoxVar must be 2");
-    PADDLE_ENFORCE_EQ(prior_box_dims[1], 4UL,
+    PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]");
-                      "The shape of PriorBox is [N, 4]");
+    PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims);
-    PADDLE_ENFORCE_EQ(prior_box_var_dims.size(), 2UL,
+    PADDLE_ENFORCE_EQ(target_box_dims.size(), 2,
-                      "The shape of PriorBoxVar is [N, 4]");
+                      "The rank of Input of TargetBox must be 2");
-    PADDLE_ENFORCE_EQ(prior_box_var_dims[1], 4UL,
+    PADDLE_ENFORCE_EQ(target_box_dims[1], 4,
-                      "The shape of PriorBoxVar is [N, 4]");
-    PADDLE_ENFORCE_EQ(target_box_dims.size(), 2UL,
-                      "The shape of TargetBox is [M, 4]");
-    PADDLE_ENFORCE_EQ(target_box_dims[1], 4UL,
                      "The shape of TargetBox is [M, 4]");
    GetBoxCodeType(ctx->Attrs().Get<std::string>("code_type"));
-    ctx->SetOutputDim("OutputBox", framework::make_ddim({target_box_dims[0],
+    ctx->SetOutputDim(
-                                                         target_box_dims[1]}));
+        "OutputBox",
+        framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4}));
+    ctx->ShareLoD("TargetBox", /*->*/ "OutputBox");
  }
 };
@@ -58,7 +56,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
    AddInput(
        "PriorBox",
        "(Tensor, default Tensor<float>) "
-        "Box list PriorBox is a 2-D Tensor with shape [M, 4] holds N boxes, "
+        "Box list PriorBox is a 2-D Tensor with shape [M, 4] holds M boxes, "
        "each box is represented as [xmin, ymin, xmax, ymax], "
        "[xmin, ymin] is the left top coordinate of the anchor box, "
        "if the input is image feature map, they are close to the origin "
@@ -66,7 +64,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
        "coordinate of the anchor box.");
    AddInput("PriorBoxVar",
             "(Tensor, default Tensor<float>) "
-             "PriorBoxVar is a 2-D Tensor with shape [M, 4] holds N group "
+             "PriorBoxVar is a 2-D Tensor with shape [M, 4] holds M group "
             "of variance.");
    AddInput(
        "TargetBox",
@@ -85,14 +83,29 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
        .InEnum({"encode_center_size", "decode_center_size"});
    AddOutput(
        "OutputBox",
-        "(Tensor, default Tensor<float>)"
+        "(LoDTensor or Tensor) "
        "(Tensor) The output of box_coder_op, a tensor with shape [N, M, 4] "
        "representing the result of N target boxes encoded/decoded with "
        "M Prior boxes and variances.");
    AddComment(R"DOC(
 Bounding Box Coder Operator.
-Encode/Decode the priorbox information with the target bounding box.
+Encode/Decode the target bounding box with the priorbox information.
+The Encoding schema described below:
+ox = (tx - px) / pw / pxv
+oy = (ty - py) / ph / pyv
+ow = log(abs(tw / pw)) / pwv 
+oh = log(abs(th / ph)) / phv 
+The Decoding schema described below:
+ox = (pw * pxv * tx + px) - tw / 2
+oy = (ph * pyv * ty + py) - th / 2
+ow = exp(pwv * tw) * pw + tw / 2
+oh = exp(phv * th) * ph + th / 2
+where tx, ty, tw, th denote the target box's center coordinates, width and
+height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor)
+center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
+of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates,
+width and height.
 )DOC");
  }
 };

--- a/paddle/operators/box_coder_op.cu
+++ b/paddle/operators/box_coder_op.cu
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -108,7 +108,7 @@ class BoxCoderCUDAKernel : public framework::OpKernel<T> {
    auto* output_box = context.Output<Tensor>("OutputBox");
    if (target_box->lod().size()) {
-      PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL,
+      PADDLE_ENFORCE_EQ(target_box->lod().size(), 1,
                        "Only support 1 level of LoD.");
    }
    auto row = target_box->dims()[0];

--- a/paddle/operators/box_coder_op.h
+++ b/paddle/operators/box_coder_op.h
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -35,67 +35,52 @@ class BoxCoderKernel : public framework::OpKernel<T> {
 public:
  void EncodeCenterSize(const Tensor& target_box, const Tensor& prior_box,
                        const Tensor& prior_box_var, T* output) const {
-    PADDLE_ENFORCE_EQ(target_box.dims().size(), 2,
-                      "The rank of target_box must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box.dims().size(), 2,
-                      "The rank of prior_box must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box_var.dims().size(), 2,
-                      "The rank of prior_box_var must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box.dims()[0], prior_box_var.dims()[0],
-                      "The dims of prior_box must equal to prior_box_var.");
    int64_t row = target_box.dims()[0];
    int64_t col = prior_box.dims()[0];
+    int64_t len = prior_box.dims()[1];
    auto* target_box_data = target_box.data<T>();
    auto* prior_box_data = prior_box.data<T>();
    auto* prior_box_var_data = prior_box_var.data<T>();
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
-        T prior_box_width = prior_box_data[j * 4 + 2] - prior_box_data[j * 4];
+        T prior_box_width =
+            prior_box_data[j * len + 2] - prior_box_data[j * len];
        T prior_box_height =
-            prior_box_data[j * 4 + 3] - prior_box_data[j * 4 + 1];
+            prior_box_data[j * len + 3] - prior_box_data[j * len + 1];
        T prior_box_center_x =
-            (prior_box_data[j * 4 + 2] + prior_box_data[j * 4]) / 2;
+            (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
        T prior_box_center_y =
-            (prior_box_data[j * 4 + 3] + prior_box_data[j * 4 + 1]) / 2;
+            (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
        T target_box_center_x =
-            (target_box_data[i * 4 + 2] + target_box_data[i * 4]) / 2;
+            (target_box_data[i * len + 2] + target_box_data[i * len]) / 2;
        T target_box_center_y =
-            (target_box_data[i * 4 + 3] + target_box_data[i * 4 + 1]) / 2;
+            (target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2;
        T target_box_width =
-            target_box_data[i * 4 + 2] - target_box_data[i * 4];
+            target_box_data[i * len + 2] - target_box_data[i * len];
        T target_box_height =
-            target_box_data[i * 4 + 3] - target_box_data[i * 4 + 1];
+            target_box_data[i * len + 3] - target_box_data[i * len + 1];
-        size_t offset = i * col * 4 + j * 4;
+        size_t offset = i * col * len + j * len;
        output[offset] = (target_box_center_x - prior_box_center_x) /
-                         prior_box_width / prior_box_var_data[j * 4];
+                         prior_box_width / prior_box_var_data[j * len];
        output[offset + 1] = (target_box_center_y - prior_box_center_y) /
-                             prior_box_height / prior_box_var_data[j * 4 + 1];
+                             prior_box_height / prior_box_var_data[j * len + 1];
        output[offset + 2] =
            std::log(std::fabs(target_box_width / prior_box_width)) /
-            prior_box_var_data[j * 4 + 2];
+            prior_box_var_data[j * len + 2];
        output[offset + 3] =
            std::log(std::fabs(target_box_height / prior_box_height)) /
-            prior_box_var_data[j * 4 + 3];
+            prior_box_var_data[j * len + 3];
      }
    }
  }
  void DecodeCenterSize(const Tensor& target_box, const Tensor& prior_box,
                        const Tensor& prior_box_var, T* output) const {
-    PADDLE_ENFORCE_EQ(target_box.dims().size(), 2,
-                      "The rank of target_box must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box.dims().size(), 2,
-                      "The rank of prior_box must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box_var.dims().size(), 2,
-                      "The rank of prior_box_var must be 2.");
-    PADDLE_ENFORCE_EQ(prior_box.dims()[0], prior_box_var.dims()[0],
-                      "The dims of prior_box must equal to prior_box_var.");
    int64_t row = target_box.dims()[0];
    int64_t col = prior_box.dims()[0];
+    int64_t len = prior_box.dims()[1];
    auto* target_box_data = target_box.data<T>();
    auto* prior_box_data = prior_box.data<T>();
@@ -103,29 +88,30 @@ class BoxCoderKernel : public framework::OpKernel<T> {
    for (int64_t i = 0; i < row; ++i) {
      for (int64_t j = 0; j < col; ++j) {
-        T prior_box_width = prior_box_data[j * 4 + 2] - prior_box_data[j * 4];
+        T prior_box_width =
+            prior_box_data[j * len + 2] - prior_box_data[j * len];
        T prior_box_height =
-            prior_box_data[j * 4 + 3] - prior_box_data[j * 4 + 1];
+            prior_box_data[j * len + 3] - prior_box_data[j * len + 1];
        T prior_box_center_x =
-            (prior_box_data[j * 4 + 2] + prior_box_data[j * 4]) / 2;
+            (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
        T prior_box_center_y =
-            (prior_box_data[j * 4 + 3] + prior_box_data[j * 4 + 1]) / 2;
+            (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
-        T target_box_center_x = prior_box_var_data[j * 4] *
+        T target_box_center_x = prior_box_var_data[j * len] *
-                                    target_box_data[i * 4] * prior_box_width +
+                                    target_box_data[i * len] * prior_box_width +
                                prior_box_center_x;
-        T target_box_center_y = prior_box_var_data[j * 4 + 1] *
+        T target_box_center_y = prior_box_var_data[j * len + 1] *
-                                    target_box_data[i * 4 + 1] *
+                                    target_box_data[i * len + 1] *
                                    prior_box_height +
                                prior_box_center_y;
-        T target_box_width = std::exp(prior_box_var_data[j * 4 + 2] *
+        T target_box_width = std::exp(prior_box_var_data[j * len + 2] *
-                                      target_box_data[i * 4 + 2]) *
+                                      target_box_data[i * len + 2]) *
                             prior_box_width;
-        T target_box_height = std::exp(prior_box_var_data[j * 4 + 3] *
+        T target_box_height = std::exp(prior_box_var_data[j * len + 3] *
-                                       target_box_data[i * 4 + 3]) *
+                                       target_box_data[i * len + 3]) *
                              prior_box_height;
-        size_t offset = i * col * 4 + j * 4;
+        size_t offset = i * col * len + j * len;
        output[offset] = target_box_center_x - target_box_width / 2;
        output[offset + 1] = target_box_center_y - target_box_height / 2;
        output[offset + 2] = target_box_center_x + target_box_width / 2;
@@ -146,8 +132,9 @@ class BoxCoderKernel : public framework::OpKernel<T> {
    }
    auto row = target_box->dims()[0];
    auto col = prior_box->dims()[0];
+    auto len = prior_box->dims()[1];
-    output_box->mutable_data<T>({row, col, 4}, context.GetPlace());
+    output_box->mutable_data<T>({row, col, len}, context.GetPlace());
    auto code_type = GetBoxCodeType(context.Attr<std::string>("code_type"));
    T* output = output_box->data<T>();