提交 58bfaea8 编写于 作者: G gaoyuan

update according to the code review

上级 72eccb23
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -31,23 +31,21 @@ class BoxCoderOp : public framework::OperatorWithKernel {
auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar");
auto target_box_dims = ctx->GetInputDim("TargetBox");
PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2UL,
"The shape of PriorBox is [N, 4]");
PADDLE_ENFORCE_EQ(prior_box_dims[1], 4UL,
"The shape of PriorBox is [N, 4]");
PADDLE_ENFORCE_EQ(prior_box_var_dims.size(), 2UL,
"The shape of PriorBoxVar is [N, 4]");
PADDLE_ENFORCE_EQ(prior_box_var_dims[1], 4UL,
"The shape of PriorBoxVar is [N, 4]");
PADDLE_ENFORCE_EQ(target_box_dims.size(), 2UL,
"The shape of TargetBox is [M, 4]");
PADDLE_ENFORCE_EQ(target_box_dims[1], 4UL,
PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2,
"The rank of Input of PriorBoxVar must be 2");
PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]");
PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims);
PADDLE_ENFORCE_EQ(target_box_dims.size(), 2,
"The rank of Input of TargetBox must be 2");
PADDLE_ENFORCE_EQ(target_box_dims[1], 4,
"The shape of TargetBox is [M, 4]");
GetBoxCodeType(ctx->Attrs().Get<std::string>("code_type"));
ctx->SetOutputDim("OutputBox", framework::make_ddim({target_box_dims[0],
target_box_dims[1]}));
ctx->SetOutputDim(
"OutputBox",
framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4}));
ctx->ShareLoD("TargetBox", /*->*/ "OutputBox");
}
};
......@@ -58,7 +56,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput(
"PriorBox",
"(Tensor, default Tensor<float>) "
"Box list PriorBox is a 2-D Tensor with shape [M, 4] holds N boxes, "
"Box list PriorBox is a 2-D Tensor with shape [M, 4] holds M boxes, "
"each box is represented as [xmin, ymin, xmax, ymax], "
"[xmin, ymin] is the left top coordinate of the anchor box, "
"if the input is image feature map, they are close to the origin "
......@@ -66,7 +64,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"coordinate of the anchor box.");
AddInput("PriorBoxVar",
"(Tensor, default Tensor<float>) "
"PriorBoxVar is a 2-D Tensor with shape [M, 4] holds N group "
"PriorBoxVar is a 2-D Tensor with shape [M, 4] holds M group "
"of variance.");
AddInput(
"TargetBox",
......@@ -85,14 +83,29 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
.InEnum({"encode_center_size", "decode_center_size"});
AddOutput(
"OutputBox",
"(Tensor, default Tensor<float>)"
"(LoDTensor or Tensor) "
"(Tensor) The output of box_coder_op, a tensor with shape [N, M, 4] "
"representing the result of N target boxes encoded/decoded with "
"M Prior boxes and variances.");
AddComment(R"DOC(
Bounding Box Coder Operator.
Encode/Decode the priorbox information with the target bounding box.
Encode/Decode the target bounding box with the priorbox information.
The Encoding schema described below:
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
The Decoding schema described below:
ox = (pw * pxv * tx + px) - tw / 2
oy = (ph * pyv * ty + py) - th / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2
where tx, ty, tw, th denote the target box's center coordinates, width and
height respectively. Similarly, px, py, pw, ph denote the priorbox's (anchor)
center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates,
width and height.
)DOC");
}
};
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -108,7 +108,7 @@ class BoxCoderCUDAKernel : public framework::OpKernel<T> {
auto* output_box = context.Output<Tensor>("OutputBox");
if (target_box->lod().size()) {
PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL,
PADDLE_ENFORCE_EQ(target_box->lod().size(), 1,
"Only support 1 level of LoD.");
}
auto row = target_box->dims()[0];
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
......@@ -35,67 +35,52 @@ class BoxCoderKernel : public framework::OpKernel<T> {
public:
void EncodeCenterSize(const Tensor& target_box, const Tensor& prior_box,
const Tensor& prior_box_var, T* output) const {
PADDLE_ENFORCE_EQ(target_box.dims().size(), 2,
"The rank of target_box must be 2.");
PADDLE_ENFORCE_EQ(prior_box.dims().size(), 2,
"The rank of prior_box must be 2.");
PADDLE_ENFORCE_EQ(prior_box_var.dims().size(), 2,
"The rank of prior_box_var must be 2.");
PADDLE_ENFORCE_EQ(prior_box.dims()[0], prior_box_var.dims()[0],
"The dims of prior_box must equal to prior_box_var.");
int64_t row = target_box.dims()[0];
int64_t col = prior_box.dims()[0];
int64_t len = prior_box.dims()[1];
auto* target_box_data = target_box.data<T>();
auto* prior_box_data = prior_box.data<T>();
auto* prior_box_var_data = prior_box_var.data<T>();
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
T prior_box_width = prior_box_data[j * 4 + 2] - prior_box_data[j * 4];
T prior_box_width =
prior_box_data[j * len + 2] - prior_box_data[j * len];
T prior_box_height =
prior_box_data[j * 4 + 3] - prior_box_data[j * 4 + 1];
prior_box_data[j * len + 3] - prior_box_data[j * len + 1];
T prior_box_center_x =
(prior_box_data[j * 4 + 2] + prior_box_data[j * 4]) / 2;
(prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
T prior_box_center_y =
(prior_box_data[j * 4 + 3] + prior_box_data[j * 4 + 1]) / 2;
(prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
T target_box_center_x =
(target_box_data[i * 4 + 2] + target_box_data[i * 4]) / 2;
(target_box_data[i * len + 2] + target_box_data[i * len]) / 2;
T target_box_center_y =
(target_box_data[i * 4 + 3] + target_box_data[i * 4 + 1]) / 2;
(target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2;
T target_box_width =
target_box_data[i * 4 + 2] - target_box_data[i * 4];
target_box_data[i * len + 2] - target_box_data[i * len];
T target_box_height =
target_box_data[i * 4 + 3] - target_box_data[i * 4 + 1];
target_box_data[i * len + 3] - target_box_data[i * len + 1];
size_t offset = i * col * 4 + j * 4;
size_t offset = i * col * len + j * len;
output[offset] = (target_box_center_x - prior_box_center_x) /
prior_box_width / prior_box_var_data[j * 4];
prior_box_width / prior_box_var_data[j * len];
output[offset + 1] = (target_box_center_y - prior_box_center_y) /
prior_box_height / prior_box_var_data[j * 4 + 1];
prior_box_height / prior_box_var_data[j * len + 1];
output[offset + 2] =
std::log(std::fabs(target_box_width / prior_box_width)) /
prior_box_var_data[j * 4 + 2];
prior_box_var_data[j * len + 2];
output[offset + 3] =
std::log(std::fabs(target_box_height / prior_box_height)) /
prior_box_var_data[j * 4 + 3];
prior_box_var_data[j * len + 3];
}
}
}
void DecodeCenterSize(const Tensor& target_box, const Tensor& prior_box,
const Tensor& prior_box_var, T* output) const {
PADDLE_ENFORCE_EQ(target_box.dims().size(), 2,
"The rank of target_box must be 2.");
PADDLE_ENFORCE_EQ(prior_box.dims().size(), 2,
"The rank of prior_box must be 2.");
PADDLE_ENFORCE_EQ(prior_box_var.dims().size(), 2,
"The rank of prior_box_var must be 2.");
PADDLE_ENFORCE_EQ(prior_box.dims()[0], prior_box_var.dims()[0],
"The dims of prior_box must equal to prior_box_var.");
int64_t row = target_box.dims()[0];
int64_t col = prior_box.dims()[0];
int64_t len = prior_box.dims()[1];
auto* target_box_data = target_box.data<T>();
auto* prior_box_data = prior_box.data<T>();
......@@ -103,29 +88,30 @@ class BoxCoderKernel : public framework::OpKernel<T> {
for (int64_t i = 0; i < row; ++i) {
for (int64_t j = 0; j < col; ++j) {
T prior_box_width = prior_box_data[j * 4 + 2] - prior_box_data[j * 4];
T prior_box_width =
prior_box_data[j * len + 2] - prior_box_data[j * len];
T prior_box_height =
prior_box_data[j * 4 + 3] - prior_box_data[j * 4 + 1];
prior_box_data[j * len + 3] - prior_box_data[j * len + 1];
T prior_box_center_x =
(prior_box_data[j * 4 + 2] + prior_box_data[j * 4]) / 2;
(prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2;
T prior_box_center_y =
(prior_box_data[j * 4 + 3] + prior_box_data[j * 4 + 1]) / 2;
(prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2;
T target_box_center_x = prior_box_var_data[j * 4] *
target_box_data[i * 4] * prior_box_width +
T target_box_center_x = prior_box_var_data[j * len] *
target_box_data[i * len] * prior_box_width +
prior_box_center_x;
T target_box_center_y = prior_box_var_data[j * 4 + 1] *
target_box_data[i * 4 + 1] *
T target_box_center_y = prior_box_var_data[j * len + 1] *
target_box_data[i * len + 1] *
prior_box_height +
prior_box_center_y;
T target_box_width = std::exp(prior_box_var_data[j * 4 + 2] *
target_box_data[i * 4 + 2]) *
T target_box_width = std::exp(prior_box_var_data[j * len + 2] *
target_box_data[i * len + 2]) *
prior_box_width;
T target_box_height = std::exp(prior_box_var_data[j * 4 + 3] *
target_box_data[i * 4 + 3]) *
T target_box_height = std::exp(prior_box_var_data[j * len + 3] *
target_box_data[i * len + 3]) *
prior_box_height;
size_t offset = i * col * 4 + j * 4;
size_t offset = i * col * len + j * len;
output[offset] = target_box_center_x - target_box_width / 2;
output[offset + 1] = target_box_center_y - target_box_height / 2;
output[offset + 2] = target_box_center_x + target_box_width / 2;
......@@ -146,8 +132,9 @@ class BoxCoderKernel : public framework::OpKernel<T> {
}
auto row = target_box->dims()[0];
auto col = prior_box->dims()[0];
auto len = prior_box->dims()[1];
output_box->mutable_data<T>({row, col, 4}, context.GetPlace());
output_box->mutable_data<T>({row, col, len}, context.GetPlace());
auto code_type = GetBoxCodeType(context.Attr<std::string>("code_type"));
T* output = output_box->data<T>();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册