diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 50ffef72baa1c5f210fd6e92de05d24a39ac86b4..7068a37ef009520fd4782a90c6a7706d26cfd58d 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -315,7 +315,7 @@ paddle.fluid.layers.roi_perspective_transform ArgSpec(args=['input', 'rois', 'tr paddle.fluid.layers.generate_proposal_labels ArgSpec(args=['rpn_rois', 'gt_classes', 'is_crowd', 'gt_boxes', 'im_info', 'batch_size_per_im', 'fg_fraction', 'fg_thresh', 'bg_thresh_hi', 'bg_thresh_lo', 'bbox_reg_weights', 'class_nums', 'use_random'], varargs=None, keywords=None, defaults=(256, 0.25, 0.25, 0.5, 0.0, [0.1, 0.1, 0.2, 0.2], None, True)) paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'im_info', 'anchors', 'variances', 'pre_nms_top_n', 'post_nms_top_n', 'nms_thresh', 'min_size', 'eta', 'name'], varargs=None, keywords=None, defaults=(6000, 1000, 0.5, 0.1, 1.0, None)) paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) +paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'axis', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, 0, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc index 06fbb9815c52ea69e3aa9e893512e039853b9514..5db600b19a314094df333908ef37a8c5619e4a50 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cc +++ b/paddle/fluid/operators/detection/box_coder_op.cc @@ -32,31 +32,53 @@ class BoxCoderOp : public framework::OperatorWithKernel { if (ctx->IsRuntime()) { PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2, - "The rank of Input of PriorBoxVar must be 2"); + "The rank of Input of PriorBox must be 2"); PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]"); if (ctx->HasInput("PriorBoxVar")) { auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); - PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims); + PADDLE_ENFORCE( + prior_box_var_dims.size() == 1 || prior_box_var_dims.size() == 2, + "Input(PriorBoxVar) of BoxCoderOp should be 1 or 2."); + if (prior_box_var_dims.size() == 1) { + PADDLE_ENFORCE_EQ( + prior_box_var_dims[0], 4, + "The 1st dimension of Input(PriorBoxVar) should be 1" + "when the rank is 1."); + } else { + PADDLE_ENFORCE_EQ( + prior_box_dims, prior_box_var_dims, + "The dimension of Input(PriorBoxVar) should be equal to" + "the dimension of Input(PriorBox when the rank is 2.)"); + } } auto code_type = GetBoxCodeType(ctx->Attrs().Get("code_type")); + int axis = ctx->Attrs().Get("axis"); if (code_type == BoxCodeType::kEncodeCenterSize) { PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, "The rank of Input of TargetBox must be 2"); PADDLE_ENFORCE_EQ(target_box_dims[1], 4, "The shape of TargetBox is [M, 4]"); + ctx->SetOutputDim( + "OutputBox", + framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4})); } else if (code_type == BoxCodeType::kDecodeCenterSize) { PADDLE_ENFORCE_EQ(target_box_dims.size(), 3, "The rank of Input of TargetBox must be 3"); - PADDLE_ENFORCE_EQ(target_box_dims[1], prior_box_dims[0]); + if (axis == 0) { + PADDLE_ENFORCE_EQ(target_box_dims[1], prior_box_dims[0]); + } else if (axis == 1) { + PADDLE_ENFORCE_EQ(target_box_dims[0], prior_box_dims[0]); + } else { + PADDLE_THROW("axis must be 0 or 1."); + } PADDLE_ENFORCE_EQ(target_box_dims[2], prior_box_dims[1]); + ctx->ShareDim("TargetBox", /*->*/ "OutputBox"); } } - ctx->SetOutputDim( - "OutputBox", - framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4})); + ctx->ShareLoD("TargetBox", /*->*/ "OutputBox"); } }; @@ -100,6 +122,12 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { "(bool, default true) " "whether treat the priorbox as a noramlized box") .SetDefault(true); + AddAttr("axis", + "(int, default 1)" + "which axis to broadcast for box decode, it is only valid" + "when code type is decode_center_size") + .SetDefault(0) + .InEnum({0, 1}); AddOutput("OutputBox", "(LoDTensor or Tensor) " "When code_type is 'encode_center_size', the output tensor of " diff --git a/paddle/fluid/operators/detection/box_coder_op.cu b/paddle/fluid/operators/detection/box_coder_op.cu index a7af111f63d654319dd1d90d2032956951dfe49e..ca62afd8edfc8ed8f682c8998818050f00524807 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cu +++ b/paddle/fluid/operators/detection/box_coder_op.cu @@ -20,7 +20,8 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data, const T* prior_box_var_data, const T* target_box_data, const int row, const int col, const int len, - const bool normalized, T* output) { + const bool normalized, + const T prior_box_var_size, T* output) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; if (idx < row * col) { const int row_idx = idx / col; @@ -30,11 +31,9 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data, T prior_box_height = prior_box_data[col_idx * len + 3] - prior_box_data[col_idx * len + 1] + (normalized == false); - T prior_box_center_x = - (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; - T prior_box_center_y = (prior_box_data[col_idx * len + 3] + - prior_box_data[col_idx * len + 1]) / - 2; + T prior_box_center_x = prior_box_data[col_idx * len] + prior_box_width / 2; + T prior_box_center_y = + prior_box_data[col_idx * len + 1] + prior_box_height / 2; T target_box_center_x = (target_box_data[row_idx * len + 2] + target_box_data[row_idx * len]) / @@ -55,10 +54,14 @@ __global__ void EncodeCenterSizeKernel(const T* prior_box_data, output[idx * len + 2] = log(fabs(target_box_width / prior_box_width)); output[idx * len + 3] = log(fabs(target_box_height / prior_box_height)); if (prior_box_var_data) { - output[idx * len] /= prior_box_var_data[col_idx * len]; - output[idx * len + 1] /= prior_box_var_data[col_idx * len + 1]; - output[idx * len + 2] /= prior_box_var_data[col_idx * len + 2]; - output[idx * len + 3] /= prior_box_var_data[col_idx * len + 3]; + int prior_var_offset = 0; + if (prior_box_var_size == 2) { + prior_var_offset = col_idx * len; + } + output[idx * len] /= prior_box_var_data[prior_var_offset]; + output[idx * len + 1] /= prior_box_var_data[prior_var_offset + 1]; + output[idx * len + 2] /= prior_box_var_data[prior_var_offset + 2]; + output[idx * len + 3] /= prior_box_var_data[prior_var_offset + 3]; } } } @@ -68,33 +71,48 @@ __global__ void DecodeCenterSizeKernel(const T* prior_box_data, const T* prior_box_var_data, const T* target_box_data, const int row, const int col, const int len, - const bool normalized, T* output) { + const bool normalized, + const T prior_box_var_size, + const int axis, T* output) { const int idx = threadIdx.x + blockIdx.x * blockDim.x; + int prior_box_offset = 0; if (idx < row * col) { const int col_idx = idx % col; - T prior_box_width = prior_box_data[col_idx * len + 2] - - prior_box_data[col_idx * len] + (normalized == false); - T prior_box_height = prior_box_data[col_idx * len + 3] - - prior_box_data[col_idx * len + 1] + + const int row_idx = idx / col; + if (axis == 0) + prior_box_offset = col_idx * len; + else if (axis == 1) + prior_box_offset = row_idx * len; + T prior_box_width = prior_box_data[prior_box_offset + 2] - + prior_box_data[prior_box_offset] + + (normalized == false); + T prior_box_height = prior_box_data[prior_box_offset + 3] - + prior_box_data[prior_box_offset + 1] + (normalized == false); T prior_box_center_x = - (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; - T prior_box_center_y = (prior_box_data[col_idx * len + 3] + - prior_box_data[col_idx * len + 1]) / - 2; + prior_box_data[prior_box_offset] + prior_box_width / 2; + T prior_box_center_y = + prior_box_data[prior_box_offset + 1] + prior_box_height / 2; T target_box_width, target_box_height; T target_box_center_x, target_box_center_y; if (prior_box_var_data) { - target_box_width = exp(prior_box_var_data[col_idx * len + 2] * + int prior_var_offset = 0; + if (prior_box_var_size == 2) { + if (axis == 0) + prior_var_offset = col_idx * len; + else if (axis == 1) + prior_var_offset = row_idx * len; + } + target_box_width = exp(prior_box_var_data[prior_var_offset + 2] * target_box_data[idx * len + 2]) * prior_box_width; - target_box_height = exp(prior_box_var_data[col_idx * len + 3] * + target_box_height = exp(prior_box_var_data[prior_var_offset + 3] * target_box_data[idx * len + 3]) * prior_box_height; - target_box_center_x = prior_box_var_data[col_idx * len] * + target_box_center_x = prior_box_var_data[prior_var_offset] * target_box_data[idx * len] * prior_box_width + prior_box_center_x; - target_box_center_y = prior_box_var_data[col_idx * len + 1] * + target_box_center_y = prior_box_var_data[prior_var_offset + 1] * target_box_data[idx * len + 1] * prior_box_height + prior_box_center_y; @@ -131,14 +149,25 @@ class BoxCoderCUDAKernel : public framework::OpKernel { const T* prior_box_data = prior_box->data(); const T* target_box_data = target_box->data(); const T* prior_box_var_data = nullptr; - if (prior_box_var) prior_box_var_data = prior_box_var->data(); + auto prior_box_var_size = 0; + if (prior_box_var) { + prior_box_var_data = prior_box_var->data(); + prior_box_var_size = prior_box_var->dims().size(); + } if (target_box->lod().size()) { PADDLE_ENFORCE_EQ(target_box->lod().size(), 1, "Only support 1 level of LoD."); } + auto code_type = GetBoxCodeType(context.Attr("code_type")); + bool normalized = context.Attr("box_normalized"); + int axis = context.Attr("axis"); + auto row = target_box->dims()[0]; auto col = prior_box->dims()[0]; + if (code_type == BoxCodeType::kDecodeCenterSize) { + col = target_box->dims()[1]; + } auto len = prior_box->dims()[1]; int block = 512; int grid = (row * col + block - 1) / block; @@ -147,16 +176,14 @@ class BoxCoderCUDAKernel : public framework::OpKernel { output_box->mutable_data({row, col, len}, context.GetPlace()); T* output = output_box->data(); - auto code_type = GetBoxCodeType(context.Attr("code_type")); - bool normalized = context.Attr("box_normalized"); if (code_type == BoxCodeType::kEncodeCenterSize) { EncodeCenterSizeKernel<<>>( prior_box_data, prior_box_var_data, target_box_data, row, col, len, - normalized, output); + normalized, prior_box_var_size, output); } else if (code_type == BoxCodeType::kDecodeCenterSize) { DecodeCenterSizeKernel<<>>( prior_box_data, prior_box_var_data, target_box_data, row, col, len, - normalized, output); + normalized, prior_box_var_size, axis, output); } } }; diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index b2a2bcdce932032a761a1fc064fe622f7629f9bf..986869d8a359a9920938c225f938639dcea6a6de 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -53,10 +53,9 @@ class BoxCoderKernel : public framework::OpKernel { T prior_box_height = prior_box_data[j * len + 3] - prior_box_data[j * len + 1] + (normalized == false); - T prior_box_center_x = - (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + T prior_box_center_x = prior_box_data[j * len] + prior_box_width / 2; T prior_box_center_y = - (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + prior_box_data[j * len + 1] + prior_box_height / 2; T target_box_center_x = (target_box_data[i * len + 2] + target_box_data[i * len]) / 2; @@ -78,10 +77,14 @@ class BoxCoderKernel : public framework::OpKernel { output[offset + 3] = std::log(std::fabs(target_box_height / prior_box_height)); if (prior_box_var) { - output[offset] /= prior_box_var_data[j * len]; - output[offset + 1] /= prior_box_var_data[j * len + 1]; - output[offset + 2] /= prior_box_var_data[j * len + 2]; - output[offset + 3] /= prior_box_var_data[j * len + 3]; + int prior_var_offset = 0; + if (prior_box_var->dims().size() == 2) { + prior_var_offset = j * len; + } + output[offset] /= prior_box_var_data[prior_var_offset]; + output[offset + 1] /= prior_box_var_data[prior_var_offset + 1]; + output[offset + 2] /= prior_box_var_data[prior_var_offset + 2]; + output[offset + 3] /= prior_box_var_data[prior_var_offset + 3]; } } } @@ -89,48 +92,63 @@ class BoxCoderKernel : public framework::OpKernel { void DecodeCenterSize(const framework::Tensor* target_box, const framework::Tensor* prior_box, const framework::Tensor* prior_box_var, - const bool normalized, T* output) const { + const bool normalized, const int axis, + T* output) const { int64_t row = target_box->dims()[0]; - int64_t col = prior_box->dims()[0]; - int64_t len = prior_box->dims()[1]; + int64_t col = target_box->dims()[1]; + int64_t len = target_box->dims()[2]; auto* target_box_data = target_box->data(); auto* prior_box_data = prior_box->data(); const T* prior_box_var_data = nullptr; if (prior_box_var) prior_box_var_data = prior_box_var->data(); - + int prior_box_offset = 0; #ifdef PADDLE_WITH_MKLML #pragma omp parallel for collapse(2) #endif for (int64_t i = 0; i < row; ++i) { for (int64_t j = 0; j < col; ++j) { size_t offset = i * col * len + j * len; - T prior_box_width = prior_box_data[j * len + 2] - - prior_box_data[j * len] + (normalized == false); - T prior_box_height = prior_box_data[j * len + 3] - - prior_box_data[j * len + 1] + + if (axis == 0) { + prior_box_offset = j * len; + } else if (axis == 1) { + prior_box_offset = i * len; + } + T prior_box_width = prior_box_data[prior_box_offset + 2] - + prior_box_data[prior_box_offset] + + (normalized == false); + T prior_box_height = prior_box_data[prior_box_offset + 3] - + prior_box_data[prior_box_offset + 1] + (normalized == false); T prior_box_center_x = - (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + prior_box_data[prior_box_offset] + prior_box_width / 2; T prior_box_center_y = - (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + prior_box_data[prior_box_offset + 1] + prior_box_height / 2; T target_box_center_x = 0, target_box_center_y = 0; T target_box_width = 0, target_box_height = 0; if (prior_box_var) { - target_box_center_x = prior_box_var_data[j * len] * + int prior_var_offset = 0; + if (prior_box_var->dims().size() == 2) { + if (axis == 0) + prior_var_offset = j * len; + else if (axis == 1) + prior_var_offset = i * len; + } + target_box_center_x = prior_box_var_data[prior_var_offset] * target_box_data[offset] * prior_box_width + prior_box_center_x; - target_box_center_y = prior_box_var_data[j * len + 1] * + target_box_center_y = prior_box_var_data[prior_var_offset + 1] * target_box_data[offset + 1] * prior_box_height + prior_box_center_y; - target_box_width = std::exp(prior_box_var_data[j * len + 2] * + target_box_width = std::exp(prior_box_var_data[prior_var_offset + 2] * target_box_data[offset + 2]) * prior_box_width; - target_box_height = std::exp(prior_box_var_data[j * len + 3] * - target_box_data[offset + 3]) * - prior_box_height; + target_box_height = + std::exp(prior_box_var_data[prior_var_offset + 3] * + target_box_data[offset + 3]) * + prior_box_height; } else { target_box_center_x = target_box_data[offset] * prior_box_width + prior_box_center_x; @@ -157,25 +175,29 @@ class BoxCoderKernel : public framework::OpKernel { auto* prior_box_var = context.Input("PriorBoxVar"); auto* target_box = context.Input("TargetBox"); auto* output_box = context.Output("OutputBox"); - + const int axis = context.Attr("axis"); if (target_box->lod().size()) { PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL, "Only support 1 level of LoD."); } + auto code_type = GetBoxCodeType(context.Attr("code_type")); + bool normalized = context.Attr("box_normalized"); + auto row = target_box->dims()[0]; auto col = prior_box->dims()[0]; + if (code_type == BoxCodeType::kDecodeCenterSize) { + col = target_box->dims()[1]; + } auto len = prior_box->dims()[1]; output_box->mutable_data({row, col, len}, context.GetPlace()); - auto code_type = GetBoxCodeType(context.Attr("code_type")); - bool normalized = context.Attr("box_normalized"); T* output = output_box->data(); if (code_type == BoxCodeType::kEncodeCenterSize) { EncodeCenterSize(target_box, prior_box, prior_box_var, normalized, output); } else if (code_type == BoxCodeType::kDecodeCenterSize) { - DecodeCenterSize(target_box, prior_box, prior_box_var, normalized, + DecodeCenterSize(target_box, prior_box, prior_box_var, normalized, axis, output); } } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 8aed97dc59b100d4e37832e0a148d73662742ba0..c844050c5db28bc42006338a6d5054c4b27a30a6 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -342,6 +342,7 @@ def box_coder(prior_box, target_box, code_type="encode_center_size", box_normalized=True, + axis=0, name=None): """ ${comment} @@ -352,6 +353,7 @@ def box_coder(prior_box, target_box(${target_box_type}): ${target_box_comment} code_type(${code_type_type}): ${code_type_comment} box_normalized(${box_normalized_type}): ${box_normalized_comment} + axis(${axis_type}): ${axis_comment} Returns: output_box(${output_box_type}): ${output_box_comment} @@ -372,8 +374,11 @@ def box_coder(prior_box, "PriorBoxVar": prior_box_var, "TargetBox": target_box }, - attrs={"code_type": code_type, - "box_normalized": box_normalized}, + attrs={ + "code_type": code_type, + "box_normalized": box_normalized, + "axis": axis + }, outputs={"OutputBox": output_box}) return output_box diff --git a/python/paddle/fluid/tests/unittests/test_box_coder_op.py b/python/paddle/fluid/tests/unittests/test_box_coder_op.py index 2511c5c22e012babdeb71a71d3546456ea2ceaf3..b6f6bc1450fb3a19883a21f9a296292a8b66df0a 100644 --- a/python/paddle/fluid/tests/unittests/test_box_coder_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_coder_op.py @@ -21,22 +21,32 @@ import math from op_test import OpTest -def box_coder(target_box, prior_box, prior_box_var, output_box, code_type, - box_normalized): - prior_box_x = ( - (prior_box[:, 2] + prior_box[:, 0]) / 2).reshape(1, prior_box.shape[0]) - prior_box_y = ( - (prior_box[:, 3] + prior_box[:, 1]) / 2).reshape(1, prior_box.shape[0]) - prior_box_width = ( - (prior_box[:, 2] - prior_box[:, 0])).reshape(1, prior_box.shape[0]) - prior_box_height = ( - (prior_box[:, 3] - prior_box[:, 1])).reshape(1, prior_box.shape[0]) - prior_box_var = prior_box_var.reshape(1, prior_box_var.shape[0], - prior_box_var.shape[1]) - if not box_normalized: - prior_box_height = prior_box_height + 1 - prior_box_width = prior_box_width + 1 - +def box_coder(target_box, + prior_box, + prior_box_var, + output_box, + code_type, + box_normalized, + axis=0): + prior_box_width = prior_box[:, 2] - prior_box[:, 0] + \ + (box_normalized==False) + prior_box_height = prior_box[:, 3] - prior_box[:, 1] + \ + (box_normalized==False) + prior_box_x = prior_box_width * 0.5 + prior_box[:, 0] + prior_box_y = prior_box_height * 0.5 + prior_box[:, 1] + if axis == 0: + prior_box_width = prior_box_width.reshape(1, prior_box.shape[0]) + prior_box_height = prior_box_height.reshape(1, prior_box.shape[0]) + prior_box_x = prior_box_x.reshape(1, prior_box.shape[0]) + prior_box_y = prior_box_y.reshape(1, prior_box.shape[0]) + else: + prior_box_width = prior_box_width.reshape(prior_box.shape[0], 1) + prior_box_height = prior_box_height.reshape(prior_box.shape[0], 1) + prior_box_x = prior_box_x.reshape(prior_box.shape[0], 1) + prior_box_y = prior_box_y.reshape(prior_box.shape[0], 1) + if prior_box_var.ndim == 2: + prior_box_var = prior_box_var.reshape(1, prior_box_var.shape[0], + prior_box_var.shape[1]) if (code_type == "EncodeCenterSize"): target_box_x = ((target_box[:, 2] + target_box[:, 0]) / 2).reshape( target_box.shape[0], 1) @@ -49,26 +59,52 @@ def box_coder(target_box, prior_box, prior_box_var, output_box, code_type, if not box_normalized: target_box_height = target_box_height + 1 target_box_width = target_box_width + 1 - - output_box[:,:,0] = (target_box_x - prior_box_x) / prior_box_width / \ - prior_box_var[:,:,0] - output_box[:,:,1] = (target_box_y - prior_box_y) / prior_box_height / \ - prior_box_var[:,:,1] - output_box[:,:,2] = np.log(np.fabs(target_box_width / prior_box_width)) / \ - prior_box_var[:,:,2] - output_box[:,:,3] = np.log(np.fabs(target_box_height / prior_box_height)) / \ - prior_box_var[:,:,3] + if prior_box_var.ndim == 1: + output_box[:,:,0] = (target_box_x - prior_box_x) / \ + prior_box_width / \ + prior_box_var[0] + output_box[:,:,1] = (target_box_y - prior_box_y) / \ + prior_box_height / \ + prior_box_var[1] + output_box[:,:,2] = np.log(np.fabs(target_box_width / \ + prior_box_width)) / \ + prior_box_var[2] + output_box[:,:,3] = np.log(np.fabs(target_box_height / \ + prior_box_height)) / \ + prior_box_var[3] + else: + output_box[:,:,0] = (target_box_x - prior_box_x) / \ + prior_box_width / \ + prior_box_var[:,:,0] + output_box[:,:,1] = (target_box_y - prior_box_y) / \ + prior_box_height / \ + prior_box_var[:,:,1] + output_box[:,:,2] = np.log(np.fabs(target_box_width / \ + prior_box_width)) / \ + prior_box_var[:,:,2] + output_box[:,:,3] = np.log(np.fabs(target_box_height / \ + prior_box_height)) / \ + prior_box_var[:,:,3] elif (code_type == "DecodeCenterSize"): - target_box_x = prior_box_var[:,:,0] * target_box[:,:,0] * \ - prior_box_width + prior_box_x - target_box_y = prior_box_var[:,:,1] * target_box[:,:,1] * \ - prior_box_height + prior_box_y - target_box_width = np.exp(prior_box_var[:,:,2] * target_box[:,:,2]) * \ - prior_box_width - target_box_height = np.exp(prior_box_var[:,:,3] * target_box[:,:,3]) * \ - prior_box_height - + if prior_box_var.ndim == 1: + target_box_x = prior_box_var[0] * target_box[:,:,0] * \ + prior_box_width + prior_box_x + target_box_y = prior_box_var[1] * target_box[:,:,1] * \ + prior_box_height + prior_box_y + target_box_width = np.exp(prior_box_var[2] * target_box[:,:,2]) * \ + prior_box_width + target_box_height = np.exp(prior_box_var[3] * target_box[:,:,3]) * \ + prior_box_height + else: + target_box_x = prior_box_var[:,:,0] * target_box[:,:,0] * \ + prior_box_width + prior_box_x + target_box_y = prior_box_var[:,:,1] * target_box[:,:,1] * \ + prior_box_height + prior_box_y + target_box_width = np.exp(prior_box_var[:,:,2] * \ + target_box[:,:,2]) * prior_box_width + target_box_height = np.exp(prior_box_var[:,:,3] * \ + target_box[:,:,3]) * prior_box_height output_box[:, :, 0] = target_box_x - target_box_width / 2 output_box[:, :, 1] = target_box_y - target_box_height / 2 output_box[:, :, 2] = target_box_x + target_box_width / 2 @@ -78,10 +114,17 @@ def box_coder(target_box, prior_box, prior_box_var, output_box, code_type, output_box[:, :, 3] = output_box[:, :, 3] - 1 -def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type, - box_normalized): +def batch_box_coder(prior_box, + prior_box_var, + target_box, + lod, + code_type, + box_normalized, + axis=0): n = target_box.shape[0] m = prior_box.shape[0] + if code_type == "DecodeCenterSize": + m = target_box.shape[1] output_box = np.zeros((n, m, 4), dtype=np.float32) cur_offset = 0 for i in range(len(lod)): @@ -91,10 +134,8 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type, output_box[cur_offset:(cur_offset + lod[i]), :, :], code_type, box_normalized) elif (code_type == "DecodeCenterSize"): - box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :], - prior_box, prior_box_var, - output_box[cur_offset:(cur_offset + lod[i]), :, :], - code_type, box_normalized) + box_coder(target_box, prior_box, prior_box_var, output_box, + code_type, box_normalized, axis) cur_offset += lod[i] return output_box @@ -111,6 +152,32 @@ class TestBoxCoderOp(OpTest): target_box = np.random.random((5, 10, 4)).astype('float32') code_type = "DecodeCenterSize" box_normalized = False + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type, box_normalized) + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': target_box, + } + self.attrs = { + 'code_type': 'decode_center_size', + 'box_normalized': False + } + self.outputs = {'OutputBox': output_box} + + +class TestBoxCoderOpWithOneRankVar(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[1, 1, 1, 1, 1]] + prior_box = np.random.random((6, 4)).astype('float32') + prior_box_var = np.random.random((4)).astype('float32') + target_box = np.random.random((3, 6, 4)).astype('float32') + code_type = "DecodeCenterSize" + box_normalized = False output_box = batch_box_coder(prior_box, prior_box_var, target_box, lod[0], code_type, box_normalized) @@ -176,5 +243,34 @@ class TestBoxCoderOpWithLoD(OpTest): self.outputs = {'OutputBox': output_box} +class TestBoxCoderOpWithAxis(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[1, 1, 1, 1, 1]] + prior_box = np.random.random((5, 4)).astype('float32') + prior_box_var = np.random.random((4)).astype('float32') + target_box = np.random.random((5, 6, 4)).astype('float32') + code_type = "DecodeCenterSize" + box_normalized = False + axis = 1 + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type, box_normalized, axis) + + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': target_box, + } + self.attrs = { + 'code_type': 'decode_center_size', + 'box_normalized': False, + 'axis': axis + } + self.outputs = {'OutputBox': output_box} + + if __name__ == '__main__': unittest.main()