diff --git a/paddle/operators/box_coder_op.cc b/paddle/operators/box_coder_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..539813d4858b8faef386047f9ef64aa232aefca1 --- /dev/null +++ b/paddle/operators/box_coder_op.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/box_coder_op.h" + +namespace paddle { +namespace operators { + +class BoxCoderOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("PriorBox"), + "Input(PriorBox) of BoxCoderOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("PriorBoxVar"), + "Input(PriorBoxVar) of BoxCoderOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("TargetBox"), + "Input(TargetBox) of BoxCoderOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("OutputBox"), + "Output(OutputBox) of BoxCoderOp should not be null."); + + auto prior_box_dims = ctx->GetInputDim("PriorBox"); + auto prior_box_var_dims = ctx->GetInputDim("PriorBoxVar"); + auto target_box_dims = ctx->GetInputDim("TargetBox"); + + PADDLE_ENFORCE_EQ(prior_box_dims.size(), 2, + "The rank of Input of PriorBoxVar must be 2"); + PADDLE_ENFORCE_EQ(prior_box_dims[1], 4, "The shape of PriorBox is [N, 4]"); + PADDLE_ENFORCE_EQ(prior_box_dims, prior_box_var_dims); + PADDLE_ENFORCE_EQ(target_box_dims.size(), 2, + "The rank of Input of TargetBox must be 2"); + PADDLE_ENFORCE_EQ(target_box_dims[1], 4, + "The shape of TargetBox is [M, 4]"); + + GetBoxCodeType(ctx->Attrs().Get("code_type")); + + ctx->SetOutputDim( + "OutputBox", + framework::make_ddim({target_box_dims[0], prior_box_dims[0], 4})); + ctx->ShareLoD("TargetBox", /*->*/ "OutputBox"); + } +}; + +class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { + public: + BoxCoderOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "PriorBox", + "(Tensor, default Tensor) " + "Box list PriorBox is a 2-D Tensor with shape [M, 4] holds M boxes, " + "each box is represented as [xmin, ymin, xmax, ymax], " + "[xmin, ymin] is the left top coordinate of the anchor box, " + "if the input is image feature map, they are close to the origin " + "of the coordinate system. [xmax, ymax] is the right bottom " + "coordinate of the anchor box."); + AddInput("PriorBoxVar", + "(Tensor, default Tensor) " + "PriorBoxVar is a 2-D Tensor with shape [M, 4] holds M group " + "of variance."); + AddInput( + "TargetBox", + "(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape " + "[N, 4], each box is represented as [xmin, ymin, xmax, ymax], " + "[xmin, ymin] is the left top coordinate of the box if the input " + "is image feature map, they are close to the origin of the coordinate " + "system. [xmax, ymax] is the right bottom coordinate of the box. " + "This tensor can contain LoD information to represent a batch " + "of inputs. One instance of this batch can contain different " + "numbers of entities."); + AddAttr("code_type", + "(string, default encode_center_size) " + "the code type used with the target box") + .SetDefault("encode_center_size") + .InEnum({"encode_center_size", "decode_center_size"}); + AddOutput( + "OutputBox", + "(LoDTensor or Tensor) " + "(Tensor) The output of box_coder_op, a tensor with shape [N, M, 4] " + "representing the result of N target boxes encoded/decoded with " + "M Prior boxes and variances."); + + AddComment(R"DOC( +Bounding Box Coder Operator. +Encode/Decode the target bounding box with the priorbox information. +The Encoding schema described below: +ox = (tx - px) / pw / pxv +oy = (ty - py) / ph / pyv +ow = log(abs(tw / pw)) / pwv +oh = log(abs(th / ph)) / phv +The Decoding schema described below: +ox = (pw * pxv * tx * + px) - tw / 2 +oy = (ph * pyv * ty * + py) - th / 2 +ow = exp(pwv * tw) * pw + tw / 2 +oh = exp(phv * th) * ph + th / 2 +where tx, ty, tw, th denote the target box's center coordinates, width and +height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor) +center coordinates, width and height. pxv, pyv, pwv, phv denote the variance +of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates, +width and height. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker); +REGISTER_OP_CPU_KERNEL(box_coder, ops::BoxCoderKernel, + ops::BoxCoderKernel); diff --git a/paddle/operators/box_coder_op.cu b/paddle/operators/box_coder_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..98bd93457fafb49f2af5e1ff258fbfa9f9985600 --- /dev/null +++ b/paddle/operators/box_coder_op.cu @@ -0,0 +1,150 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/box_coder_op.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +template +__global__ void EncodeCenterSizeKernel(const T* prior_box_data, + const T* prior_box_var_data, + const T* target_box_data, const int row, + const int col, const int len, + T* output) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < row * col) { + const int row_idx = idx / col; + const int col_idx = idx % col; + T prior_box_width = + prior_box_data[col_idx * len + 2] - prior_box_data[col_idx * len]; + T prior_box_height = + prior_box_data[col_idx * len + 3] - prior_box_data[col_idx * len + 1]; + T prior_box_center_x = + (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; + T prior_box_center_y = (prior_box_data[col_idx * len + 3] + + prior_box_data[col_idx * len + 1]) / + 2; + + T target_box_center_x = + (target_box_data[row_idx * len + 2] + target_box_data[row_idx * len]) / + 2; + T target_box_center_y = (target_box_data[row_idx * len + 3] + + target_box_data[row_idx * len + 1]) / + 2; + T target_box_width = + target_box_data[row_idx * len + 2] - target_box_data[row_idx * len]; + T target_box_height = + target_box_data[row_idx * len + 3] - target_box_data[row_idx * len + 1]; + + output[idx * len] = (target_box_center_x - prior_box_center_x) / + prior_box_width / prior_box_var_data[col_idx * len]; + output[idx * len + 1] = (target_box_center_y - prior_box_center_y) / + prior_box_height / + prior_box_var_data[col_idx * len + 1]; + output[idx * len + 2] = log(fabs(target_box_width / prior_box_width)) / + prior_box_var_data[col_idx * len + 2]; + output[idx * len + 3] = log(fabs(target_box_height / prior_box_height)) / + prior_box_var_data[col_idx * len + 3]; + } +} + +template +__global__ void DecodeCenterSizeKernel(const T* prior_box_data, + const T* prior_box_var_data, + const T* target_box_data, const int row, + const int col, const int len, + T* output) { + const int idx = threadIdx.x + blockIdx.x * blockDim.x; + if (idx < row * col) { + const int row_idx = idx / col; + const int col_idx = idx % col; + T prior_box_width = + prior_box_data[col_idx * len + 2] - prior_box_data[col_idx * len]; + T prior_box_height = + prior_box_data[col_idx * len + 3] - prior_box_data[col_idx * len + 1]; + T prior_box_center_x = + (prior_box_data[col_idx * len + 2] + prior_box_data[col_idx * len]) / 2; + T prior_box_center_y = (prior_box_data[col_idx * len + 3] + + prior_box_data[col_idx * len + 1]) / + 2; + + T target_box_width = exp(prior_box_var_data[col_idx * len + 2] * + target_box_data[row_idx * len + 2]) * + prior_box_width; + T target_box_height = exp(prior_box_var_data[col_idx * len + 3] * + target_box_data[row_idx * len + 3]) * + prior_box_height; + T target_box_center_x = prior_box_var_data[col_idx * len] * + target_box_data[row_idx * len] * + prior_box_width + + prior_box_center_x; + T target_box_center_y = prior_box_var_data[col_idx * len + 1] * + target_box_data[row_idx * len + 1] * + prior_box_height + + prior_box_center_y; + + output[idx * len] = target_box_center_x - target_box_width / 2; + output[idx * len + 1] = target_box_center_y - target_box_height / 2; + output[idx * len + 2] = target_box_center_x + target_box_width / 2; + output[idx * len + 3] = target_box_center_y + target_box_height / 2; + } +} + +template +class BoxCoderCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + PADDLE_ENFORCE(platform::is_gpu_place(context.GetPlace()), + "This kernel only runs on GPU device."); + auto* prior_box = context.Input("PriorBox"); + auto* prior_box_var = context.Input("PriorBoxVar"); + auto* target_box = context.Input("TargetBox"); + auto* output_box = context.Output("OutputBox"); + + if (target_box->lod().size()) { + PADDLE_ENFORCE_EQ(target_box->lod().size(), 1, + "Only support 1 level of LoD."); + } + auto row = target_box->dims()[0]; + auto col = prior_box->dims()[0]; + auto len = prior_box->dims()[1]; + int block = 512; + int grid = (row * col + block - 1) / block; + auto& device_ctx = context.cuda_device_context(); + + const T* prior_box_data = prior_box->data(); + const T* prior_box_var_data = prior_box_var->data(); + const T* target_box_data = target_box->data(); + + output_box->mutable_data({row, col, len}, context.GetPlace()); + T* output = output_box->data(); + + auto code_type = GetBoxCodeType(context.Attr("code_type")); + if (code_type == BoxCodeType::kEncodeCenterSize) { + EncodeCenterSizeKernel<<>>( + prior_box_data, prior_box_var_data, target_box_data, row, col, len, + output); + } else if (code_type == BoxCodeType::kDecodeCenterSize) { + DecodeCenterSizeKernel<<>>( + prior_box_data, prior_box_var_data, target_box_data, row, col, len, + output); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(box_coder, ops::BoxCoderCUDAKernel, + ops::BoxCoderCUDAKernel); diff --git a/paddle/operators/box_coder_op.h b/paddle/operators/box_coder_op.h new file mode 100644 index 0000000000000000000000000000000000000000..086251f6e066f082743f332ce72918c6e572ce19 --- /dev/null +++ b/paddle/operators/box_coder_op.h @@ -0,0 +1,151 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/op_registry.h" +#include "paddle/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +enum class BoxCodeType { kEncodeCenterSize = 0, kDecodeCenterSize = 1 }; + +inline BoxCodeType GetBoxCodeType(const std::string& type) { + if (type == "encode_center_size") { + return BoxCodeType::kEncodeCenterSize; + } else if (type == "decode_center_size") { + return BoxCodeType::kDecodeCenterSize; + } + PADDLE_THROW("Not support type %s.", type); +} + +template +class BoxCoderKernel : public framework::OpKernel { + public: + void EncodeCenterSize(const framework::Tensor& target_box, + const framework::Tensor& prior_box, + const framework::Tensor& prior_box_var, + T* output) const { + int64_t row = target_box.dims()[0]; + int64_t col = prior_box.dims()[0]; + int64_t len = prior_box.dims()[1]; + auto* target_box_data = target_box.data(); + auto* prior_box_data = prior_box.data(); + auto* prior_box_var_data = prior_box_var.data(); + + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + T prior_box_width = + prior_box_data[j * len + 2] - prior_box_data[j * len]; + T prior_box_height = + prior_box_data[j * len + 3] - prior_box_data[j * len + 1]; + T prior_box_center_x = + (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + T prior_box_center_y = + (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + + T target_box_center_x = + (target_box_data[i * len + 2] + target_box_data[i * len]) / 2; + T target_box_center_y = + (target_box_data[i * len + 3] + target_box_data[i * len + 1]) / 2; + T target_box_width = + target_box_data[i * len + 2] - target_box_data[i * len]; + T target_box_height = + target_box_data[i * len + 3] - target_box_data[i * len + 1]; + + size_t offset = i * col * len + j * len; + output[offset] = (target_box_center_x - prior_box_center_x) / + prior_box_width / prior_box_var_data[j * len]; + output[offset + 1] = (target_box_center_y - prior_box_center_y) / + prior_box_height / prior_box_var_data[j * len + 1]; + output[offset + 2] = + std::log(std::fabs(target_box_width / prior_box_width)) / + prior_box_var_data[j * len + 2]; + output[offset + 3] = + std::log(std::fabs(target_box_height / prior_box_height)) / + prior_box_var_data[j * len + 3]; + } + } + } + void DecodeCenterSize(const framework::Tensor& target_box, + const framework::Tensor& prior_box, + const framework::Tensor& prior_box_var, + T* output) const { + int64_t row = target_box.dims()[0]; + int64_t col = prior_box.dims()[0]; + int64_t len = prior_box.dims()[1]; + + auto* target_box_data = target_box.data(); + auto* prior_box_data = prior_box.data(); + auto* prior_box_var_data = prior_box_var.data(); + + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + T prior_box_width = + prior_box_data[j * len + 2] - prior_box_data[j * len]; + T prior_box_height = + prior_box_data[j * len + 3] - prior_box_data[j * len + 1]; + T prior_box_center_x = + (prior_box_data[j * len + 2] + prior_box_data[j * len]) / 2; + T prior_box_center_y = + (prior_box_data[j * len + 3] + prior_box_data[j * len + 1]) / 2; + + T target_box_center_x = prior_box_var_data[j * len] * + target_box_data[i * len] * prior_box_width + + prior_box_center_x; + T target_box_center_y = prior_box_var_data[j * len + 1] * + target_box_data[i * len + 1] * + prior_box_height + + prior_box_center_y; + T target_box_width = std::exp(prior_box_var_data[j * len + 2] * + target_box_data[i * len + 2]) * + prior_box_width; + T target_box_height = std::exp(prior_box_var_data[j * len + 3] * + target_box_data[i * len + 3]) * + prior_box_height; + + size_t offset = i * col * len + j * len; + output[offset] = target_box_center_x - target_box_width / 2; + output[offset + 1] = target_box_center_y - target_box_height / 2; + output[offset + 2] = target_box_center_x + target_box_width / 2; + output[offset + 3] = target_box_center_y + target_box_height / 2; + } + } + } + + void Compute(const framework::ExecutionContext& context) const override { + auto* prior_box = context.Input("PriorBox"); + auto* prior_box_var = context.Input("PriorBoxVar"); + auto* target_box = context.Input("TargetBox"); + auto* output_box = context.Output("OutputBox"); + + if (target_box->lod().size()) { + PADDLE_ENFORCE_EQ(target_box->lod().size(), 1UL, + "Only support 1 level of LoD."); + } + auto row = target_box->dims()[0]; + auto col = prior_box->dims()[0]; + auto len = prior_box->dims()[1]; + + output_box->mutable_data({row, col, len}, context.GetPlace()); + + auto code_type = GetBoxCodeType(context.Attr("code_type")); + T* output = output_box->data(); + if (code_type == BoxCodeType::kEncodeCenterSize) { + EncodeCenterSize(*target_box, *prior_box, *prior_box_var, output); + } else if (code_type == BoxCodeType::kDecodeCenterSize) { + DecodeCenterSize(*target_box, *prior_box, *prior_box_var, output); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/v2/fluid/tests/test_box_coder_op.py b/python/paddle/v2/fluid/tests/test_box_coder_op.py new file mode 100644 index 0000000000000000000000000000000000000000..0dc18476fd5dce7cd293f6cb85f419be7d88ec95 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_box_coder_op.py @@ -0,0 +1,127 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import sys +import math +from op_test import OpTest + + +def box_coder(target_box, prior_box, prior_box_var, output_box, code_type): + prior_box_x = ( + (prior_box[:, 2] + prior_box[:, 0]) / 2).reshape(1, prior_box.shape[0]) + prior_box_y = ( + (prior_box[:, 3] + prior_box[:, 1]) / 2).reshape(1, prior_box.shape[0]) + prior_box_width = ( + (prior_box[:, 2] - prior_box[:, 0])).reshape(1, prior_box.shape[0]) + prior_box_height = ( + (prior_box[:, 3] - prior_box[:, 1])).reshape(1, prior_box.shape[0]) + prior_box_var = prior_box_var.reshape(1, prior_box_var.shape[0], + prior_box_var.shape[1]) + + if (code_type == "EncodeCenterSize"): + target_box_x = ((target_box[:, 2] + target_box[:, 0]) / 2).reshape( + target_box.shape[0], 1) + target_box_y = ((target_box[:, 3] + target_box[:, 1]) / 2).reshape( + target_box.shape[0], 1) + target_box_width = ((target_box[:, 2] - target_box[:, 0])).reshape( + target_box.shape[0], 1) + target_box_height = ((target_box[:, 3] - target_box[:, 1])).reshape( + target_box.shape[0], 1) + + output_box[:,:,0] = (target_box_x - prior_box_x) / prior_box_width / \ + prior_box_var[:,:,0] + output_box[:,:,1] = (target_box_y - prior_box_y) / prior_box_height / \ + prior_box_var[:,:,1] + output_box[:,:,2] = np.log(np.fabs(target_box_width / prior_box_width)) / \ + prior_box_var[:,:,2] + output_box[:,:,3] = np.log(np.fabs(target_box_height / prior_box_height)) / \ + prior_box_var[:,:,3] + + elif (code_type == "DecodeCenterSize"): + target_box = target_box.reshape(target_box.shape[0], 1, + target_box.shape[1]) + target_box_x = prior_box_var[:,:,0] * target_box[:,:,0] * \ + prior_box_width + prior_box_x + target_box_y = prior_box_var[:,:,1] * target_box[:,:,1] * \ + prior_box_height + prior_box_y + target_box_width = np.exp(prior_box_var[:,:,2] * target_box[:,:,2]) * \ + prior_box_width + target_box_height = np.exp(prior_box_var[:,:,3] * target_box[:,:,3]) * \ + prior_box_height + output_box[:, :, 0] = target_box_x - target_box_width / 2 + output_box[:, :, 1] = target_box_y - target_box_height / 2 + output_box[:, :, 2] = target_box_x + target_box_width / 2 + output_box[:, :, 3] = target_box_y + target_box_height / 2 + + +def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type): + n = target_box.shape[0] + m = prior_box.shape[0] + output_box = np.zeros((n, m, 4), dtype=np.float32) + for i in range(len(lod) - 1): + box_coder(target_box[lod[i]:lod[i + 1], :], prior_box, prior_box_var, + output_box[lod[i]:lod[i + 1], :, :], code_type) + return output_box + + +class TestBoxCoderOp(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[0, 20]] + prior_box = np.random.random((10, 4)).astype('float32') + prior_box_var = np.random.random((10, 4)).astype('float32') + target_box = np.random.random((20, 4)).astype('float32') + code_type = "DecodeCenterSize" + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type) + + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': target_box, + } + self.attrs = {'code_type': 'decode_center_size'} + self.outputs = {'OutputBox': output_box} + + +class TestBoxCoderOpWithLoD(OpTest): + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "box_coder" + lod = [[0, 4, 12, 20]] + prior_box = np.random.random((10, 4)).astype('float32') + prior_box_var = np.random.random((10, 4)).astype('float32') + target_box = np.random.random((20, 4)).astype('float32') + code_type = "EncodeCenterSize" + output_box = batch_box_coder(prior_box, prior_box_var, target_box, + lod[0], code_type) + + self.inputs = { + 'PriorBox': prior_box, + 'PriorBoxVar': prior_box_var, + 'TargetBox': (target_box, lod), + } + self.attrs = {'code_type': 'encode_center_size'} + self.outputs = {'OutputBox': output_box} + + +if __name__ == '__main__': + unittest.main()