diff --git a/src/common/types.cpp b/src/common/types.cpp index cbaf289e27ffcd34eff7b113e06219a595de5257..cc709291e9b94c9fd7ca625ecd598e54e3a0d114 100644 --- a/src/common/types.cpp +++ b/src/common/types.cpp @@ -108,6 +108,7 @@ const char *G_OP_TYPE_SLICE = "slice"; const char *G_OP_TYPE_ANCHOR_GENERATOR = "anchor_generator"; const char *G_OP_TYPE_GENERATE_PROPOSALS = "generate_proposals"; const char *G_OP_TYPE_PSROI_POOL = "psroi_pool"; +const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform"; std::unordered_map< std::string, std::pair, std::vector>> @@ -204,5 +205,6 @@ std::unordered_map< {G_OP_TYPE_GENERATE_PROPOSALS, {{"Scores", "BboxDeltas", "ImInfo", "Anchors", "Variances"}, {"RpnRois", "RpnRoiProbs"}}}, - {G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}}}; + {G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}}, + {G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}}}; } // namespace paddle_mobile diff --git a/src/common/types.h b/src/common/types.h index 267015539fa7d3ed7868f841ce22a83ed665e972..25094daaa1d028339f0efa2e75558a8b900913c6 100644 --- a/src/common/types.h +++ b/src/common/types.h @@ -196,6 +196,7 @@ extern const char *G_OP_TYPE_SLICE; extern const char *G_OP_TYPE_ANCHOR_GENERATOR; extern const char *G_OP_TYPE_GENERATE_PROPOSALS; extern const char *G_OP_TYPE_PSROI_POOL; +extern const char *G_OP_TYPE_ROI_PERSPECTIVE; extern std::unordered_map< std::string, std::pair, std::vector>> diff --git a/src/framework/load_ops.h b/src/framework/load_ops.h index 0727c0cb04ec93047e612863f23dd92cb131cbed..890966cf557f276d29a9da7380b7e76337b6748e 100644 --- a/src/framework/load_ops.h +++ b/src/framework/load_ops.h @@ -315,3 +315,6 @@ LOAD_OP1(generate_proposals, CPU); #ifdef PSROI_POOL_OP LOAD_OP1(psroi_pool, CPU); #endif +#ifdef ROI_PERSPECTIVE_OP +LOAD_OP1(roi_perspective_transform, CPU); +#endif diff --git a/src/operators/detection_ops.cpp b/src/operators/detection_ops.cpp index 5ce53dca939f5725d03fa0137ee4655515d242ab..38a149a355f089b9c270b00e783ca0a28ae51062 100644 --- a/src/operators/detection_ops.cpp +++ b/src/operators/detection_ops.cpp @@ -64,6 +64,22 @@ void PSRoiPoolOp::InferShape() const { } #endif +#ifdef ROI_PERSPECTIVE_OP +template +void RoiPerspectiveOp::InferShape() const { + const auto &input_dims = this->param_.input_x_->dims(); + const auto &rois_dims = this->param_.input_rois_->dims(); + const int transformed_height = this->param_.transformed_height_; + const int transformed_width = this->param_.transformed_width_; + std::vector out_dims_v({rois_dims[0], // num_rois + input_dims[1], // channels + static_cast(transformed_height), + static_cast(transformed_width)}); + auto out_dims = framework::make_ddim(out_dims_v); + this->param_.output_->Resize(out_dims); +} +#endif + } // namespace operators } // namespace paddle_mobile @@ -78,4 +94,7 @@ REGISTER_OPERATOR_CPU(generate_proposals, ops::ProposalOp); #ifdef PSROI_POOL_OP REGISTER_OPERATOR_CPU(psroi_pool, ops::PSRoiPoolOp); #endif +#ifdef ROI_PERSPECTIVE_OP +REGISTER_OPERATOR_CPU(roi_perspective_transform, ops::RoiPerspectiveOp); +#endif #endif diff --git a/src/operators/detection_ops.h b/src/operators/detection_ops.h index e69a2d3e7dff43b42d25ca63f67c1ce067e28df9..38d0890756a84bfc70119f30d8515159c57cca21 100644 --- a/src/operators/detection_ops.h +++ b/src/operators/detection_ops.h @@ -34,5 +34,9 @@ DECLARE_OPERATOR(Proposal, ProposalParam, ProposalKernel); DECLARE_OPERATOR(PSRoiPool, PSRoiPoolParam, PSRoiPoolKernel); #endif +#ifdef ROI_PERSPECTIVE_OP +DECLARE_OPERATOR(RoiPerspective, RoiPerspectiveParam, RoiPerspectiveKernel); +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/src/operators/kernel/arm/resize_kernel.cpp b/src/operators/kernel/arm/resize_kernel.cpp index b53b7545e33c929fe0b55bccd68e7b955db0d676..6a6af367889f5abaed96f3d2c75196d1fa0b96d8 100644 --- a/src/operators/kernel/arm/resize_kernel.cpp +++ b/src/operators/kernel/arm/resize_kernel.cpp @@ -22,8 +22,8 @@ namespace operators { void BiLinearResizeTensor(const float* src, const int src_height, const int src_width, float* dst, const int dst_height, const int dst_width) { - const float scale_w = src_width / (float)dst_width; - const float scale_h = src_height / (float)dst_height; + const float scale_w = src_width / static_cast(dst_width); + const float scale_h = src_height / static_cast(dst_height); float* dst_data = dst; const float* src_data = src; @@ -33,8 +33,8 @@ void BiLinearResizeTensor(const float* src, const int src_height, int src_h = std::floor(fh); fh -= src_h; - const float w_h0 = std::abs((float)1.0 - fh); - const float w_h1 = std::abs(fh); + const float w_h0 = fabs(1.0 - fh); + const float w_h1 = fabs(fh); const int dst_offset_1 = dst_h * dst_width; const int src_offset_1 = src_h * src_width; @@ -45,8 +45,8 @@ void BiLinearResizeTensor(const float* src, const int src_height, float fw = dst_w * scale_w; int src_w = std::floor(fw); fw -= src_w; - const float w_w0 = std::abs((float)1.0 - fw); - const float w_w1 = std::abs(fw); + const float w_w0 = fabs(1.0 - fw); + const float w_w1 = fabs(fw); float dst_value = 0; diff --git a/src/operators/kernel/arm/roi_perspective_kernel.cpp b/src/operators/kernel/arm/roi_perspective_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8cdb8f04b2f1808a9dff46ef04159ed291481292 --- /dev/null +++ b/src/operators/kernel/arm/roi_perspective_kernel.cpp @@ -0,0 +1,277 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef ROI_PERSPECTIVE_OP + +#include +#include "operators/kernel/detection_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template +inline bool GT_E(T a, T b) { + return (a > b) || fabs(a - b) < 1e-4; +} + +template +inline bool LT_E(T a, T b) { + return (a < b) || fabs(a - b) < 1e-4; +} + +// check if (x, y) is in the boundary of roi +template +bool in_quad(T x, T y, T roi_x[], T roi_y[]) { + for (int i = 0; i < 4; i++) { + T xs = roi_x[i]; + T ys = roi_y[i]; + T xe = roi_x[(i + 1) % 4]; + T ye = roi_y[(i + 1) % 4]; + if (fabs(ys - ye) < 1e-4) { + if (fabs(y - ys) < 1e-4 && fabs(y - ye) < 1e-4 && + GT_E(x, std::min(xs, xe)) && LT_E(x, std::max(xs, xe))) { + return true; + } + } else { + T intersec_x = (y - ys) * (xe - xs) / (ye - ys) + xs; + if (fabs(intersec_x - x) < 1e-4 && GT_E(y, std::min(ys, ye)) && + LT_E(y, std::max(ys, ye))) { + return true; + } + } + } + + int n_cross = 0; + for (int i = 0; i < 4; i++) { + T xs = roi_x[i]; + T ys = roi_y[i]; + T xe = roi_x[(i + 1) % 4]; + T ye = roi_y[(i + 1) % 4]; + if (fabs(ys - ye) < 1e-4) { + continue; + } + if (LT_E(y, std::min(ys, ye)) || (y > std::max(ys, ye))) { + continue; + } + T intersec_x = (y - ys) * (xe - xs) / (ye - ys) + xs; + if (fabs(intersec_x - x) < 1e-4) { + return true; + } + if (intersec_x > x) { + n_cross++; + } + } + return (n_cross % 2 == 1); +} + +template +void get_transform_matrix(const int transformed_width, + const int transformed_height, T roi_x[], T roi_y[], + T matrix[]) { + T x0 = roi_x[0]; + T x1 = roi_x[1]; + T x2 = roi_x[2]; + T x3 = roi_x[3]; + T y0 = roi_y[0]; + T y1 = roi_y[1]; + T y2 = roi_y[2]; + T y3 = roi_y[3]; + + // Estimate the height and width of RoI + T len1 = sqrt((x0 - x1) * (x0 - x1) + (y0 - y1) * (y0 - y1)); + T len2 = sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2)); + T len3 = sqrt((x2 - x3) * (x2 - x3) + (y2 - y3) * (y2 - y3)); + T len4 = sqrt((x3 - x0) * (x3 - x0) + (y3 - y0) * (y3 - y0)); + T estimated_height = (len2 + len4) / 2.0; + T estimated_width = (len1 + len3) / 2.0; + + // Get the normalized height and normalized width + int normalized_height = transformed_height; + int normalized_width = + std::round(estimated_width * (normalized_height - 1) / estimated_height) + + 1; + normalized_width = std::min(normalized_width, transformed_width); + + T dx1 = x1 - x2; + T dx2 = x3 - x2; + T dx3 = x0 - x1 + x2 - x3; + T dy1 = y1 - y2; + T dy2 = y3 - y2; + T dy3 = y0 - y1 + y2 - y3; + + matrix[6] = (dx3 * dy2 - dx2 * dy3) / (dx1 * dy2 - dx2 * dy1) / + (normalized_width - 1); + matrix[7] = (dx1 * dy3 - dx3 * dy1) / (dx1 * dy2 - dx2 * dy1) / + (normalized_height - 1); + matrix[8] = 1; + + matrix[3] = (y1 - y0 + matrix[6] * (normalized_width - 1) * y1) / + (normalized_width - 1); + matrix[4] = (y3 - y0 + matrix[7] * (normalized_height - 1) * y3) / + (normalized_height - 1); + matrix[5] = y0; + + matrix[0] = (x1 - x0 + matrix[6] * (normalized_width - 1) * x1) / + (normalized_width - 1); + matrix[1] = (x3 - x0 + matrix[7] * (normalized_height - 1) * x3) / + (normalized_height - 1); + matrix[2] = x0; +} + +// Get the source coordinates in the input feature map. +// (u, v, w)^matrix = matrix * (out_w, out_h, 1)^matrix +// in_w = u / w +// in_h = v / w +template +void get_source_coords(T matrix[], int out_w, int out_h, T *in_w, T *in_h) { + T u = matrix[0] * out_w + matrix[1] * out_h + matrix[2]; + T v = matrix[3] * out_w + matrix[4] * out_h + matrix[5]; + T w = matrix[6] * out_w + matrix[7] * out_h + matrix[8]; + + in_w[0] = u / w; + in_h[0] = v / w; +} + +template +void bilinear_interpolate(const T *in_data, const int channels, const int width, + const int height, int in_n, int in_c, T in_w, T in_h, + T *val) { + // Deal with cases that source coords are out of feature map boundary + if ((-0.5 > in_w) || (in_w > width - 0.5) || (-0.5 > in_h) || + (in_h > height - 0.5)) { + // empty + val[0] = 0.0; + return; + } + + if (in_w < 0) { + in_w = 0; + } + if (in_h < 0) { + in_h = 0; + } + + int in_w_floor = floor(in_w); + int in_h_floor = floor(in_h); + int in_w_ceil; + int in_h_ceil; + + if (GT_E(in_w_floor, width - 1)) { + in_w_ceil = in_w_floor = width - 1; + in_w = static_cast(in_w_floor); + } else { + in_w_ceil = in_w_floor + 1; + } + + if (GT_E(in_h_floor, height - 1)) { + in_h_ceil = in_h_floor = height - 1; + in_h = static_cast(in_h_floor); + } else { + in_h_ceil = in_h_floor + 1; + } + T w_floor = in_w - in_w_floor; + T h_floor = in_h - in_h_floor; + T w_ceil = 1 - w_floor; + T h_ceil = 1 - h_floor; + const T *data = in_data + (in_n * channels + in_c) * height * width; + // Do bilinear interpolation + T v1 = data[in_h_floor * width + in_w_floor]; + T v2 = data[in_h_ceil * width + in_w_floor]; + T v3 = data[in_h_ceil * width + in_w_ceil]; + T v4 = data[in_h_floor * width + in_w_ceil]; + T w1 = w_ceil * h_ceil; + T w2 = w_ceil * h_floor; + T w3 = w_floor * h_floor; + T w4 = w_floor * h_ceil; + val[0] = w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4; +} + +template <> +bool RoiPerspectiveKernel::Init(RoiPerspectiveParam *param) { + return true; +} + +template <> +void RoiPerspectiveKernel::Compute( + const RoiPerspectiveParam ¶m) { + const auto *input_x = param.input_x_; + const auto *input_rois = param.input_rois_; + auto *output = param.output_; + + const auto &in_dims = input_x->dims(); + const int channels = in_dims[1]; + const int in_height = in_dims[2]; + const int in_width = in_dims[3]; + const int rois_num = input_rois->dims()[0]; + const int transformed_height = param.transformed_height_; + const int transformed_width = param.transformed_width_; + const float spatial_scale = param.spatial_scale_; + + const float *input_data = input_x->data(); + const float *rois_data = input_rois->data(); + float *output_data = output->mutable_data(); + + std::vector roi2image(rois_num); + const auto &lod = input_rois->lod().back(); + for (size_t i = 0; i < lod.size() - 1; ++i) { + for (size_t j = lod[i]; j < lod[i + 1]; ++j) { + roi2image[j] = i; + } + } + + for (int n = 0; n < rois_num; ++n) { + const float *n_rois = rois_data + n * 8; + float roi_x[4]; + float roi_y[4]; + for (int k = 0; k < 4; ++k) { + roi_x[k] = n_rois[2 * k] * spatial_scale; + roi_y[k] = n_rois[2 * k + 1] * spatial_scale; + } + int image_id = roi2image[n]; + // Get transform matrix + float transform_matrix[9]; + get_transform_matrix(transformed_width, transformed_height, roi_x, + roi_y, transform_matrix); + for (int c = 0; c < channels; ++c) { + for (int out_h = 0; out_h < transformed_height; ++out_h) { + for (int out_w = 0; out_w < transformed_width; ++out_w) { + int out_index = + n * channels * transformed_height * transformed_width + + c * transformed_height * transformed_width + + out_h * transformed_width + out_w; + float in_w, in_h; + get_source_coords(transform_matrix, out_w, out_h, &in_w, + &in_h); + if (in_quad(in_w, in_h, roi_x, roi_y)) { + if ((-0.5 > in_w) || (in_w > (in_width - 0.5)) || (-0.5 > in_h) || + (in_h > (in_height - 0.5))) { + output_data[out_index] = 0.0; + } else { + bilinear_interpolate(input_data, channels, in_width, + in_height, image_id, c, in_w, in_h, + output_data + out_index); + } + } else { + output_data[out_index] = 0.0; + } + } + } + } + } +} + +} // namespace operators +} // namespace paddle_mobile + +#endif // ROI_PERSPECTIVE_OP diff --git a/src/operators/kernel/detection_kernel.h b/src/operators/kernel/detection_kernel.h index c0047c23901922ae87ba188d605b0ccca8128fc0..3d669b2d3d19a40928d54816b65deca6c5169673 100644 --- a/src/operators/kernel/detection_kernel.h +++ b/src/operators/kernel/detection_kernel.h @@ -136,5 +136,35 @@ class PSRoiPoolParam : public OpParam { DECLARE_KERNEL(PSRoiPool, PSRoiPoolParam); #endif +#ifdef ROI_PERSPECTIVE_OP +template +class RoiPerspectiveParam : public OpParam { + public: + RoiPerspectiveParam(const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs, + const Scope &scope) { + input_x_ = OpParam::GetVarValue("X", inputs, scope); + input_rois_ = + OpParam::GetVarValue("ROIs", inputs, scope); + output_ = OpParam::GetVarValue("Out", outputs, scope); + + spatial_scale_ = OpParam::GetAttr("spatial_scale", attrs); + transformed_height_ = OpParam::GetAttr("transformed_height", attrs); + transformed_width_ = OpParam::GetAttr("transformed_width", attrs); + } + + public: + framework::Tensor *input_x_; + framework::LoDTensor *input_rois_; + framework::Tensor *output_; + + float spatial_scale_; + int transformed_height_; + int transformed_width_; +}; + +DECLARE_KERNEL(RoiPerspective, RoiPerspectiveParam); +#endif + } // namespace operators } // namespace paddle_mobile diff --git a/tools/op.cmake b/tools/op.cmake index 0f37eb7e98f053f519a1fb0d6a2829b33e82e45a..d72f1a65c829fc0ff5202b8bb4c16385091215b5 100644 --- a/tools/op.cmake +++ b/tools/op.cmake @@ -291,6 +291,7 @@ if(NOT FOUND_MATCH) set(ANCHOR_GENERATOR_OP ON) set(PROPOSAL_OP ON) set(PSROI_POOL_OP ON) + set(ROI_PERSPECTIVE_OP ON) endif() # option(BATCHNORM_OP "" ON) @@ -585,3 +586,6 @@ endif() if (PSROI_POOL_OP) add_definitions(-DPSROI_POOL_OP) endif() +if (ROI_PERSPECTIVE_OP) + add_definitions(-DROI_PERSPECTIVE_OP) +endif()