// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include "include/preprocess_op.h" namespace PaddleDetection { void InitInfo::Run(cv::Mat* im, ImageBlob* data) { data->im_shape_ = {static_cast(im->rows), static_cast(im->cols)}; data->scale_factor_ = {1., 1.}; data->in_net_shape_ = {static_cast(im->rows), static_cast(im->cols)}; } void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) { double e = 1.0; if (is_scale_) { e /= 255.0; } (*im).convertTo(*im, CV_32FC3, e); for (int h = 0; h < im->rows; h++) { for (int w = 0; w < im->cols; w++) { im->at(h, w)[0] = (im->at(h, w)[0] - mean_[0]) / scale_[0]; im->at(h, w)[1] = (im->at(h, w)[1] - mean_[1]) / scale_[1]; im->at(h, w)[2] = (im->at(h, w)[2] - mean_[2]) / scale_[2]; } } } void Permute::Run(cv::Mat* im, ImageBlob* data) { (*im).convertTo(*im, CV_32FC3); int rh = im->rows; int rw = im->cols; int rc = im->channels(); (data->im_data_).resize(rc * rh * rw); float* base = (data->im_data_).data(); for (int i = 0; i < rc; ++i) { cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); } } void Resize::Run(cv::Mat* im, ImageBlob* data) { auto resize_scale = GenerateScale(*im); cv::resize( *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_); data->in_net_shape_ = {static_cast(im->rows), static_cast(im->cols)}; data->im_shape_ = { static_cast(im->rows), static_cast(im->cols), }; data->scale_factor_ = { resize_scale.second, resize_scale.first, }; } std::pair Resize::GenerateScale(const cv::Mat& im) { std::pair resize_scale; int origin_w = im.cols; int origin_h = im.rows; if (keep_ratio_) { int im_size_max = std::max(origin_w, origin_h); int im_size_min = std::min(origin_w, origin_h); int target_size_max = *std::max_element(target_size_.begin(), target_size_.end()); int target_size_min = *std::min_element(target_size_.begin(), target_size_.end()); float scale_min = static_cast(target_size_min) / static_cast(im_size_min); float scale_max = static_cast(target_size_max) / static_cast(im_size_max); float scale_ratio = std::min(scale_min, scale_max); resize_scale = {scale_ratio, scale_ratio}; } else { resize_scale.first = static_cast(target_size_[1]) / static_cast(origin_w); resize_scale.second = static_cast(target_size_[0]) / static_cast(origin_h); } return resize_scale; } void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) { float resize_scale = GenerateScale(*im); int new_shape_w = std::round(im->cols * resize_scale); int new_shape_h = std::round(im->rows * resize_scale); data->im_shape_ = {static_cast(new_shape_h), static_cast(new_shape_w)}; float padw = (target_size_[1] - new_shape_w) / 2.; float padh = (target_size_[0] - new_shape_h) / 2.; int top = std::round(padh - 0.1); int bottom = std::round(padh + 0.1); int left = std::round(padw - 0.1); int right = std::round(padw + 0.1); cv::resize( *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA); data->in_net_shape_ = { static_cast(im->rows), static_cast(im->cols), }; cv::copyMakeBorder(*im, *im, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(127.5)); data->in_net_shape_ = { static_cast(im->rows), static_cast(im->cols), }; data->scale_factor_ = { resize_scale, resize_scale, }; } float LetterBoxResize::GenerateScale(const cv::Mat& im) { int origin_w = im.cols; int origin_h = im.rows; int target_h = target_size_[0]; int target_w = target_size_[1]; float ratio_h = static_cast(target_h) / static_cast(origin_h); float ratio_w = static_cast(target_w) / static_cast(origin_w); float resize_scale = std::min(ratio_h, ratio_w); return resize_scale; } void PadStride::Run(cv::Mat* im, ImageBlob* data) { if (stride_ <= 0) { data->in_net_im_ = im->clone(); return; } int rc = im->channels(); int rh = im->rows; int rw = im->cols; int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_; int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_; cv::copyMakeBorder( *im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0)); data->in_net_im_ = im->clone(); data->in_net_shape_ = { static_cast(im->rows), static_cast(im->cols), }; } void TopDownEvalAffine::Run(cv::Mat* im, ImageBlob* data) { cv::resize(*im, *im, cv::Size(trainsize_[0], trainsize_[1]), 0, 0, interp_); // todo: Simd::ResizeBilinear(); data->in_net_shape_ = { static_cast(trainsize_[1]), static_cast(trainsize_[0]), }; } void GetAffineTrans(const cv::Point2f center, const cv::Point2f input_size, const cv::Point2f output_size, cv::Mat* trans) { cv::Point2f srcTri[3]; cv::Point2f dstTri[3]; float src_w = input_size.x; float dst_w = output_size.x; float dst_h = output_size.y; cv::Point2f src_dir(0, -0.5 * src_w); cv::Point2f dst_dir(0, -0.5 * dst_w); srcTri[0] = center; srcTri[1] = center + src_dir; cv::Point2f src_d = srcTri[0] - srcTri[1]; srcTri[2] = srcTri[1] + cv::Point2f(-src_d.y, src_d.x); dstTri[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5); dstTri[1] = cv::Point2f(dst_w * 0.5, dst_h * 0.5) + dst_dir; cv::Point2f dst_d = dstTri[0] - dstTri[1]; dstTri[2] = dstTri[1] + cv::Point2f(-dst_d.y, dst_d.x); *trans = cv::getAffineTransform(srcTri, dstTri); } void WarpAffine::Run(cv::Mat* im, ImageBlob* data) { cv::cvtColor(*im, *im, cv::COLOR_RGB2BGR); cv::Mat trans(2, 3, CV_32FC1); cv::Point2f center; cv::Point2f input_size; int h = im->rows; int w = im->cols; if (keep_res_) { input_h_ = (h | pad_) + 1; input_w_ = (w + pad_) + 1; input_size = cv::Point2f(input_w_, input_h_); center = cv::Point2f(w / 2, h / 2); } else { float s = std::max(h, w) * 1.0; input_size = cv::Point2f(s, s); center = cv::Point2f(w / 2., h / 2.); } cv::Point2f output_size(input_w_, input_h_); GetAffineTrans(center, input_size, output_size, &trans); cv::warpAffine(*im, *im, trans, cv::Size(input_w_, input_h_)); data->in_net_shape_ = { static_cast(input_h_), static_cast(input_w_), }; } void Pad::Run(cv::Mat* im, ImageBlob* data) { int h = size_[0]; int w = size_[1]; int rh = im->rows; int rw = im->cols; if (h == rh && w == rw){ data->in_net_im_ = im->clone(); return; } cv::copyMakeBorder( *im, *im, 0, h - rh, 0, w - rw, cv::BORDER_CONSTANT, cv::Scalar(114)); data->in_net_im_ = im->clone(); data->in_net_shape_ = { static_cast(im->rows), static_cast(im->cols), }; } // Preprocessor op running order const std::vector Preprocessor::RUN_ORDER = {"InitInfo", "TopDownEvalAffine", "Resize", "LetterBoxResize", "WarpAffine", "NormalizeImage", "PadStride", "Pad", "Permute"}; void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { for (const auto& name : RUN_ORDER) { if (ops_.find(name) != ops_.end()) { ops_[name]->Run(im, data); } } } void CropImg(cv::Mat& img, cv::Mat& crop_img, std::vector& area, std::vector& center, std::vector& scale, float expandratio) { int crop_x1 = std::max(0, area[0]); int crop_y1 = std::max(0, area[1]); int crop_x2 = std::min(img.cols - 1, area[2]); int crop_y2 = std::min(img.rows - 1, area[3]); int center_x = (crop_x1 + crop_x2) / 2.; int center_y = (crop_y1 + crop_y2) / 2.; int half_h = (crop_y2 - crop_y1) / 2.; int half_w = (crop_x2 - crop_x1) / 2.; // adjust h or w to keep image ratio, expand the shorter edge if (half_h * 3 > half_w * 4) { half_w = static_cast(half_h * 0.75); } else { half_h = static_cast(half_w * 4 / 3); } crop_x1 = std::max(0, center_x - static_cast(half_w * (1 + expandratio))); crop_y1 = std::max(0, center_y - static_cast(half_h * (1 + expandratio))); crop_x2 = std::min(img.cols - 1, static_cast(center_x + half_w * (1 + expandratio))); crop_y2 = std::min(img.rows - 1, static_cast(center_y + half_h * (1 + expandratio))); crop_img = img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1)); center.clear(); center.emplace_back((crop_x1 + crop_x2) / 2); center.emplace_back((crop_y1 + crop_y2) / 2); scale.clear(); scale.emplace_back((crop_x2 - crop_x1)); scale.emplace_back((crop_y2 - crop_y1)); } bool CheckDynamicInput(const std::vector& imgs) { if (imgs.size() == 1) return false; int h = imgs.at(0).rows; int w = imgs.at(0).cols; for (int i = 1; i < imgs.size(); ++i) { int hi = imgs.at(i).rows; int wi = imgs.at(i).cols; if (hi != h || wi != w) { return true; } } return false; } std::vector PadBatch(const std::vector& imgs) { std::vector out_imgs; int max_h = 0; int max_w = 0; int rh = 0; int rw = 0; // find max_h and max_w in batch for (int i = 0; i < imgs.size(); ++i) { rh = imgs.at(i).rows; rw = imgs.at(i).cols; if (rh > max_h) max_h = rh; if (rw > max_w) max_w = rw; } for (int i = 0; i < imgs.size(); ++i) { cv::Mat im = imgs.at(i); cv::copyMakeBorder(im, im, 0, max_h - imgs.at(i).rows, 0, max_w - imgs.at(i).cols, cv::BORDER_CONSTANT, cv::Scalar(0)); out_imgs.push_back(im); } return out_imgs; } } // namespace PaddleDetection