diff --git a/configs/yolox/_base_/yolox_reader.yml b/configs/yolox/_base_/yolox_reader.yml index 657638817883fa51990012f7546d38e159b22a2a..a33b847b159a515248c8556a24bb29e779f1def8 100644 --- a/configs/yolox/_base_/yolox_reader.yml +++ b/configs/yolox/_base_/yolox_reader.yml @@ -27,8 +27,8 @@ TrainReader: EvalReader: sample_transforms: - Decode: {} - - Resize: {target_size: 640, keep_ratio: True} - - Pad: {size: 640, fill_value: [114., 114., 114.]} + - Resize: {target_size: [640, 640], keep_ratio: True, interp: 1} + - Pad: {size: [640, 640], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 4 @@ -38,7 +38,7 @@ TestReader: image_shape: [3, 640, 640] sample_transforms: - Decode: {} - - Resize: {target_size: 640, keep_ratio: True} - - Pad: {size: 640, fill_value: [114., 114., 114.]} + - Resize: {target_size: [640, 640], keep_ratio: True, interp: 1} + - Pad: {size: [640, 640], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 1 diff --git a/configs/yolox/yolox_nano_300e_coco.yml b/configs/yolox/yolox_nano_300e_coco.yml index 79f961317907d412de548d199c46a520d201110b..80b8b5c51fbc200ecce2ff10013b7e9a94300999 100644 --- a/configs/yolox/yolox_nano_300e_coco.yml +++ b/configs/yolox/yolox_nano_300e_coco.yml @@ -64,8 +64,8 @@ TrainReader: EvalReader: sample_transforms: - Decode: {} - - Resize: {target_size: 416, keep_ratio: True} - - Pad: {size: 416, fill_value: [114., 114., 114.]} + - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1} + - Pad: {size: [416, 416], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 8 @@ -75,7 +75,7 @@ TestReader: image_shape: [3, 416, 416] sample_transforms: - Decode: {} - - Resize: {target_size: 416, keep_ratio: True} - - Pad: {size: 416, fill_value: [114., 114., 114.]} + - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1} + - Pad: {size: [416, 416], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 1 diff --git a/configs/yolox/yolox_tiny_300e_coco.yml b/configs/yolox/yolox_tiny_300e_coco.yml index 6e32c4851871e7c35e788b5f0fbb269047738da0..c81c172d27982c460bbead78f966158c67de7bc2 100644 --- a/configs/yolox/yolox_tiny_300e_coco.yml +++ b/configs/yolox/yolox_tiny_300e_coco.yml @@ -52,8 +52,8 @@ TrainReader: EvalReader: sample_transforms: - Decode: {} - - Resize: {target_size: 416, keep_ratio: True} - - Pad: {size: 416, fill_value: [114., 114., 114.]} + - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1} + - Pad: {size: [416, 416], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 8 @@ -63,7 +63,7 @@ TestReader: image_shape: [3, 416, 416] sample_transforms: - Decode: {} - - Resize: {target_size: 416, keep_ratio: True} - - Pad: {size: 416, fill_value: [114., 114., 114.]} + - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1} + - Pad: {size: [416, 416], fill_value: [114., 114., 114.]} - Permute: {} batch_size: 1 diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h index 2d24799a332293a8ef14a32d6d09f6daaa116425..a54bc2afb8aacbc55241b866ba41acc00491e4f3 100644 --- a/deploy/cpp/include/preprocess_op.h +++ b/deploy/cpp/include/preprocess_op.h @@ -161,6 +161,20 @@ class WarpAffine : public PreprocessOp { int pad_ = 31; }; +class Pad : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + size_ = item["size"].as>(); + fill_value_ = item["fill_value"].as>(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector size_; + std::vector fill_value_; +}; + void CropImg(cv::Mat& img, cv::Mat& crop_img, std::vector& area, @@ -203,6 +217,8 @@ class Preprocessor { return std::make_shared(); } else if (name == "WarpAffine") { return std::make_shared(); + }else if (name == "Pad") { + return std::make_shared(); } std::cerr << "can not find function of OP: " << name << " and return: nullptr" << std::endl; diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc index 98c700f6219ca4201d87902d2b337d5adc4d0d5c..6147555be57a2739fcd4a773eb281aaa966763b0 100644 --- a/deploy/cpp/src/preprocess_op.cc +++ b/deploy/cpp/src/preprocess_op.cc @@ -229,6 +229,23 @@ void WarpAffine::Run(cv::Mat* im, ImageBlob* data) { }; } +void Pad::Run(cv::Mat* im, ImageBlob* data) { + int h = size_[0]; + int w = size_[1]; + int rh = im->rows; + int rw = im->cols; + if (h == rh && w == rw){ + data->in_net_im_ = im->clone(); + return; + } + cv::copyMakeBorder( + *im, *im, 0, h - rh, 0, w - rw, cv::BORDER_CONSTANT, cv::Scalar(114)); + data->in_net_im_ = im->clone(); + data->in_net_shape_ = { + static_cast(im->rows), static_cast(im->cols), + }; +} + // Preprocessor op running order const std::vector Preprocessor::RUN_ORDER = {"InitInfo", "TopDownEvalAffine", @@ -237,6 +254,7 @@ const std::vector Preprocessor::RUN_ORDER = {"InitInfo", "WarpAffine", "NormalizeImage", "PadStride", + "Pad", "Permute"}; void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { diff --git a/deploy/python/preprocess.py b/deploy/python/preprocess.py index b8cf256d508e66798063ada9f62d45dbee8fca07..315364775850dd2e19d59f226dc896cc933a328a 100644 --- a/deploy/python/preprocess.py +++ b/deploy/python/preprocess.py @@ -247,77 +247,30 @@ class LetterBoxResize(object): class Pad(object): - def __init__(self, - size=None, - size_divisor=32, - pad_mode=0, - offsets=None, - fill_value=(127.5, 127.5, 127.5)): + def __init__(self, size, fill_value=[114.0, 114.0, 114.0]): """ - Pad image to a specified size or multiple of size_divisor. + Pad image to a specified size. Args: - size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None - size_divisor (int): size divisor, default 32 - pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets - if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top - offsets (list): [offset_x, offset_y], specify offset while padding, only supported pad_mode=-1 - fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5) + size (list[int]): image target size + fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0) """ super(Pad, self).__init__() if isinstance(size, int): size = [size, size] - - assert pad_mode in [ - -1, 0, 1, 2 - ], 'currently only supports four modes [-1, 0, 1, 2]' - if pad_mode == -1: - assert offsets, 'if pad_mode is -1, offsets should not be None' - self.size = size - self.size_divisor = size_divisor - self.pad_mode = pad_mode self.fill_value = fill_value - self.offsets = offsets - - def apply_image(self, image, offsets, im_size, size): - x, y = offsets - im_h, im_w = im_size - h, w = size - canvas = np.ones((h, w, 3), dtype=np.float32) - canvas *= np.array(self.fill_value, dtype=np.float32) - canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32) - return canvas def __call__(self, im, im_info): im_h, im_w = im.shape[:2] - if self.size: - h, w = self.size - assert ( - im_h <= h and im_w <= w - ), '(h, w) of target size should be greater than (im_h, im_w)' - else: - h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor) - w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor) - + h, w = self.size if h == im_h and w == im_w: im = im.astype(np.float32) return im, im_info - if self.pad_mode == -1: - offset_x, offset_y = self.offsets - elif self.pad_mode == 0: - offset_y, offset_x = 0, 0 - elif self.pad_mode == 1: - offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2 - else: - offset_y, offset_x = h - im_h, w - im_w - - offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w] - im = self.apply_image(im, offsets, im_size, size) - - if self.pad_mode == 0: - return im, im_info - + canvas = np.ones((h, w, 3), dtype=np.float32) + canvas *= np.array(self.fill_value, dtype=np.float32) + canvas[0:im_h, 0:im_w, :] = im.astype(np.float32) + im = canvas return im, im_info