//   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <yaml-cpp/yaml.h>

#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>

namespace PaddleX {

/*
 * @brief
 * This class stores all data produced while preprocessing an image
 * */
class ImageBlob {
 public:
  // Original image height and width (two zero-initialized entries)
  std::vector<int> ori_im_size_ = std::vector<int>(2);
  // Newest image height and width after process (two zero-initialized entries)
  std::vector<int> new_im_size_ = std::vector<int>(2);
  // Image height and width before resize
  std::vector<std::vector<int>> im_size_before_resize_;
  // Reshape order
  std::vector<std::string> reshape_order_;
  // Resize scale
  float scale = 1.0;
  // Buffer for image data after preprocessing
  std::vector<float> im_data_;

  /*
   * @brief
   * Empties im_size_before_resize_, reshape_order_ and im_data_.
   * ori_im_size_, new_im_size_ and scale are left untouched.
   * */
  void clear() {
    im_size_before_resize_.clear();
    reshape_order_.clear();
    im_data_.clear();
  }
};

/*
 * @brief
 * Abstract base class of all preprocessing operations
 * */
class Transform {
 public:
  virtual void Init(const YAML::Node& item) = 0;
J
jack 已提交
65 66 67 68 69
  /*
   * @brief
   * This method executes preprocessing operation on image matrix,
   * result will be returned at second parameter.
   * @param im: single image matrix to be preprocessed
J
jack 已提交
70
   * @param data: the raw data of single image matrix after preprocessed
J
jack 已提交
71 72
   * @return true if transform successfully
   * */
C
Channingss 已提交
73 74 75
  virtual bool Run(cv::Mat* im, ImageBlob* data) = 0;
};

/*
 * @brief
 * This class executes the normalization operation on an image matrix
 * */
class Normalize : public Transform {
 public:
  /*
   * @brief
   * Reads "mean" and "std" (required) plus "min_val" and "max_val"
   * (optional) from the YAML node. When "min_val" / "max_val" are absent they
   * default to one 0 / one 255 per entry of "mean".
   * @param item: YAML node holding the parameters of this operation
   * */
  virtual void Init(const YAML::Node& item) {
    mean_ = item["mean"].as<std::vector<float>>();
    std_ = item["std"].as<std::vector<float>>();
    if (item["min_val"].IsDefined()) {
      min_val_ = item["min_val"].as<std::vector<float>>();
    } else {
      // std::vector(count, value): the count comes first, the fill value
      // second, so this yields mean_.size() zeros.
      min_val_ = std::vector<float>(mean_.size(), 0.f);
    }
    if (item["max_val"].IsDefined()) {
      max_val_ = item["max_val"].as<std::vector<float>>();
    } else {
      // Same argument order as above: mean_.size() entries of 255.
      max_val_ = std::vector<float>(mean_.size(), 255.f);
    }
  }

  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  // Values read from the "mean" and "std" keys
  std::vector<float> mean_;
  std::vector<float> std_;
  // Values read from "min_val" / "max_val", or the 0 / 255 defaults
  std::vector<float> min_val_;
  std::vector<float> max_val_;
};

/*
 * @brief
 * This class executes the resize-by-short operation on an image matrix. First,
 * it resizes the short side of the image matrix to the specified length, and
 * the long side is resized in the same proportion. If the new length of the
 * long side exceeds the max size, the long side is resized to the max size and
 * the short side is resized in the same proportion
 * */
class ResizeByShort : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
    short_size_ = item["short_size"].as<int>();
    if (item["max_size"].IsDefined()) {
      max_size_ = item["max_size"].as<int>();
    } else {
      max_size_ = -1;
    }
C
Channingss 已提交
123
  }
C
Channingss 已提交
124 125 126 127 128 129 130 131
  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  float GenerateScale(const cv::Mat& im);
  int short_size_;
  int max_size_;
};

/*
 * @brief
 * This class executes the resize-by-long operation on an image matrix. First,
 * it resizes the long side of the image matrix to the specified length, and
 * the short side is resized in the same proportion.
 * */
class ResizeByLong : public Transform {
 public:
  virtual void Init(const YAML::Node& item) {
    long_size_ = item["long_size"].as<int>();
C
Channingss 已提交
142
  }
C
Channingss 已提交
143 144 145 146 147 148
  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  int long_size_;
};

/*
 * @brief
 * This class executes the resize operation on an image matrix. It resizes the
 * width and height to the specified lengths.
 * */
class Resize : public Transform {
 public:
  /*
   * @brief
   * Reads "interp" (optional) and "target_size" from the YAML node. A scalar
   * "target_size" is used for both width and height; a sequence is read as
   * [width, height]. Prints an error and exits the process when a sequence
   * has fewer than two entries or when a resulting size is not positive.
   * @param item: YAML node holding the parameters of this operation
   * */
  virtual void Init(const YAML::Node& item) {
    if (item["interp"].IsDefined()) {
      interp_ = item["interp"].as<std::string>();
    }
    if (item["target_size"].IsScalar()) {
      height_ = item["target_size"].as<int>();
      width_ = item["target_size"].as<int>();
    } else if (item["target_size"].IsSequence()) {
      std::vector<int> target_size = item["target_size"].as<std::vector<int>>();
      // Both entries are indexed below, so reject sequences that are too short.
      if (target_size.size() < 2) {
        std::cerr << "[Resize] target_size should contain width and height"
                  << std::endl;
        exit(-1);
      }
      width_ = target_size[0];
      height_ = target_size[1];
    }
    if (height_ <= 0 || width_ <= 0) {
      std::cerr << "[Resize] target_size should greater than 0" << std::endl;
      exit(-1);
    }
  }
  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  // Zero defaults keep the validation in Init well defined when "target_size"
  // is neither a scalar nor a sequence.
  int height_ = 0;
  int width_ = 0;
  // Value of the "interp" key; stays empty when the key is absent.
  std::string interp_;
};

/*
 * @brief
 * This class executes the center crop operation on an image matrix. It crops
 * the center of the image matrix according to the specified size.
 * */
class CenterCrop : public Transform {
 public:
  /*
   * @brief
   * Reads "crop_size" from the YAML node. A scalar is used for both width and
   * height; a sequence is read as [width, height]. Prints an error and exits
   * the process when a sequence has fewer than two entries or when a
   * resulting size is not positive.
   * @param item: YAML node holding the parameters of this operation
   * */
  virtual void Init(const YAML::Node& item) {
    if (item["crop_size"].IsScalar()) {
      height_ = item["crop_size"].as<int>();
      width_ = item["crop_size"].as<int>();
    } else if (item["crop_size"].IsSequence()) {
      std::vector<int> crop_size = item["crop_size"].as<std::vector<int>>();
      // Both entries are indexed below, so reject sequences that are too short.
      if (crop_size.size() < 2) {
        std::cerr << "[CenterCrop] crop_size should contain width and height"
                  << std::endl;
        exit(-1);
      }
      width_ = crop_size[0];
      height_ = crop_size[1];
    }
    // Same validation as Resize applies to its target_size.
    if (height_ <= 0 || width_ <= 0) {
      std::cerr << "[CenterCrop] crop_size should greater than 0" << std::endl;
      exit(-1);
    }
  }
  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  // Zero defaults keep the validation in Init well defined when "crop_size"
  // is neither a scalar nor a sequence.
  int height_ = 0;
  int width_ = 0;
};

/*
 * @brief
 * This class executes the padding operation on an image matrix. It adds a
 * border to the edges of the image matrix.
 * */
class Padding : public Transform {
 public:
  /*
   * @brief
   * Reads the optional keys "coarsest_stride", "target_size" and
   * "im_padding_value" from the YAML node. A scalar "target_size" is used for
   * both width and height; a sequence is read as [width, height]. When
   * "im_padding_value" is absent the padding value defaults to {0, 0, 0}.
   * Prints an error and exits the process when "coarsest_stride" is smaller
   * than 1 or a "target_size" sequence has fewer than two entries.
   * @param item: YAML node holding the parameters of this operation
   * */
  virtual void Init(const YAML::Node& item) {
    if (item["coarsest_stride"].IsDefined()) {
      coarsest_stride_ = item["coarsest_stride"].as<int>();
      if (coarsest_stride_ < 1) {
        std::cerr << "[Padding] coarsest_stride should greater than 0"
                  << std::endl;
        exit(-1);
      }
    }
    if (item["target_size"].IsDefined()) {
      if (item["target_size"].IsScalar()) {
        width_ = item["target_size"].as<int>();
        height_ = item["target_size"].as<int>();
      } else if (item["target_size"].IsSequence()) {
        // Convert the sequence once and check its length before indexing.
        const std::vector<int> target_size =
            item["target_size"].as<std::vector<int>>();
        if (target_size.size() < 2) {
          std::cerr << "[Padding] target_size should contain width and height"
                    << std::endl;
          exit(-1);
        }
        width_ = target_size[0];
        height_ = target_size[1];
      }
    }
    if (item["im_padding_value"].IsDefined()) {
      im_value_ = item["im_padding_value"].as<std::vector<float>>();
    } else {
      im_value_ = {0, 0, 0};
    }
  }
  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  // Stays -1 when "coarsest_stride" is not configured.
  int coarsest_stride_ = -1;
  // Stay 0 when "target_size" is not configured.
  int width_ = 0;
  int height_ = 0;
  // Value of "im_padding_value", or {0, 0, 0} when the key is absent.
  std::vector<float> im_value_;
};

/*
 * @brief
 * This class executes the clip operation on an image matrix
 * */
class Clip : public Transform {
 public:
  /*
   * @brief
   * Reads the required "min_val" and "max_val" lists from the YAML node.
   * @param item: YAML node holding the parameters of this operation
   * */
  virtual void Init(const YAML::Node& item) {
    min_val_ = item["min_val"].as<FloatList>();
    max_val_ = item["max_val"].as<FloatList>();
  }

  virtual bool Run(cv::Mat* im, ImageBlob* data);

 private:
  using FloatList = std::vector<float>;

  // Values read from the "min_val" key
  FloatList min_val_;
  // Values read from the "max_val" key
  FloatList max_val_;
};

/*
 * @brief
 * This class is the manager of transform operations. It stores all necessary
 * transform operations and runs them in the correct order.
 * */
class Transforms {
 public:
  /*
   * @brief
   * NOTE(review): defined outside this header; presumably builds transforms_
   * from the YAML description and records to_rgb - confirm in the
   * implementation file.
   * @param node: YAML node describing the transform operations
   * @param to_rgb: defaults to true
   * */
  void Init(const YAML::Node& node, bool to_rgb = true);
  /*
   * @brief
   * NOTE(review): defined outside this header; presumably returns the
   * operation registered under the given name - confirm in the
   * implementation file.
   * @param name: name of the transform operation
   * @return shared pointer to a Transform
   * */
  std::shared_ptr<Transform> CreateTransform(const std::string& name);
  /*
   * @brief
   * NOTE(review): defined outside this header; presumably applies the stored
   * operations to the image - confirm in the implementation file.
   * @param im: single image matrix to be preprocessed
   * @param data: the raw data of single image matrix after preprocessed
   * */
  bool Run(cv::Mat* im, ImageBlob* data);

 private:
  // Stored transform operations
  std::vector<std::shared_ptr<Transform>> transforms_;
  // Defaults to true, matching the default argument of Init
  bool to_rgb_ = true;
};

}  // namespace PaddleX