///////////////////////////////////////////////////////////////////////
// File:        thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author:      Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#include "otsuthr.h"
#include "thresholder.h"
#include "tprintf.h" // for tprintf

#if defined(USE_OPENCL)
#  include "openclwrapper.h" // for OpenclDevice
#endif

#include <allheaders.h>

#include <cstdint> // for uint32_t
#include <cstring>
#include <tuple>

namespace tesseract {

// Builds a thresholder that holds no image: all image geometry is zeroed,
// the scale is 1, both resolution fields start at 300, and the rectangle of
// interest is empty.
ImageThresholder::ImageThresholder()
    : pix_(nullptr),
      image_width_(0),
      image_height_(0),
      pix_channels_(0),
      pix_wpl_(0),
      scale_(1),
      yres_(300),
      estimated_res_(300) {
  // No image yet, so the rectangle of interest is empty as well.
  SetRectangle(0, 0, 0, 0);
}

// Releases the stored image, if any, by delegating to Clear().
ImageThresholder::~ImageThresholder() {
  this->Clear();
}

// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
53
  pix_.destroy();
T
theraysmith 已提交
54 55 56 57
}

// Return true if no image has been set.
bool ImageThresholder::IsEmpty() const {
S
Stefan Weil 已提交
58
  return pix_ == nullptr;
T
theraysmith 已提交
59 60
}

61 62
// SetImage makes a copy of all the image data, so it may be deleted
// immediately after this call.
T
theraysmith 已提交
63 64 65 66 67 68
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
69
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
T
theraysmith 已提交
70
                                int bytes_per_pixel, int bytes_per_line) {
71
  int bpp = bytes_per_pixel * 8;
72
  if (bpp == 0) {
73
    bpp = 1;
74
  }
75
  Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
76
  l_uint32 *data = pixGetData(pix);
77 78
  int wpl = pixGetWpl(pix);
  switch (bpp) {
79 80 81
    case 1:
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
        for (int x = 0; x < width; ++x) {
82
          if (imagedata[x / 8] & (0x80 >> (x % 8))) {
83
            CLEAR_DATA_BIT(data, x);
84
          } else {
85
            SET_DATA_BIT(data, x);
86
          }
87
        }
88
      }
89
      break;
90

91 92 93
    case 8:
      // Greyscale just copies the bytes in the right order.
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
94
        for (int x = 0; x < width; ++x) {
95
          SET_DATA_BYTE(data, x, imagedata[x]);
96
        }
97 98
      }
      break;
99

100 101 102 103 104 105 106 107
    case 24:
      // Put the colors in the correct places in the line buffer.
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
        for (int x = 0; x < width; ++x, ++data) {
          SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
          SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
          SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
        }
108
      }
109
      break;
110

111 112 113 114 115 116 117
    case 32:
      // Maintain byte order consistency across different endianness.
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
        for (int x = 0; x < width; ++x) {
          data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
                    (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
        }
118
      }
119
      break;
120

121 122
    default:
      tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
123 124
  }
  SetImage(pix);
125
  pix.destroy();
T
theraysmith 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
}

// Records the rectangle to be processed later. Nothing is thresholded here;
// the four values are only stored.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  // Origin of the rectangle.
  rect_left_ = left;
  rect_top_ = top;
  // Extent of the rectangle.
  rect_width_ = width;
  rect_height_ = height;
}

// Get enough parameters to be able to rebuild bounding boxes in the
// original image (not just within the rectangle).
// Left and top are enough with top-down coordinates, but
// the height of the rectangle and the image are needed for bottom-up.
141 142
void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
                                     int *imageheight) {
T
theraysmith 已提交
143 144 145 146 147 148 149 150
  *left = rect_left_;
  *top = rect_top_;
  *width = rect_width_;
  *height = rect_height_;
  *imagewidth = image_width_;
  *imageheight = image_height_;
}

151 152 153 154 155
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
156
void ImageThresholder::SetImage(const Image pix) {
157
  if (pix_ != nullptr) {
158
    pix_.destroy();
159
  }
160
  Image src = pix;
T
theraysmith 已提交
161 162 163
  int depth;
  pixGetDimensions(src, &image_width_, &image_height_, &depth);
  // Convert the image as necessary so it is one of binary, plain RGB, or
164 165 166
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
  // not just a clone of the input.
  if (pixGetColormap(src)) {
167
    Image tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
168 169 170
    depth = pixGetDepth(tmp);
    if (depth > 1 && depth < 8) {
      pix_ = pixConvertTo8(tmp, false);
171
      tmp.destroy();
172 173 174 175
    } else {
      pix_ = tmp;
    }
  } else if (depth > 1 && depth < 8) {
T
theraysmith 已提交
176 177
    pix_ = pixConvertTo8(src, false);
  } else {
E
Egor Pugin 已提交
178
    pix_ = src.copy();
T
theraysmith 已提交
179 180
  }
  depth = pixGetDepth(pix_);
181 182
  pix_channels_ = depth / 8;
  pix_wpl_ = pixGetWpl(pix_);
183
  scale_ = 1;
184
  estimated_res_ = yres_ = pixGetYRes(pix_);
T
theraysmith 已提交
185 186 187
  Init();
}

A
Amit Dovev 已提交
188
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
E
Egor Pugin 已提交
189
                                                         ThresholdMethod method) {
A
Amit Dovev 已提交
190 191 192 193 194 195 196 197 198 199 200 201
  Image pix_binary = nullptr;
  Image pix_thresholds = nullptr;

  if (pix_channels_ == 0) {
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
    Image original = GetPixRect();
    pix_binary = original.copy();
    original.destroy();
    return std::make_tuple(false, nullptr, pix_binary, nullptr);
  }

202
  auto pix_grey = GetPixRectGrey();
A
Amit Dovev 已提交
203 204

  int r;
205
  if (method == ThresholdMethod::Sauvola) {
206 207
    r = pixSauvolaBinarizeTiled(pix_grey, 25, 0.40, 300, 300, (PIX**)pix_thresholds,
                                (PIX**)pix_binary);
208 209 210
  } else {
    // AdaptiveOtsu.
    r = pixOtsuAdaptiveThreshold(pix_grey, 300, 300, 0, 0, 0.1,
211
                                 (PIX**)pix_thresholds, (PIX**)pix_binary);
A
Amit Dovev 已提交
212
  }
E
Egor Pugin 已提交
213

214
  bool ok = (r == 0);
A
Amit Dovev 已提交
215 216 217
  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
}

T
theraysmith 已提交
218 219 220
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
221
/// Returns false on error.
A
Amit Dovev 已提交
222
bool ImageThresholder::ThresholdToPix(Image *pix) {
223
  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
224 225 226
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }
227
  if (pix_channels_ == 0) {
228 229
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
230
    Image original = GetPixRect();
E
Egor Pugin 已提交
231
    *pix = original.copy();
232
    original.destroy();
T
theraysmith 已提交
233
  } else {
234
    OtsuThresholdRectToPix(pix_, pix);
T
theraysmith 已提交
235
  }
236
  return true;
T
theraysmith 已提交
237 238
}

239 240 241 242 243 244
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
S
Stefan Weil 已提交
245
// Returns nullptr if the input is binary. PixDestroy after use.
246
Image ImageThresholder::GetPixRectThresholds() {
247
  if (IsBinary()) {
248
    return nullptr;
249
  }
250
  Image pix_grey = GetPixRectGrey();
251 252
  int width = pixGetWidth(pix_grey);
  int height = pixGetHeight(pix_grey);
253 254 255
  std::vector<int> thresholds;
  std::vector<int> hi_values;
  OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
256 257
  pix_grey.destroy();
  Image pix_thresholds = pixCreate(width, height, 8);
258 259 260 261 262
  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
  pixSetAllArbitrary(pix_thresholds, threshold);
  return pix_thresholds;
}

263 264 265 266 267
// Common initialization shared between SetImage methods.
void ImageThresholder::Init() {
  SetRectangle(0, 0, image_width_, image_height_);
}

T
theraysmith 已提交
268 269 270 271 272
// Get a clone/copy of the source image rectangle.
// The returned Pix must be pixDestroyed.
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
273
Image ImageThresholder::GetPixRect() {
274 275
  if (IsFullImage()) {
    // Just clone the whole thing.
E
Egor Pugin 已提交
276
    return pix_.clone();
277 278
  } else {
    // Crop to the given rectangle.
279
    Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
280
    Image cropped = pixClipRectangle(pix_, box, nullptr);
281 282
    boxDestroy(&box);
    return cropped;
T
theraysmith 已提交
283 284 285
  }
}

286 287
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
288
// The returned Pix must be pixDestroyed.
289
// Provided to the classifier to extract features from the greyscale image.
290
Image ImageThresholder::GetPixRectGrey() {
291
  auto pix = GetPixRect(); // May have to be reduced to grey.
292 293
  int depth = pixGetDepth(pix);
  if (depth != 8) {
294 295
    if (depth == 24) {
      auto tmp = pixConvert24To32(pix);
296
      pix.destroy();
297 298 299
      pix = tmp;
    }
    auto result = pixConvertTo8(pix, false);
300
    pix.destroy();
301 302 303
    return result;
  }
  return pix;
T
theraysmith 已提交
304 305
}

306
// Otsu thresholds the rectangle, taking the rectangle from *this.
307
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
308 309
  std::vector<int> thresholds;
  std::vector<int> hi_values;
310

311
  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
312
                                   thresholds, hi_values);
313
  // only use opencl if compiled w/ OpenCL and selected device is opencl
314 315
#ifdef USE_OPENCL
  OpenclDevice od;
316 317
  if (num_channels == 4 && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
    od.ThresholdRectToPixOCL((unsigned char *)pixGetData(src_pix), num_channels,
318
                             pixGetWpl(src_pix) * 4, &thresholds[0], &hi_values[0], out_pix /*pix_OCL*/,
319
                             rect_height_, rect_width_, rect_top_, rect_left_);
320 321
  } else {
#endif
322
    ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
323 324
#ifdef USE_OPENCL
  }
325
#endif
T
theraysmith 已提交
326 327
}

328 329 330 331
/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
// arrays and also the bytes per pixel in src_pix.
332 333
void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
                                          const std::vector<int> &hi_values, Image *pix) const {
T
theraysmith 已提交
334
  *pix = pixCreate(rect_width_, rect_height_, 1);
335
  uint32_t *pixdata = pixGetData(*pix);
T
theraysmith 已提交
336
  int wpl = pixGetWpl(*pix);
337
  int src_wpl = pixGetWpl(src_pix);
338
  uint32_t *srcdata = pixGetData(src_pix);
339 340
  pixSetXRes(*pix, pixGetXRes(src_pix));
  pixSetYRes(*pix, pixGetYRes(src_pix));
T
theraysmith 已提交
341
  for (int y = 0; y < rect_height_; ++y) {
342 343
    const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
    uint32_t *pixline = pixdata + y * wpl;
344
    for (int x = 0; x < rect_width_; ++x) {
T
theraysmith 已提交
345
      bool white_result = true;
346
      for (int ch = 0; ch < num_channels; ++ch) {
347 348
        int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
        if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
T
theraysmith 已提交
349 350 351 352
          white_result = false;
          break;
        }
      }
353
      if (white_result) {
T
theraysmith 已提交
354
        CLEAR_DATA_BIT(pixline, x);
355
      } else {
T
theraysmith 已提交
356
        SET_DATA_BIT(pixline, x);
357
      }
T
theraysmith 已提交
358 359 360 361
    }
  }
}

} // namespace tesseract.