thresholder.cpp 11.5 KB
Newer Older
T
theraysmith 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
///////////////////////////////////////////////////////////////////////
// File:        thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author:      Ray Smith
// Created:     Mon May 12 11:28:15 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#include "allheaders.h"

#include "thresholder.h"

24
#include <cstdint>      // for uint32_t
25
#include <cstring>
T
theraysmith 已提交
26 27 28

#include "otsuthr.h"

29
#include "openclwrapper.h" // for PERF_COUNT_START, ...
30

T
theraysmith 已提交
31 32 33
namespace tesseract {

// Builds a thresholder that holds no image yet: null Pix, zero image
// dimensions, zero channels and words-per-line, a scale of 1 and both
// resolution fields set to 300. The processing rectangle starts out empty.
ImageThresholder::ImageThresholder()
    : pix_(nullptr),
      image_width_(0),
      image_height_(0),
      pix_channels_(0),
      pix_wpl_(0),
      scale_(1),
      yres_(300),
      estimated_res_(300) {
  // No image, so the rectangle to process is 0 x 0 at the origin.
  SetRectangle(0, 0, 0, 0);
}

// Releases the held image (if any) when the thresholder goes away.
ImageThresholder::~ImageThresholder() {
  this->Clear();
}

// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
47
  pixDestroy(&pix_);
T
theraysmith 已提交
48 49 50 51
}

// Return true if no image has been set.
bool ImageThresholder::IsEmpty() const {
S
Stefan Weil 已提交
52
  return pix_ == nullptr;
T
theraysmith 已提交
53 54
}

55 56
// SetImage makes a copy of all the image data, so it may be deleted
// immediately after this call.
T
theraysmith 已提交
57 58 59 60 61 62 63 64 65
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
void ImageThresholder::SetImage(const unsigned char* imagedata,
                                int width, int height,
                                int bytes_per_pixel, int bytes_per_line) {
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116
  int bpp = bytes_per_pixel * 8;
  if (bpp == 0) bpp = 1;
  Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
  l_uint32* data = pixGetData(pix);
  int wpl = pixGetWpl(pix);
  switch (bpp) {
  case 1:
    for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
      for (int x = 0; x < width; ++x) {
        if (imagedata[x / 8] & (0x80 >> (x % 8)))
          CLEAR_DATA_BIT(data, x);
        else
          SET_DATA_BIT(data, x);
      }
    }
    break;

  case 8:
    // Greyscale just copies the bytes in the right order.
    for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
      for (int x = 0; x < width; ++x)
        SET_DATA_BYTE(data, x, imagedata[x]);
    }
    break;

  case 24:
    // Put the colors in the correct places in the line buffer.
    for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
      for (int x = 0; x < width; ++x, ++data) {
        SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
        SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
        SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
      }
    }
    break;

  case 32:
    // Maintain byte order consistency across different endianness.
    for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
      for (int x = 0; x < width; ++x) {
        data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
                  (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
      }
    }
    break;

  default:
    tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
  }
  SetImage(pix);
  pixDestroy(&pix);
T
theraysmith 已提交
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
}

// Records the rectangle (top-left corner plus size) that later calls should
// process. Nothing is thresholded here; the four values are only stored.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  // Origin of the rectangle.
  rect_top_ = top;
  rect_left_ = left;
  // Extent of the rectangle.
  rect_height_ = height;
  rect_width_ = width;
}

// Reports the stored processing rectangle together with the full image
// size, which is what a caller needs to map boxes found inside the rectangle
// back into the original image. With top-down coordinates left and top are
// sufficient; bottom-up coordinates also need the rectangle and image
// heights. All six output pointers are written unconditionally.
void ImageThresholder::GetImageSizes(int* left, int* top,
                                     int* width, int* height,
                                     int* imagewidth, int* imageheight) {
  // Size of the whole image.
  *imageheight = image_height_;
  *imagewidth = image_width_;
  // Size of the rectangle.
  *height = rect_height_;
  *width = rect_width_;
  // Position of the rectangle.
  *top = rect_top_;
  *left = rect_left_;
}

143 144 145 146 147
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
T
theraysmith 已提交
148
void ImageThresholder::SetImage(const Pix* pix) {
S
Stefan Weil 已提交
149
  if (pix_ != nullptr)
T
theraysmith 已提交
150 151 152 153 154
    pixDestroy(&pix_);
  Pix* src = const_cast<Pix*>(pix);
  int depth;
  pixGetDimensions(src, &image_width_, &image_height_, &depth);
  // Convert the image as necessary so it is one of binary, plain RGB, or
155 156 157 158 159 160 161 162 163 164 165 166
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
  // not just a clone of the input.
  if (pixGetColormap(src)) {
    Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
    depth = pixGetDepth(tmp);
    if (depth > 1 && depth < 8) {
      pix_ = pixConvertTo8(tmp, false);
      pixDestroy(&tmp);
    } else {
      pix_ = tmp;
    }
  } else if (depth > 1 && depth < 8) {
T
theraysmith 已提交
167 168
    pix_ = pixConvertTo8(src, false);
  } else {
S
Stefan Weil 已提交
169
    pix_ = pixCopy(nullptr, src);
T
theraysmith 已提交
170 171
  }
  depth = pixGetDepth(pix_);
172 173
  pix_channels_ = depth / 8;
  pix_wpl_ = pixGetWpl(pix_);
174
  scale_ = 1;
175
  estimated_res_ = yres_ = pixGetYRes(pix_);
T
theraysmith 已提交
176 177 178 179 180 181
  Init();
}

// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
182 183
/// Returns false on error.
bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
184
  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
185 186 187
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }
188
  if (pix_channels_ == 0) {
189 190 191 192 193
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
    Pix* original = GetPixRect();
    *pix = pixCopy(nullptr, original);
    pixDestroy(&original);
T
theraysmith 已提交
194
  } else {
195
    OtsuThresholdRectToPix(pix_, pix);
T
theraysmith 已提交
196
  }
197
  return true;
T
theraysmith 已提交
198 199
}

200 201 202 203 204 205
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
S
Stefan Weil 已提交
206
// Returns nullptr if the input is binary. PixDestroy after use.
207
Pix* ImageThresholder::GetPixRectThresholds() {
S
Stefan Weil 已提交
208
  if (IsBinary()) return nullptr;
209 210 211 212 213
  Pix* pix_grey = GetPixRectGrey();
  int width = pixGetWidth(pix_grey);
  int height = pixGetHeight(pix_grey);
  int* thresholds;
  int* hi_values;
214
  OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
215 216 217 218 219 220 221 222 223
  pixDestroy(&pix_grey);
  Pix* pix_thresholds = pixCreate(width, height, 8);
  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
  pixSetAllArbitrary(pix_thresholds, threshold);
  delete [] thresholds;
  delete [] hi_values;
  return pix_thresholds;
}

224 225 226 227 228
// Common post-SetImage initialization: makes the processing rectangle cover
// the whole of the current image.
void ImageThresholder::Init() {
  const int full_width = image_width_;
  const int full_height = image_height_;
  SetRectangle(0, 0, full_width, full_height);
}

T
theraysmith 已提交
229 230 231 232 233 234
// Returns the part of the source image selected by the current rectangle:
// a clone of the whole image when the rectangle covers all of it, otherwise
// a Pix clipped to the rectangle.
// The returned Pix must be pixDestroyed.
// There is no raw-buffer equivalent of this accessor.
Pix* ImageThresholder::GetPixRect() {
  if (!IsFullImage()) {
    // Only part of the image is wanted: clip it out.
    Box* clip_box =
        boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
    Pix* clipped = pixClipRectangle(pix_, clip_box, nullptr);
    boxDestroy(&clip_box);
    return clipped;
  }
  // The rectangle is the whole image, so a clone is enough.
  return pixClone(pix_);
}

247 248
// Get a clone/copy of the source image rectangle, reduced to greyscale,
// and at the same resolution as the output binary.
249
// The returned Pix must be pixDestroyed.
250
// Provided to the classifier to extract features from the greyscale image.
251 252 253 254 255 256 257 258 259 260
Pix* ImageThresholder::GetPixRectGrey() {
  Pix* pix = GetPixRect();  // May have to be reduced to grey.
  int depth = pixGetDepth(pix);
  if (depth != 8) {
    Pix* result = depth < 8 ? pixConvertTo8(pix, false)
                            : pixConvertRGBToLuminance(pix);
    pixDestroy(&pix);
    return result;
  }
  return pix;
T
theraysmith 已提交
261 262
}

263 264 265 266
// Computes per-channel thresholds for the rectangle stored in *this with
// OtsuThreshold and applies them to src_pix, writing the binary result to
// *out_pix.
// When built with USE_OPENCL the OpenCL implementation is used, but only for
// 4-channel input, a selected OpenCL device and a rectangle whose top-left
// corner is at (0, 0); every other case goes through ThresholdRectToPix.
void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
                                              Pix** out_pix) const {
  PERF_COUNT_START("OtsuThresholdRectToPix")
  // Filled in by OtsuThreshold and released with delete [] below.
  int* thresholds;
  int* hi_values;

  const int num_channels =
      OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
                    &thresholds, &hi_values);

  // Set once the OpenCL path has produced the output.
  bool done_with_opencl = false;
#ifdef USE_OPENCL
  OpenclDevice od;
  if (num_channels == 4 &&
      od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
    unsigned char* src_bytes =
        reinterpret_cast<unsigned char*>(pixGetData(src_pix));
    const int src_bytes_per_line = pixGetWpl(src_pix) * 4;
    od.ThresholdRectToPixOCL(src_bytes, num_channels, src_bytes_per_line,
                             thresholds, hi_values, out_pix /*pix_OCL*/,
                             rect_height_, rect_width_, rect_top_, rect_left_);
    done_with_opencl = true;
  }
#endif
  if (!done_with_opencl) {
    ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
  }

  delete [] thresholds;
  delete [] hi_values;

  PERF_COUNT_END
}

293 294 295 296 297 298
/// Thresholds the rectangle stored in the class, reading pixels from src_pix
/// and writing a newly created 1 bit Pix to *pix.
/// num_channels is both the length of the thresholds and hi_values arrays
/// and the number of bytes per pixel in src_pix.
/// Channels with a negative hi_values entry are ignored. A channel whose
/// hi_values entry is 0 marks the pixel black when its value is above the
/// threshold; any other channel marks it black when the value is at or below
/// the threshold. One such channel is enough to set the output bit;
/// otherwise the bit is cleared.
void ImageThresholder::ThresholdRectToPix(Pix* src_pix,
                                          int num_channels,
                                          const int* thresholds,
                                          const int* hi_values,
                                          Pix** pix) const {
  PERF_COUNT_START("ThresholdRectToPix")
  *pix = pixCreate(rect_width_, rect_height_, 1);
  uint32_t* const dst_base = pixGetData(*pix);
  const int dst_wpl = pixGetWpl(*pix);
  const int src_wpl = pixGetWpl(src_pix);
  const uint32_t* const src_base = pixGetData(src_pix);

  for (int row = 0; row < rect_height_; ++row) {
    // Source rows are offset by the rectangle's top; output rows are not.
    const uint32_t* src_line = src_base + (row + rect_top_) * src_wpl;
    uint32_t* dst_line = dst_base + row * dst_wpl;
    for (int col = 0; col < rect_width_; ++col) {
      // Byte index of this pixel's first channel within the source line.
      const int first_byte = (col + rect_left_) * num_channels;
      bool is_black = false;
      for (int ch = 0; ch < num_channels && !is_black; ++ch) {
        if (hi_values[ch] < 0) {
          continue;  // This channel takes no part in the decision.
        }
        const int value = GET_DATA_BYTE(src_line, first_byte + ch);
        const bool above_threshold = value > thresholds[ch];
        const bool hi_is_zero = hi_values[ch] == 0;
        is_black = (above_threshold == hi_is_zero);
      }
      if (is_black) {
        SET_DATA_BIT(dst_line, col);
      } else {
        CLEAR_DATA_BIT(dst_line, col);
      }
    }
  }

  PERF_COUNT_END
}

}  // namespace tesseract.