Add more binarization options

Use functions from Leptonica to provide more binarization options. The new options are: 1) Adaptive Otsu and 2) Sauvola (Tiled).

Add more binarization options
Use functions from Leptonica to provide more binarization options. The new options are: 1) Adaptive Otsu and 2) Sauvola (Tiled).
11c73c94 · Amit Dovev · 65118b2e · 11c73c94 · 11c73c94 · 11c73c94
7 changed files
--- a/include/tesseract/publictypes.h
+++ b/include/tesseract/publictypes.h
@@ -278,6 +278,17 @@ enum OcrEngineMode {
  OEM_COUNT                    // Number of OEMs
 };

+/**
+ * Thresholding (binarization) methods. Leptonica is used for
+ * thresholding, except when OTSU_TRESH is chosen.
+ */
+enum ThreshMethod {
+  OTSU_TRESH,          // Legacy Tesseract's Otsu thresholding
+  ADAPTIVE_OTSU_TRESH, // Leptonica's pixOtsuAdaptiveThreshold
+  SAUVOLA_TILED_TRESH, // Leptonica's pixSauvolaBinarizeTiled
+  TRESH_METHODS_COUNT, // Number of Thresholding methods
+};
+
 } // namespace tesseract.

 #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -2108,20 +2108,41 @@ bool TessBaseAPI::Threshold(Pix **pix) {
    thresholder_->SetSourceYResolution(kMinCredibleResolution);
  }
  auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
-  Image im(*pix);
-  if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
-    return false;
-  }
-  *pix = im;
+
+  Image pix_binary(*pix);
+  Image pix_grey;
+  Image pix_thresholds;
+
+  auto thresholding_method = static_cast<ThreshMethod>(static_cast<int>(tesseract_->thresholding_method));
+
+  if (thresholding_method == OTSU_TRESH) {
+    if (!thresholder_->ThresholdToPix(pageseg_mode, &pix_binary)) {
+      return false;
+    }
+    *pix = pix_binary;
+  
+    if (!thresholder_->IsBinary()) {
+      tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
+      tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
+    } else {
+      tesseract_->set_pix_thresholds(nullptr);
+      tesseract_->set_pix_grey(nullptr);
+    }
+  } else {
+    auto [ok, pix_grey, pix_binary, pix_thresholds] = thresholder_->Threshold(thresholding_method);
+
+    if (!ok) {
+      return false;
+    }
+    *pix = pix_binary;
+
+    tesseract_->set_pix_thresholds(pix_thresholds);
+    tesseract_->set_pix_grey(pix_grey); 
+  } 
+   
  thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
                              &image_height_);
-  if (!thresholder_->IsBinary()) {
-    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
-    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
-  } else {
-    tesseract_->set_pix_thresholds(nullptr);
-    tesseract_->set_pix_grey(nullptr);
-  }
+  
  // Set the internal resolution that is used for layout parameters from the
  // estimated resolution, rather than the image resolution, which may be
  // fabricated, but we will use the image resolution, if there is one, to

--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -74,6 +74,9 @@ Tesseract::Tesseract()
               "11=sparse_text, 12=sparse_text+osd, 13=raw_line"
               " (Values from PageSegMode enum in tesseract/publictypes.h)",
               this->params())
+    , INT_MEMBER(thresholding_method, OTSU_TRESH,
+            "Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola",
+            this->params())
    , INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
                      "Which OCR engine(s) to run (Tesseract, LSTM, both)."
                      " Defaults to loading and running the most accurate"

--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -762,6 +762,8 @@ public:
            "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
            " 5=line, 6=word, 7=char"
            " (Values from PageSegMode enum in tesseract/publictypes.h)");
+  INT_VAR_H(thresholding_method, OTSU_TRESH,
+            "Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola");
  INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
            "Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
            " to loading and running the most accurate available.");

--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
@@ -20,6 +20,7 @@

 #include <cstdint> // for uint32_t
 #include <cstring>
+#include <tuple>

 #include "otsuthr.h"
 #include "thresholder.h"
@@ -184,6 +185,45 @@ void ImageThresholder::SetImage(const Image pix) {
  Init();
 }

+// Thresholds the source image with a Leptonica-based method and returns
+// (ok, pix_grey, pix_binary, pix_thresholds). ok is false when the image is
+// too large or the Leptonica call returns nonzero. Binary input is returned
+// as a copy, with ok true and no grey/threshold images. Any method other
+// than SAUVOLA_TILED_TRESH (including out-of-range values) uses adaptive Otsu.
+std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
+                                                         ThreshMethod method) {
+  Image pix_binary = nullptr;
+  Image pix_thresholds = nullptr;
+
+  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
+    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
+    return std::make_tuple(false, nullptr, nullptr, nullptr);
+  }
+
+  if (pix_channels_ == 0) {
+    // We have a binary image, but it still has to be copied, as this API
+    // allows the caller to modify the output. Nothing to threshold: success.
+    Image original = GetPixRect();
+    pix_binary = original.copy();
+    original.destroy();
+    return std::make_tuple(true, nullptr, pix_binary, nullptr);
+  }
+
+  Image pix_grey = GetPixRectGrey();
+
+  // Every branch assigns r, so it is never read uninitialized, even for
+  // method values outside the enum (e.g. a negative config value).
+  int r;
+  if (method == SAUVOLA_TILED_TRESH) {
+    r = pixSauvolaBinarizeTiled(pix_grey, 25, 0.40, 300, 300,
+                                pix_thresholds.a(), pix_binary.a());
+  } else {
+    r = pixOtsuAdaptiveThreshold(pix_grey, 300, 300, 0, 0, 0.1,
+                                 pix_thresholds.a(), pix_binary.a());
+  }
+  return std::make_tuple(r == 0, pix_grey, pix_binary, pix_thresholds);
+}
+
 // Threshold the source image as efficiently as possible to the output Pix.
 // Creates a Pix and sets pix to point to the resulting pointer.
 // Caller must use pixDestroy to free the created Pix.

--- a/src/ccmain/thresholder.h
+++ b/src/ccmain/thresholder.h
@@ -121,6 +121,9 @@ public:
  /// Returns false on error.
  virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);

+  /// Thresholds with `method`; returns (ok, pix_grey, pix_binary, pix_thresholds).
+  virtual std::tuple<bool, Image, Image, Image> Threshold(ThreshMethod method);
+
  // Gets a pix that contains an 8 bit threshold value at each pixel. The
  // returned pix may be an integer reduction of the binary image such that
  // the scale factor may be inferred from the ratio of the sizes, even down

--- a/src/ccstruct/image.h
+++ b/src/ccstruct/image.h
@@ -33,6 +33,8 @@ public:
  // service
  operator Pix *() const { return pix_; }
  Pix *operator->() const { return pix_; }
+  // Address of the wrapped Pix pointer, for passing as a Pix** output argument.
+  Pix **a() { return &pix_; }

  // api
  Image clone() const; // increases refcount