diff --git a/include/tesseract/publictypes.h b/include/tesseract/publictypes.h
index e4821e91b31260ce2f5c89b9411f5367c8bd5700..a81220ff805ecbdf28f9c9d922f8b9a5d62db242 100644
--- a/include/tesseract/publictypes.h
+++ b/include/tesseract/publictypes.h
@@ -278,6 +278,17 @@ enum OcrEngineMode {
   OEM_COUNT // Number of OEMs
 };
 
+/**
+ * Thresholding methods.
+ * Except when OTSU_TRESH is chosen, Leptonica is used for thresholding.
+ */
+enum ThreshMethod {
+  OTSU_TRESH,          // Legacy Tesseract's Otsu thresholding
+  ADAPTIVE_OTSU_TRESH, // Leptonica's pixOtsuAdaptiveThreshold
+  SAUVOLA_TILED_TRESH, // Leptonica's pixSauvolaBinarizeTiled
+  TRESH_METHODS_COUNT, // Number of Thresholding methods
+};
+
 } // namespace tesseract.
 
 #endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index fd2ec91633d1e157440375c523a73f0dccb7af9c..4b376ee55ec3ae3ce8f98dd7db49f56913e65646 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -2108,20 +2108,41 @@ bool TessBaseAPI::Threshold(Pix **pix) {
     thresholder_->SetSourceYResolution(kMinCredibleResolution);
   }
   auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
-  Image im(*pix);
-  if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
-    return false;
-  }
-  *pix = im;
+
+  Image pix_binary(*pix);
+
+  auto thresholding_method = static_cast<ThreshMethod>(static_cast<int>(tesseract_->thresholding_method));
+
+  if (thresholding_method == OTSU_TRESH) {
+    // Legacy path: Tesseract's own Otsu thresholder.
+    if (!thresholder_->ThresholdToPix(pageseg_mode, &pix_binary)) {
+      return false;
+    }
+    *pix = pix_binary;
+
+    if (!thresholder_->IsBinary()) {
+      tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
+      tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
+    } else {
+      tesseract_->set_pix_thresholds(nullptr);
+      tesseract_->set_pix_grey(nullptr);
+    }
+  } else {
+    // Leptonica-based path: the thresholder hands back all images at once.
+    auto [ok, grey, binary, thresholds] = thresholder_->Threshold(thresholding_method);
+
+    if (!ok) {
+      return false;
+    }
+    *pix = binary;
+
+    tesseract_->set_pix_thresholds(thresholds);
+    tesseract_->set_pix_grey(grey);
+  }
+
   thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
                               &image_height_);
-  if (!thresholder_->IsBinary()) {
-    tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
-    tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
-  } else {
-    tesseract_->set_pix_thresholds(nullptr);
-    tesseract_->set_pix_grey(nullptr);
-  }
+
   // Set the internal resolution that is used for layout parameters from the
   // estimated resolution, rather than the image resolution, which may be
   // fabricated, but we will use the image resolution, if there is one, to
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index f609d40d0b28d285a5a01b9f590057016c12129a..50a7789103d8702619f9004b8dafbdc020dbed19 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -74,6 +74,9 @@ Tesseract::Tesseract()
                  "11=sparse_text, 12=sparse_text+osd, 13=raw_line"
                  " (Values from PageSegMode enum in tesseract/publictypes.h)",
                  this->params())
+    , INT_MEMBER(thresholding_method, OTSU_TRESH,
+                 "Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola",
+                 this->params())
     , INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
                       "Which OCR engine(s) to run (Tesseract, LSTM, both)."
                       " Defaults to loading and running the most accurate"
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index 2acb13903e867375c73357a63b72356fb3bc9803..ad9011ec2a32ad75562978519abbb737841a5b2d 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -762,6 +762,8 @@ public:
             "Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
             " 5=line, 6=word, 7=char"
             " (Values from PageSegMode enum in tesseract/publictypes.h)");
+  INT_VAR_H(thresholding_method, OTSU_TRESH,
+            "Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola");
   INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
             "Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
             " to loading and running the most accurate available.");
diff --git a/src/ccmain/thresholder.cpp b/src/ccmain/thresholder.cpp
index 89b5e3f1dae06b8645f22a50144525043c5bbdea..ba17fc8f3742d860c95e95f196b08a6aaac99586 100644
--- a/src/ccmain/thresholder.cpp
+++ b/src/ccmain/thresholder.cpp
@@ -20,6 +20,7 @@
 
 #include <cstdint> // for uint32_t
 #include <cstring>
+#include <tuple>
 
 #include "otsuthr.h"
 #include "thresholder.h"
@@ -184,6 +185,52 @@ void ImageThresholder::SetImage(const Image pix) {
   Init();
 }
 
+// Thresholds the source image with the given Leptonica-based method.
+// OTSU_TRESH and out-of-range values fall back to ADAPTIVE_OTSU_TRESH.
+// Returns (ok, grey, binary, thresholds). For an already binary source only
+// the binary image (a copy of the source rectangle) is set.
+std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
+    ThreshMethod method) {
+  Image pix_grey = nullptr;
+  Image pix_binary = nullptr;
+  Image pix_thresholds = nullptr;
+
+  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
+    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
+    return std::make_tuple(false, nullptr, nullptr, nullptr);
+  }
+
+  if (pix_channels_ == 0) {
+    // We have a binary image, but it still has to be copied, as this API
+    // allows the caller to modify the output.
+    Image original = GetPixRect();
+    pix_binary = original.copy();
+    original.destroy();
+    // Not an error: report success so the caller uses (and owns) the copy.
+    return std::make_tuple(true, nullptr, pix_binary, nullptr);
+  }
+
+  pix_grey = GetPixRectGrey();
+
+  if (method == OTSU_TRESH || method >= TRESH_METHODS_COUNT) {
+    method = ADAPTIVE_OTSU_TRESH;
+  }
+
+  // A result of 0 means success; start from failure so that no path can
+  // read an uninitialized value.
+  int r = 1;
+  if (method == ADAPTIVE_OTSU_TRESH) {
+    r = pixOtsuAdaptiveThreshold(pix_grey, 300, 300, 0, 0, 0.1,
+                                 pix_thresholds.a(), pix_binary.a());
+  } else if (method == SAUVOLA_TILED_TRESH) {
+    r = pixSauvolaBinarizeTiled(pix_grey, 25, 0.40, 300, 300, pix_thresholds.a(),
+                                pix_binary.a());
+  }
+
+  const bool ok = (r == 0);
+  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
+}
+
 // Threshold the source image as efficiently as possible to the output Pix.
 // Creates a Pix and sets pix to point to the resulting pointer.
 // Caller must use pixDestroy to free the created Pix.
diff --git a/src/ccmain/thresholder.h b/src/ccmain/thresholder.h
index 4d3a426e12fa5fc5de75a1ee16eb00649bf74119..e5c3909c7363f421ba79f0f113a92e82184a8c0d 100644
--- a/src/ccmain/thresholder.h
+++ b/src/ccmain/thresholder.h
@@ -121,6 +121,11 @@ public:
   /// Returns false on error.
   virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);
 
+  /// Thresholds the source image with the given (Leptonica-based) method.
+  /// Returns a tuple (ok, grey, binary, thresholds); ok is false on error.
+  virtual std::tuple<bool, Image, Image, Image> Threshold(
+      ThreshMethod method);
+
   // Gets a pix that contains an 8 bit threshold value at each pixel. The
   // returned pix may be an integer reduction of the binary image such that
   // the scale factor may be inferred from the ratio of the sizes, even down
diff --git a/src/ccstruct/image.h b/src/ccstruct/image.h
index 192884fe17fd9396f478a4eaef4ac884805e8051..ca7030fc000f252776293a432b6264207928c1a0 100644
--- a/src/ccstruct/image.h
+++ b/src/ccstruct/image.h
@@ -33,6 +33,8 @@ public:
   // service
   operator Pix *() const { return pix_; }
   Pix *operator->() const { return pix_; }
+  Pix **a() { return &pix_; } // address of the wrapped Pix*, for output parameters
+
 
   // api
   Image clone() const; // increases refcount