提交 11c73c94 编写于 作者: A Amit Dovev

Add more binarization options

Use functions from Leptonica to provide more binarization options. The new options are: 1) Adaptive Otsu and 2) Sauvola (Tiled).
上级 65118b2e
......@@ -278,6 +278,17 @@ enum OcrEngineMode {
OEM_COUNT // Number of OEMs
};
/**
 * Selectable thresholding (binarization) methods.
 *
 * OTSU_TRESH is the only method that does not go through Leptonica;
 * every other method is thresholded with Leptonica.
 */
enum ThreshMethod {
  OTSU_TRESH = 0,           // Legacy Tesseract's Otsu thresholding
  ADAPTIVE_OTSU_TRESH = 1,  // Adaptive Otsu
  SAUVOLA_TILED_TRESH = 2,  // Sauvola (tiled)
  TRESH_METHODS_COUNT = 3   // Number of Thresholding methods
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_
......@@ -2108,20 +2108,41 @@ bool TessBaseAPI::Threshold(Pix **pix) {
thresholder_->SetSourceYResolution(kMinCredibleResolution);
}
auto pageseg_mode = static_cast<PageSegMode>(static_cast<int>(tesseract_->tessedit_pageseg_mode));
Image im(*pix);
if (!thresholder_->ThresholdToPix(pageseg_mode, &im)) {
return false;
}
*pix = im;
Image pix_binary(*pix);
Image pix_grey;
Image pix_thresholds;
auto thresholding_method = static_cast<ThreshMethod>(static_cast<int>(tesseract_->thresholding_method));
if (thresholding_method == OTSU_TRESH) {
if (!thresholder_->ThresholdToPix(pageseg_mode, &pix_binary)) {
return false;
}
*pix = pix_binary;
if (!thresholder_->IsBinary()) {
tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
} else {
tesseract_->set_pix_thresholds(nullptr);
tesseract_->set_pix_grey(nullptr);
}
} else {
auto [ok, pix_grey, pix_binary, pix_thresholds] = thresholder_->Threshold(thresholding_method);
if (!ok) {
return false;
}
*pix = pix_binary;
tesseract_->set_pix_thresholds(pix_thresholds);
tesseract_->set_pix_grey(pix_grey);
}
thresholder_->GetImageSizes(&rect_left_, &rect_top_, &rect_width_, &rect_height_, &image_width_,
&image_height_);
if (!thresholder_->IsBinary()) {
tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds());
tesseract_->set_pix_grey(thresholder_->GetPixRectGrey());
} else {
tesseract_->set_pix_thresholds(nullptr);
tesseract_->set_pix_grey(nullptr);
}
// Set the internal resolution that is used for layout parameters from the
// estimated resolution, rather than the image resolution, which may be
// fabricated, but we will use the image resolution, if there is one, to
......
......@@ -74,6 +74,9 @@ Tesseract::Tesseract()
"11=sparse_text, 12=sparse_text+osd, 13=raw_line"
" (Values from PageSegMode enum in tesseract/publictypes.h)",
this->params())
, INT_MEMBER(thresholding_method, OTSU_TRESH,
"Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola",
this->params())
, INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
" Defaults to loading and running the most accurate"
......
......@@ -762,6 +762,8 @@ public:
"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block,"
" 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in tesseract/publictypes.h)");
// Integer parameter selecting the thresholding (binarization) method.
// Defaults to OTSU_TRESH; TessBaseAPI::Threshold casts the value to a
// ThreshMethod enumerator.
INT_VAR_H(thresholding_method, OTSU_TRESH,
"Thresholding method: 0 = Otsu, 1 = Adaptive Otsu, 2 = Sauvola");
INT_VAR_H(tessedit_ocr_engine_mode, tesseract::OEM_DEFAULT,
"Which OCR engine(s) to run (Tesseract, LSTM, both). Defaults"
" to loading and running the most accurate available.");
......
......@@ -20,6 +20,7 @@
#include <cstdint> // for uint32_t
#include <cstring>
#include <tuple>
#include "otsuthr.h"
#include "thresholder.h"
......@@ -184,6 +185,45 @@ void ImageThresholder::SetImage(const Image pix) {
Init();
}
/// Thresholds the source image with one of the Leptonica-based methods.
///
/// @param method  Requested thresholding method. SAUVOLA_TILED_TRESH selects
///                tiled Sauvola binarization; every other value (including
///                OTSU_TRESH and out-of-range values) falls back to
///                ADAPTIVE_OTSU_TRESH.
/// @return Tuple of (ok, grey image, binary image, thresholds image).
///         - Image too large: (false, nullptr, nullptr, nullptr).
///         - Source already binary: (true, nullptr, copy of source, nullptr).
///         - Otherwise ok is true iff the Leptonica call returned 0.
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
    ThreshMethod method) {
  Image pix_grey = nullptr;
  Image pix_binary = nullptr;
  Image pix_thresholds = nullptr;

  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return std::make_tuple(false, nullptr, nullptr, nullptr);
  }

  if (pix_channels_ == 0) {
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
    Image original = GetPixRect();
    pix_binary = original.copy();
    original.destroy();
    // A valid binary result is being returned, so report success. Reporting
    // failure here would make callers reject every binary input (and drop
    // the copy that was just made).
    return std::make_tuple(true, nullptr, pix_binary, nullptr);
  }

  pix_grey = GetPixRectGrey();

  // Leptonica returns 0 on success. Dispatching with a plain if/else (rather
  // than normalising `method` first) guarantees `status` is always assigned,
  // even when `method` holds a negative value produced by casting an
  // arbitrary integer parameter to ThreshMethod.
  int status;
  if (method == SAUVOLA_TILED_TRESH) {
    // NOTE(review): per the Leptonica documentation the two `300` arguments
    // of pixSauvolaBinarizeTiled are tile *counts* (nx, ny), not tile sizes
    // as in pixOtsuAdaptiveThreshold - confirm this is intended.
    status = pixSauvolaBinarizeTiled(pix_grey, 25, 0.40, 300, 300,
                                     pix_thresholds.a(), pix_binary.a());
  } else {
    // ADAPTIVE_OTSU_TRESH, and the fallback for OTSU_TRESH / unknown values.
    status = pixOtsuAdaptiveThreshold(pix_grey, 300, 300, 0, 0, 0.1,
                                      pix_thresholds.a(), pix_binary.a());
  }

  // NOTE(review): on failure the images are still handed back to the caller;
  // verify that callers release them when ok is false.
  const bool ok = (status == 0);
  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
}
// Threshold the source image as efficiently as possible to the output Pix.
// Creates a Pix and sets pix to point to the resulting pointer.
// Caller must use pixDestroy to free the created Pix.
......
......@@ -121,6 +121,9 @@ public:
/// Returns false on error.
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Image *pix);
/// Thresholds the source image with the given method.
/// Returns a tuple of (status flag, grey image, binary image,
/// thresholds image).
virtual std::tuple<bool, Image, Image, Image> Threshold(
ThreshMethod method);
// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
......
......@@ -33,6 +33,8 @@ public:
// service
// Implicit conversion to the wrapped raw Pix pointer.
operator Pix *() const { return pix_; }
// Member access on the wrapped Pix through the raw pointer.
Pix *operator->() const { return pix_; }
// Returns the address of the wrapped pointer so it can be passed where a
// Pix** output argument is expected. Writes through the returned pointer
// replace pix_ directly.
Pix **a() { return &pix_; }
// api
Image clone() const; // increases refcount
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册