提交 137f4806 编写于 作者: T theraysmith

Added sub/superscript, small/dropcap detection

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@547 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 d8a2303d
......@@ -514,9 +514,9 @@ PageIterator* TessBaseAPI::AnalyseLayout() {
if (block_list_->empty())
return NULL; // The page was empty.
page_res_ = new PAGE_RES(block_list_, NULL);
// TODO(rays) Support transmission of image scaling and resolution through
// ImageThresholder, so it can be used here in place of literal 1, 300.
return new PageIterator(page_res_, tesseract_, 1, 300,
return new PageIterator(page_res_, tesseract_,
thresholder_->GetScaleFactor(),
thresholder_->GetScaledYResolution(),
rect_left_, rect_top_, rect_width_, rect_height_);
}
return NULL;
......@@ -798,9 +798,9 @@ bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename,
ResultIterator* TessBaseAPI::GetIterator() {
if (tesseract_ == NULL || page_res_ == NULL)
return NULL;
// TODO(rays) Support transmission of image scaling and resolution through
// ImageThresholder, so it can be used here in place of literal 1, 300.
return new ResultIterator(page_res_, tesseract_, 1, 300,
return new ResultIterator(page_res_, tesseract_,
thresholder_->GetScaleFactor(),
thresholder_->GetScaledYResolution(),
rect_left_, rect_top_, rect_width_, rect_height_);
}
......@@ -952,17 +952,15 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
hocr_str += "<strong>";
if (word->italic > 0)
hocr_str += "<em>";
int i;
int i;
// escape special characters
for (i = 0;
choice->unichar_string()[i] != '\0';
i++) {
if (choice->unichar_string()[i] == '<') { hocr_str += "&lt;"; }
else if (choice->unichar_string()[i] == '>') { hocr_str += "&gt;"; }
else if (choice->unichar_string()[i] == '&') { hocr_str += "&amp;"; }
else if (choice->unichar_string()[i] == '"') { hocr_str += "&quot;"; }
else if (choice->unichar_string()[i] == '\'') { hocr_str += "&#39;"; }
else { hocr_str += choice->unichar_string()[i]; }
for (i = 0; choice->unichar_string()[i] != '\0'; i++) {
if (choice->unichar_string()[i] == '<') hocr_str += "&lt;";
else if (choice->unichar_string()[i] == '>') hocr_str += "&gt;";
else if (choice->unichar_string()[i] == '&') hocr_str += "&amp;";
else if (choice->unichar_string()[i] == '"') hocr_str += "&quot;";
else if (choice->unichar_string()[i] == '\'') hocr_str += "&#39;";
else hocr_str += choice->unichar_string()[i];
}
if (word->italic > 0)
hocr_str += "</em>";
......@@ -973,7 +971,7 @@ char* TessBaseAPI::GetHOCRText(int page_number) {
hocr_str += " ";
}
}
if (block != NULL)
if (block != NULL)
hocr_str += "</span>\n</p>\n</div>\n";
hocr_str += "</div>\n";
......@@ -1206,6 +1204,7 @@ bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) {
bool success = true;
PageSegMode current_psm = GetPageSegMode();
SetPageSegMode(mode);
SetVariable("classify_enable_learning", "0");
char* text = GetUTF8Text();
if (text != NULL) {
PAGE_RES_IT it(page_res_);
......
......@@ -153,6 +153,7 @@ const char* ResultIterator::WordFontAttributes(bool* is_bold,
bool* is_underlined,
bool* is_monospace,
bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id) const {
if (it_->word() == NULL) return NULL; // Already at the end!
......@@ -165,6 +166,7 @@ const char* ResultIterator::WordFontAttributes(bool* is_bold,
*is_underlined = false; // TODO(rays) fix this!
*is_monospace = font_info.is_fixed_pitch();
*is_serif = font_info.is_serif();
*is_smallcaps = it_->word()->small_caps;
// The font size is calculated from a multiple of the x-height
// that came from the block.
float row_height = it_->row()->row->x_height() *
......@@ -192,6 +194,33 @@ bool ResultIterator::WordIsNumeric() const {
return permuter == NUMBER_PERM;
}
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
// Returns false when cblob_it_ is non-NULL, when there is no current word,
// or when the current word has no box_word to query.
bool ResultIterator::SymbolIsSuperscript() const {
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Other users of WERD_RES::box_word test it against NULL before use, so
  // do the same here rather than dereferencing it unconditionally.
  if (it_->word()->box_word == NULL)
    return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUPERSCRIPT;
}
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
// Returns false when cblob_it_ is non-NULL, when there is no current word,
// or when the current word has no box_word to query.
bool ResultIterator::SymbolIsSubscript() const {
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Other users of WERD_RES::box_word test it against NULL before use, so
  // do the same here rather than dereferencing it unconditionally.
  if (it_->word()->box_word == NULL)
    return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_SUBSCRIPT;
}
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
// Returns false when cblob_it_ is non-NULL, when there is no current word,
// or when the current word has no box_word to query.
bool ResultIterator::SymbolIsDropcap() const {
  if (cblob_it_ != NULL || it_->word() == NULL)
    return false;
  // Other users of WERD_RES::box_word test it against NULL before use, so
  // do the same here rather than dereferencing it unconditionally.
  if (it_->word()->box_word == NULL)
    return false;
  return it_->word()->box_word->BlobPosition(blob_index_) == SP_DROPCAP;
}
ChoiceIterator::ChoiceIterator(const ResultIterator& result_it) {
ASSERT_HOST(result_it.it_->word() != NULL);
tesseract_ = result_it.tesseract_;
......
......@@ -97,6 +97,7 @@ class ResultIterator : public PageIterator {
bool* is_underlined,
bool* is_monospace,
bool* is_serif,
bool* is_smallcaps,
int* pointsize,
int* font_id) const;
......@@ -105,6 +106,21 @@ class ResultIterator : public PageIterator {
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
......
......@@ -18,7 +18,7 @@
**********************************************************************/
#include "mfcpch.h"
//#define USE_VLD //Uncomment for Visual Leak Detector.
// #define USE_VLD //Uncomment for Visual Leak Detector.
#if (defined _MSC_VER && defined USE_VLD)
#include <vld.h>
#endif
......@@ -178,9 +178,8 @@ int WINAPI WinMain( //main for windows //command line
argsin[1] = strdup (lpszCmdLine);
/*allocate memory for the args. There can never be more than half*/
/*the total number of characters in the arguments.*/
argv =
(char **) malloc (((strlen (argsin[0]) + strlen (argsin[1])) / 2 + 1) *
sizeof (char *));
argv = (char **)malloc(((strlen(argsin[0]) + strlen(argsin[1])) / 2 + 1) *
sizeof(char *));
/*now construct argv as it should be for C.*/
argc = parse_args (2, argsin, argv);
......
......@@ -519,8 +519,26 @@ bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
for (int i = 0; i < word_length; ++i)
choices[i].delete_data_pointers();
delete [] choices;
if (word_res->best_state.empty())
return false;
if (word_res->best_state.empty()) {
// Build the original segmentation and if it is the same length as the
// truth, assume it will do.
int blob_count = 1;
for (int s = 0; s < array_count(word_res->seam_array); ++s) {
SEAM* seam =
reinterpret_cast<SEAM*>(array_value(word_res->seam_array, s));
if (seam->split1 == NULL) {
word_res->best_state.push_back(blob_count);
blob_count = 1;
} else {
++blob_count;
}
}
word_res->best_state.push_back(blob_count);
if (word_res->best_state.size() != target_text.size()) {
word_res->best_state.clear(); // No good. Original segmentation bad size.
return false;
}
}
word_res->correct_text.clear();
for (int i = 0; i < target_text.size(); ++i) {
word_res->correct_text.push_back(
......
......@@ -569,7 +569,7 @@ static void SwitchWordOrDiscard(bool accept_new_word, WERD_RES* word,
word->raw_choice = new_word->raw_choice;
new_word->raw_choice = NULL;
word->reject_map = new_word->reject_map;
word->done = new_word->done;
word->CopySimpleFields(*new_word);
} else {
// The new_word is no better, so destroy it and cleanup.
new_word->ClearResults();
......@@ -664,6 +664,26 @@ void Tesseract::classify_word_pass2(WERD_RES *word, BLOCK* block, ROW *row) {
}
if (accept_new_xht)
done_this_pass = true;
// Test for small caps. Word capheight must be close to block xheight,
// and word must contain no lower case letters, and at least one upper case.
double small_cap_xheight = block->x_height() * kXHeightCapRatio;
double small_cap_delta = (block->x_height() - small_cap_xheight) / 2.0;
if (unicharset.script_has_xheight() &&
small_cap_xheight - small_cap_delta <= word->x_height &&
word->x_height <= small_cap_xheight + small_cap_delta) {
// Scan for upper/lower.
int num_upper = 0;
int num_lower = 0;
for (int i = 0; i < word->best_choice->length(); ++i) {
if (unicharset.get_isupper(word->best_choice->unichar_id(i)))
++num_upper;
else if (unicharset.get_islower(word->best_choice->unichar_id(i)))
++num_lower;
}
if (num_upper > 0 && num_lower == 0)
word->small_caps = true;
}
word->SetScriptPositions(unicharset);
set_global_subloc_code(SUBLOC_NORM);
}
......
......@@ -76,7 +76,29 @@ enum CMD_EVENTS
REFRESH_CMD_EVENT,
QUIT_CMD_EVENT,
RECOG_WERDS,
RECOG_PSEUDO
RECOG_PSEUDO,
SHOW_SUBSCRIPT_CMD_EVENT,
SHOW_SUPERSCRIPT_CMD_EVENT,
SHOW_ITALIC_CMD_EVENT,
SHOW_BOLD_CMD_EVENT,
SHOW_UNDERLINE_CMD_EVENT,
SHOW_FIXEDPITCH_CMD_EVENT,
SHOW_SERIF_CMD_EVENT,
SHOW_SMALLCAPS_CMD_EVENT,
SHOW_DROPCAPS_CMD_EVENT,
};
// Selects which attribute the debug word display highlights. In every mode
// other than CM_RAINBOW, blobs that have the selected attribute are drawn
// in red and all other blobs in green.
enum ColorationMode {
  CM_RAINBOW = 0,      // Default: no attribute highlighting.
  CM_SUBSCRIPT = 1,    // Highlight blobs positioned as subscripts.
  CM_SUPERSCRIPT = 2,  // Highlight blobs positioned as superscripts.
  CM_ITALIC = 3,       // Highlight words whose font is italic.
  CM_BOLD = 4,         // Highlight words whose font is bold.
  CM_UNDERLINE = 5,    // Underline detection is currently unsupported.
  CM_FIXEDPITCH = 6,   // Highlight words whose font is fixed pitch.
  CM_SERIF = 7,        // Highlight words whose font has serifs.
  CM_SMALLCAPS = 8,    // Highlight words flagged as small caps.
  CM_DROPCAPS = 9      // Highlight blobs positioned as drop-caps.
};
/*
......@@ -99,6 +121,7 @@ CMD_EVENTS mode = CHANGE_DISP_CMD_EVENT; // selected words op
// These variables should remain global, since they are only used for the
// debug mode (in which only a single Tesseract thread/instance will exist).
BITS16 word_display_mode;
static ColorationMode color_mode = CM_RAINBOW;
BOOL8 display_image = FALSE;
BOOL8 display_blocks = FALSE;
BOOL8 display_baselines = FALSE;
......@@ -253,6 +276,16 @@ SVMenuNode *Tesseract::build_menu_new() {
parent_menu->AddChild("Polygonal Approx", POLYGONAL_CMD_EVENT, FALSE);
parent_menu->AddChild("Baseline Normalized", BL_NORM_CMD_EVENT, FALSE);
parent_menu->AddChild("Edge Steps", BITMAP_CMD_EVENT, TRUE);
parent_menu->AddChild("Subscripts", SHOW_SUBSCRIPT_CMD_EVENT);
parent_menu->AddChild("Superscripts", SHOW_SUPERSCRIPT_CMD_EVENT);
parent_menu->AddChild("Italics", SHOW_ITALIC_CMD_EVENT);
parent_menu->AddChild("Bold", SHOW_BOLD_CMD_EVENT);
parent_menu->AddChild("Underline", SHOW_UNDERLINE_CMD_EVENT);
parent_menu->AddChild("FixedPitch", SHOW_FIXEDPITCH_CMD_EVENT);
parent_menu->AddChild("Serifs", SHOW_SERIF_CMD_EVENT);
parent_menu->AddChild("SmallCaps", SHOW_SMALLCAPS_CMD_EVENT);
parent_menu->AddChild("DropCaps", SHOW_DROPCAPS_CMD_EVENT);
parent_menu = root_menu_item->AddChild("OTHER");
......@@ -368,7 +401,8 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics
char msg[160];
BOOL8 exit = FALSE;
switch(cmd_event) {
color_mode = CM_RAINBOW;
switch (cmd_event) {
case NULL_CMD_EVENT:
break;
......@@ -434,6 +468,42 @@ BOOL8 Tesseract::process_cmd_win_event( // UI command semantics
display_baselines =(new_value[0] == 'T');
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_SUBSCRIPT_CMD_EVENT:
color_mode = CM_SUBSCRIPT;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_SUPERSCRIPT_CMD_EVENT:
color_mode = CM_SUPERSCRIPT;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_ITALIC_CMD_EVENT:
color_mode = CM_ITALIC;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_BOLD_CMD_EVENT:
color_mode = CM_BOLD;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_UNDERLINE_CMD_EVENT:
color_mode = CM_UNDERLINE;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_FIXEDPITCH_CMD_EVENT:
color_mode = CM_FIXEDPITCH;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_SERIF_CMD_EVENT:
color_mode = CM_SERIF;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_SMALLCAPS_CMD_EVENT:
color_mode = CM_SMALLCAPS;
do_re_display(&tesseract::Tesseract::word_display);
break;
case SHOW_DROPCAPS_CMD_EVENT:
color_mode = CM_DROPCAPS;
do_re_display(&tesseract::Tesseract::word_display);
break;
case REFRESH_CMD_EVENT:
do_re_display(&tesseract::Tesseract::word_display);
break;
......@@ -649,11 +719,63 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
float shift; // from bot left
C_BLOB_IT c_it; // cblob iterator
if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
BoxWord* box_word = word_res->box_word;
int length = box_word->length();
int font_id = word_res->font1;
if (font_id < 0) font_id = 0;
const UnicityTable<FontInfo> &font_table = get_fontinfo_table();
FontInfo font_info = font_table.get(font_id);
for (int i = 0; i < length; ++i) {
ScrollView::Color color = ScrollView::GREEN;
switch (color_mode) {
case CM_SUBSCRIPT:
if (box_word->BlobPosition(i) == SP_SUBSCRIPT)
color = ScrollView::RED;
break;
case CM_SUPERSCRIPT:
if (box_word->BlobPosition(i) == SP_SUPERSCRIPT)
color = ScrollView::RED;
break;
case CM_ITALIC:
if (font_info.is_italic())
color = ScrollView::RED;
break;
case CM_BOLD:
if (font_info.is_bold())
color = ScrollView::RED;
break;
case CM_FIXEDPITCH:
if (font_info.is_fixed_pitch())
color = ScrollView::RED;
break;
case CM_SERIF:
if (font_info.is_serif())
color = ScrollView::RED;
break;
case CM_SMALLCAPS:
if (word_res->small_caps)
color = ScrollView::RED;
break;
case CM_DROPCAPS:
if (box_word->BlobPosition(i) == SP_DROPCAP)
color = ScrollView::RED;
break;
// TODO(rays) underline is currently completely unsupported.
case CM_UNDERLINE:
default:
break;
}
image_win->Pen(color);
TBOX box = box_word->BlobBox(i);
image_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
}
return true;
}
/*
Note the double coercions of (COLOUR)((inT32)editor_image_word_bb_color)
etc. are to keep the compiler happy.
*/
// display bounding box
if (word->display_flag(DF_BOX)) {
word->bounding_box().plot(image_win,
......
......@@ -17,15 +17,7 @@
//
///////////////////////////////////////////////////////////////////////
// Include automatically generated configuration file if running autoconf.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifdef HAVE_LIBLEPT
// Include leptonica library only if autoconf (or makefile etc) tell us to.
#include "allheaders.h"
#endif
#include "thresholder.h"
......@@ -37,13 +29,11 @@
namespace tesseract {
ImageThresholder::ImageThresholder()
:
#ifdef HAVE_LIBLEPT
pix_(NULL),
#endif
: pix_(NULL),
image_data_(NULL),
image_width_(0), image_height_(0),
image_bytespp_(0), image_bytespl_(0) {
image_bytespp_(0), image_bytespl_(0),
scale_(1), yres_(300) {
SetRectangle(0, 0, 0, 0);
}
......@@ -53,21 +43,17 @@ ImageThresholder::~ImageThresholder() {
// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
#ifdef HAVE_LIBLEPT
if (pix_ != NULL) {
pixDestroy(&pix_);
pix_ = NULL;
}
#endif
image_data_ = NULL;
}
// Return true if no image has been set.
bool ImageThresholder::IsEmpty() const {
#ifdef HAVE_LIBLEPT
if (pix_ != NULL)
return false;
#endif
return image_data_ == NULL;
}
......@@ -84,16 +70,16 @@ bool ImageThresholder::IsEmpty() const {
void ImageThresholder::SetImage(const unsigned char* imagedata,
int width, int height,
int bytes_per_pixel, int bytes_per_line) {
#ifdef HAVE_LIBLEPT
if (pix_ != NULL)
pixDestroy(&pix_);
pix_ = NULL;
#endif
image_data_ = imagedata;
image_width_ = width;
image_height_ = height;
image_bytespp_ = bytes_per_pixel;
image_bytespl_ = bytes_per_line;
scale_ = 1;
yres_ = 300;
Init();
}
......@@ -121,55 +107,6 @@ void ImageThresholder::GetImageSizes(int* left, int* top,
*imageheight = image_height_;
}
// Return true if HAVE_LIBLEPT and this thresholder implements the Pix
// interface.
bool ImageThresholder::HasThresholdToPix() const {
#ifdef HAVE_LIBLEPT
return true;
#else
return false;
#endif
}
// Threshold the source image as efficiently as possible to the output
// tesseract IMAGE class.
void ImageThresholder::ThresholdToIMAGE(IMAGE* image) {
#ifdef HAVE_LIBLEPT
if (pix_ != NULL) {
if (image_bytespp_ == 0) {
// We have a binary image, so it just has to be converted.
CopyBinaryRectPixToIMAGE(image);
} else {
if (image_bytespp_ == 4) {
// Color data can just be passed direct.
const uinT32* data = pixGetData(pix_);
OtsuThresholdRectToIMAGE(reinterpret_cast<const uinT8*>(data),
image_bytespp_, image_bytespl_, image);
} else {
// Convert 8-bit to IMAGE and then pass its
// buffer to the raw interface to complete the conversion.
IMAGE temp_image;
temp_image.FromPix(pix_);
OtsuThresholdRectToIMAGE(temp_image.get_buffer(),
image_bytespp_,
COMPUTE_IMAGE_XDIM(temp_image.get_xsize(),
temp_image.get_bpp()),
image);
}
}
return;
}
#endif
if (image_bytespp_ > 0) {
// Threshold grey or color.
OtsuThresholdRectToIMAGE(image_data_, image_bytespp_, image_bytespl_,
image);
} else {
CopyBinaryRectRawToIMAGE(image);
}
}
#ifdef HAVE_LIBLEPT
// NOTE: Opposite to SetImage for raw images, SetImage for Pix clones its
// input, so the source pix may be pixDestroyed immediately after.
void ImageThresholder::SetImage(const Pix* pix) {
......@@ -191,6 +128,8 @@ void ImageThresholder::SetImage(const Pix* pix) {
depth = pixGetDepth(pix_);
image_bytespp_ = depth / 8;
image_bytespl_ = pixGetWpl(pix_) * sizeof(l_uint32);
scale_ = 1;
yres_ = pixGetYRes(src);
Init();
}
......@@ -275,74 +214,7 @@ Pix* ImageThresholder::GetPixRectGrey() {
}
return pix;
}
#endif
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output IMAGE.
void ImageThresholder::OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line,
IMAGE* image) const {
int* thresholds;
int* hi_values;
OtsuThreshold(imagedata, bytes_per_pixel, bytes_per_line,
rect_left_, rect_top_, rect_width_, rect_height_,
&thresholds, &hi_values);
// Threshold the image to the given IMAGE.
ThresholdRectToIMAGE(imagedata, bytes_per_pixel, bytes_per_line,
thresholds, hi_values, image);
delete [] thresholds;
delete [] hi_values;
}
// Threshold the given grey or color image into the tesseract global
// image ready for recognition. Requires thresholds and hi_value
// produced by OtsuThreshold in otsuthr.cpp.
void ImageThresholder::ThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel,
int bytes_per_line,
const int* thresholds,
const int* hi_values,
IMAGE* image) const {
IMAGELINE line;
image->create(rect_width_, rect_height_, 1);
line.init(rect_width_);
// For each line in the image, fill the IMAGELINE class and put it into the
// output IMAGE. Note that Tesseract stores images with the
// bottom at y=0 and 0 is black, so we need 2 kinds of inversion.
const unsigned char* data = imagedata + rect_top_* bytes_per_line +
rect_left_ * bytes_per_pixel;
for (int y = rect_height_ - 1 ; y >= 0; --y) {
const unsigned char* pix = data;
for (int x = 0; x < rect_width_; ++x, pix += bytes_per_pixel) {
line.pixels[x] = 1;
for (int ch = 0; ch < bytes_per_pixel; ++ch) {
if (hi_values[ch] >= 0 &&
(pix[ch] > thresholds[ch]) == (hi_values[ch] == 0)) {
line.pixels[x] = 0;
break;
}
}
}
image->put_line(0, y, rect_width_, &line, 0);
data += bytes_per_line;
}
}
// Cut out the requested rectangle of the binary image to the output IMAGE.
void ImageThresholder::CopyBinaryRectRawToIMAGE(IMAGE* image) const {
IMAGE rect_image;
rect_image.capture(const_cast<unsigned char*>(image_data_),
image_width_, rect_top_ + rect_height_, 1);
image->create(rect_width_, rect_height_, 1);
// copy_sub_image uses coords starting at the bottom, so the y coord of the
// copy is the bottom of the rect_image.
copy_sub_image(&rect_image, rect_left_, 0, rect_width_, rect_height_,
image, 0, 0, false);
}
#ifdef HAVE_LIBLEPT
// Otsu threshold the rectangle, taking everything except the image buffer
// pointer from the class, to the output Pix.
void ImageThresholder::OtsuThresholdRectToPix(const unsigned char* imagedata,
......@@ -438,21 +310,5 @@ void ImageThresholder::RawRectToPix(Pix** pix) const {
}
}
// Cut out the requested rectangle of the binary image to the output IMAGE.
void ImageThresholder::CopyBinaryRectPixToIMAGE(IMAGE* image) const {
if (IsFullImage()) {
// Just poke it directly into the tess image.
image->FromPix(pix_);
} else {
// Crop to the given rectangle.
Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
Pix* cropped = pixClipRectangle(pix_, box, NULL);
image->FromPix(cropped);
pixDestroy(&cropped);
boxDestroy(&box);
}
}
#endif
} // namespace tesseract.
......@@ -27,7 +27,7 @@ namespace tesseract {
/// Base class for all tesseract image thresholding classes.
/// Specific classes can add new thresholding methods by
/// overriding ThresholdToIMAGE and/or ThresholdToPix.
/// overriding ThresholdToPix.
/// Each instance deals with a single image, but the design is intended to
/// be useful for multiple calls to SetRectangle and ThresholdTo* if
/// desired.
......@@ -66,10 +66,6 @@ class ImageThresholder {
virtual void GetImageSizes(int* left, int* top, int* width, int* height,
int* imagewidth, int* imageheight);
/// Return true if this thresholder implements the Pix
/// interface.
virtual bool HasThresholdToPix() const;
/// Return true if the source image is color.
bool IsColor() const {
return image_bytespp_ >= 3;
......@@ -80,9 +76,15 @@ class ImageThresholder {
return image_bytespp_ == 0;
}
/// Threshold the source image as efficiently as possible to the output
/// tesseract IMAGE class.
virtual void ThresholdToIMAGE(IMAGE* image);
/// Returns the scale factor from the original image (scale_).
int GetScaleFactor() const {
return scale_;
}
/// Returns the y resolution, in pixels/inch, of the source image (yres_).
int GetSourceYResolution() const {
return yres_;
}
/// Returns the source y resolution multiplied by the scale factor.
int GetScaledYResolution() const {
return scale_ * yres_;
}
/// Pix vs raw, which to use?
/// Implementations should provide the ability to source and target Pix
......@@ -126,23 +128,6 @@ class ImageThresholder {
rect_width_ == image_width_ && rect_height_ == image_height_;
}
/// Otsu threshold the rectangle, taking everything except the image buffer
/// pointer from the class, to the output IMAGE.
void OtsuThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
IMAGE* image) const;
/// Threshold the rectangle, taking everything except the image buffer pointer
/// from the class, using thresholds/hi_values to the output IMAGE.
void ThresholdRectToIMAGE(const unsigned char* imagedata,
int bytes_per_pixel, int bytes_per_line,
const int* thresholds, const int* hi_values,
IMAGE* image) const;
/// Cut out the requested rectangle of the source raw binary image to the
/// output IMAGE.
void CopyBinaryRectRawToIMAGE(IMAGE* image) const;
/// Otsu threshold the rectangle, taking everything except the image buffer
/// pointer from the class, to the output Pix.
void OtsuThresholdRectToPix(const unsigned char* imagedata,
......@@ -159,9 +144,6 @@ class ImageThresholder {
/// Copy the raw image rectangle, taking all data from the class, to the Pix.
void RawRectToPix(Pix** pix) const;
/// Cut out the requested rectangle of the binary image to the output IMAGE.
void CopyBinaryRectPixToIMAGE(IMAGE* image) const;
protected:
/// Clone or other copy of the source Pix.
/// The pix will always be PixDestroy()ed on destruction of the class.
......@@ -174,6 +156,8 @@ class ImageThresholder {
int image_bytespp_; ///< Bytes per pixel of source image/pix.
int image_bytespl_; ///< Bytes per line of source image/pix.
int scale_; ///< Scale factor from original image.
int yres_; ///< y pixels/inch in source image.
// Limits of image rectangle to be processed.
int rect_left_;
int rect_top_;
int rect_width_;
......
......@@ -1081,4 +1081,4 @@ void plot_blob_list(ScrollView* win, // window to draw in
it.data()->plot(win, body_colour, child_colour);
}
}
#endif //GRAPHICS_DISABLED
#endif // GRAPHICS_DISABLED
......@@ -760,5 +760,5 @@ void plot_blob_list(ScrollView* win, // window to draw in
BLOBNBOX_LIST *list, // blob list
ScrollView::Color body_colour, // colour to draw
ScrollView::Color child_colour); // colour of child
#endif //GRAPHICS_DISABLED
#endif // GRAPHICS_DISABLED
#endif
......@@ -29,6 +29,12 @@ namespace tesseract {
// tolerance. Otherwise, the blob may be chopped and we have to just use
// the word bounding box.
const int kBoxClipTolerance = 2;
// Min offset in baseline-normalized coords to make a character a subscript.
const int kMinSubscriptOffset = 20;
// Min offset in baseline-normalized coords to make a character a superscript.
const int kMinSuperscriptOffset = 20;
// Max y of bottom of a drop-cap blob.
const int kMaxDropCapBottom = -128;
BoxWord::BoxWord() : length_(0) {
}
......@@ -95,20 +101,35 @@ BoxWord* BoxWord::CopyFromNormalized(const DENORM* denorm,
return boxword;
}
BoxWord* BoxWord::CopyFromPBLOBs(PBLOB_LIST* blobs) {
BoxWord* boxword = new BoxWord();
// Count the blobs.
boxword->length_ = blobs->length();
// Sets up the script_pos_ member using the tessword to get the bln
// bounding boxes, the best_choice to get the unichars, and the unicharset
// to get the target positions. If small_caps is true, sub/super are not
// considered, but dropcaps are.
void BoxWord::SetScriptPositions(const UNICHARSET& unicharset, bool small_caps,
TWERD* tessword, WERD_CHOICE* best_choice) {
// Allocate memory.
boxword->boxes_.reserve(boxword->length_);
// Copy the boxes.
PBLOB_IT pb_it(blobs);
int i = 0;
for (pb_it.mark_cycle_pt(); !pb_it.cycled_list(); pb_it.forward(), ++i) {
boxword->boxes_.push_back(pb_it.data()->bounding_box());
script_pos_.init_to_size(length_, SP_NORMAL);
int blob_index = 0;
for (TBLOB* tblob = tessword->blobs; tblob != NULL; tblob = tblob->next,
++blob_index) {
int class_id = best_choice->unichar_id(blob_index);
TBOX blob_box = tblob->bounding_box();
int top = blob_box.top();
int bottom = blob_box.bottom();
int min_bottom, max_bottom, min_top, max_top;
unicharset.get_top_bottom(class_id, &min_bottom, &max_bottom,
&min_top, &max_top);
if (bottom <= kMaxDropCapBottom) {
script_pos_[blob_index] = SP_DROPCAP;
} else if (!small_caps) {
if (top + kMinSubscriptOffset < min_top) {
script_pos_[blob_index] = SP_SUBSCRIPT;
} else if (bottom - kMinSuperscriptOffset > max_bottom) {
script_pos_[blob_index] = SP_SUPERSCRIPT;
}
}
}
boxword->ComputeBoundingBox();
return boxword;
}
// Clean up the bounding boxes from the polygonal approximation by
......
......@@ -27,11 +27,21 @@ class BLOCK;
class DENORM;
class PBLOB_LIST;
struct TWERD;
class UNICHARSET;
class WERD;
class WERD_CHOICE;
class WERD_RES;
namespace tesseract {
// ScriptPos records the vertical role of a character within its word:
// normal, subscript, superscript or drop-cap.
enum ScriptPos {
  SP_NORMAL = 0,       // Default position; also used for out-of-range queries.
  SP_SUBSCRIPT = 1,    // Character is a subscript.
  SP_SUPERSCRIPT = 2,  // Character is a superscript.
  SP_DROPCAP = 3       // Character is a drop-cap.
};
// Class to hold an array of bounding boxes for an output word and
// the bounding box of the whole word.
class BoxWord {
......@@ -50,7 +60,13 @@ class BoxWord {
// back to the original image coordinates.
static BoxWord* CopyFromNormalized(const DENORM* denorm,
TWERD* tessword);
static BoxWord* CopyFromPBLOBs(PBLOB_LIST* blobs);
// Sets up the script_pos_ member using the tessword to get the bln
// bounding boxes, the best_choice to get the unichars, and the unicharset
// to get the target positions. If small_caps is true, sub/super are not
// considered, but dropcaps are.
void SetScriptPositions(const UNICHARSET& unicharset, bool small_caps,
TWERD* tessword, WERD_CHOICE* best_choice);
// Clean up the bounding boxes from the polygonal approximation by
// expanding slightly, then clipping to the blobs from the original_word
......@@ -83,6 +99,11 @@ class BoxWord {
const TBOX& BlobBox(int index) const {
return boxes_[index];
}
// Returns the script position stored for the blob at the given index, or
// SP_NORMAL when the index lies outside the script_pos_ vector.
ScriptPos BlobPosition(int index) const {
  const bool in_range = index >= 0 && index < script_pos_.size();
  return in_range ? script_pos_[index] : SP_NORMAL;
}
private:
void ComputeBoundingBox();
......@@ -90,6 +111,7 @@ class BoxWord {
TBOX bbox_;
int length_;
GenericVector<TBOX> boxes_;
GenericVector<ScriptPos> script_pos_;
};
} // namespace tesseract.
......
......@@ -208,6 +208,7 @@ void WERD_RES::CopySimpleFields(const WERD_RES& source) {
tess_would_adapt = source.tess_would_adapt;
done = source.done;
unlv_crunch_mode = source.unlv_crunch_mode;
small_caps = source.small_caps;
italic = source.italic;
bold = source.bold;
font1 = source.font1;
......@@ -301,6 +302,13 @@ void WERD_RES::SetupBoxWord() {
box_word->ClipToOriginalWord(denorm.block(), word);
}
// Sets up the script positions in the output boxword using the best_choice
// to get the unichars, and the unicharset to get the target positions.
// Forwards this word's small_caps flag, rebuild_word and best_choice to
// box_word->SetScriptPositions.
// NOTE(review): box_word is dereferenced without a NULL check; presumably
// SetupBoxWord must have been called first - confirm against callers.
void WERD_RES::SetScriptPositions(const UNICHARSET& unicharset) {
box_word->SetScriptPositions(unicharset, small_caps, rebuild_word,
best_choice);
}
// Classifies the word with some already-calculated BLOB_CHOICEs.
// The choices are an array of blob_count pointers to BLOB_CHOICE,
// providing a single classifier result for each blob.
......
......@@ -194,6 +194,7 @@ class WERD_RES : public ELIST_LINK {
BOOL8 tess_accepted; //Tess thinks its ok?
BOOL8 tess_would_adapt; //Tess would adapt?
BOOL8 done; //ready for output?
bool small_caps; // Word appears to be small caps.
inT8 italic;
inT8 bold;
inT8 font1; //primary font
......@@ -239,6 +240,7 @@ class WERD_RES : public ELIST_LINK {
tess_would_adapt = FALSE;
done = FALSE;
unlv_crunch_mode = CR_NONE;
small_caps = false;
italic = FALSE;
bold = FALSE;
font1 = -1;
......@@ -283,6 +285,10 @@ class WERD_RES : public ELIST_LINK {
// Sets/replaces the box_word with one made from the rebuild_word.
void SetupBoxWord();
// Sets up the script positions in the output boxword using the best_choice
// to get the unichars, and the unicharset to get the target positions.
void SetScriptPositions(const UNICHARSET& unicharset);
// Classifies the word with some already-calculated BLOB_CHOICEs.
// The choices are an array of blob_count pointers to BLOB_CHOICE,
// providing a single classifier result for each blob.
......
......@@ -17,7 +17,7 @@
*
**********************************************************************/
#include "mfcpch.h" //precompiled headers
#include "mfcpch.h" // precompiled headers
#include "rect.h"
// Include automatically generated configuration file if running autoconf.
......
......@@ -107,6 +107,7 @@ void Textord::make_old_baselines(TO_BLOCK *block, // block to do
}
}
correlate_lines(block, gradient);
block->block->set_xheight(block->xheight);
}
......
......@@ -42,9 +42,6 @@
// Some of the code in this file is dependent upon leptonica. If you don't
// have it, you don't get this functionality.
#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif
#ifdef HAVE_LIBLEPT
#include "allheaders.h"
#endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册