提交 3e8c0bc2 编写于 作者: T theraysmith

Various fixes, including memory leak in fixspace, font labels on output,...

Various fixes, including memory leak in fixspace, font labels on output, removed some annoying debug output, fixes to initialization of parameters, general cleanup, and added Hindi

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@567 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 c81483f7
......@@ -72,13 +72,13 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
libtesseract_main_la_DEPENDENCIES = \
../wordrec/libtesseract_wordrec.la
am_libtesseract_main_la_OBJECTS = adaptions.lo applybox.lo charcut.lo \
control.lo cube_control.lo cube_reco_context.lo docqual.lo \
fixspace.lo fixxht.lo imgscale.lo osdetect.lo output.lo \
pagesegmain.lo pagewalk.lo paramsd.lo pgedit.lo reject.lo \
scaleimg.lo recogtraining.lo tesseract_cube_combiner.lo \
tessbox.lo tessedit.lo tesseractclass.lo tessvars.lo \
tfacepp.lo thresholder.lo tstruct.lo werdit.lo
am_libtesseract_main_la_OBJECTS = adaptions.lo applybox.lo control.lo \
cube_control.lo cube_reco_context.lo docqual.lo fixspace.lo \
fixxht.lo imgscale.lo osdetect.lo output.lo pagesegmain.lo \
pagewalk.lo paramsd.lo pgedit.lo reject.lo scaleimg.lo \
recogtraining.lo tesseract_cube_combiner.lo tessbox.lo \
tessedit.lo tesseractclass.lo tessvars.lo tfacepp.lo \
thresholder.lo werdit.lo
libtesseract_main_la_OBJECTS = $(am_libtesseract_main_la_OBJECTS)
libtesseract_main_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
......@@ -280,25 +280,25 @@ AM_CPPFLAGS = \
EXTRA_DIST = tessembedded.cpp
include_HEADERS = \
charcut.h control.h cube_reco_context.h \
control.h cube_reco_context.h \
docqual.h fixspace.h \
imgscale.h osdetect.h output.h \
paramsd.h pgedit.h reject.h scaleimg.h \
tessbox.h tessedit.h tessembedded.h tesseractclass.h \
tesseract_cube_combiner.h \
tessvars.h tfacep.h tfacepp.h thresholder.h tstruct.h \
tessvars.h tfacep.h tfacepp.h thresholder.h \
werdit.h
lib_LTLIBRARIES = libtesseract_main.la
libtesseract_main_la_SOURCES = \
adaptions.cpp applybox.cpp \
charcut.cpp control.cpp cube_control.cpp cube_reco_context.cpp \
control.cpp cube_control.cpp cube_reco_context.cpp \
docqual.cpp fixspace.cpp fixxht.cpp \
imgscale.cpp osdetect.cpp output.cpp pagesegmain.cpp \
pagewalk.cpp paramsd.cpp pgedit.cpp reject.cpp scaleimg.cpp \
recogtraining.cpp tesseract_cube_combiner.cpp \
tessbox.cpp tessedit.cpp tesseractclass.cpp tessvars.cpp \
tfacepp.cpp thresholder.cpp tstruct.cpp \
tfacepp.cpp thresholder.cpp \
werdit.cpp
libtesseract_main_la_LIBADD = \
......@@ -381,7 +381,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/adaptions.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/applybox.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/charcut.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/control.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_control.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cube_reco_context.Plo@am__quote@
......@@ -405,7 +404,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tessvars.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tfacepp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/thresholder.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tstruct.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/werdit.Plo@am__quote@
.cpp.o:
......
......@@ -486,7 +486,7 @@ bool Tesseract::ConvertStringToUnichars(const char* utf8,
// Returns false if the re-segmentation fails.
// Uses brute-force combination of up to kMaxGroupSize adjacent blobs, and
// applies a full search on the classifier results to find the best classified
// segmentation. As a compromise to obtain better recall, 1-1 ambigiguity
// segmentation. As a compromise to obtain better recall, 1-1 ambiguity
// substitutions ARE used.
bool Tesseract::FindSegmentation(const GenericVector<UNICHAR_ID>& target_text,
WERD_RES* word_res) {
......
......@@ -170,9 +170,22 @@ void Tesseract::recog_all_words(PAGE_RES* page_res,
const TBOX* target_word_box,
const char* word_config,
int dopasses) {
// TODO(rays): Normalize the "classify word" interface. For instance:
// (1) word.denorm gets set in word->SetupForRecognition() but does
// not get invoked for cube alone. Maybe it should?
// (2) run_cube() checks whether word->best_choice is NULL, and if
// so determines that "neither cube nor tess have an answer."
// However, if tess gets run at all, the first thing it does is
// call word->SetupForRecognition which inserts a poorly scoring
// best_answer. So what is the way that an engine (tess or cube)
// says "I don't have an answer": an empty list or a single
// poorly scoring best_answer?
// reset page iterator
// If we only intend to run cube - run it and return.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
PrepareForCubeOCR();
mutable_splitter()->Clear();
run_cube(page_res);
return;
}
......@@ -394,6 +407,8 @@ void Tesseract::recog_all_words(PAGE_RES* page_res,
// ****************** Pass 5 *******************
// If cube is loaded and its combiner is present, run it.
if (tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
PrepareForCubeOCR();
mutable_splitter()->Clear();
run_cube(page_res);
}
......@@ -520,6 +535,7 @@ void Tesseract::classify_word_pass1(WERD_RES *word, // word to do
}
// Send word to adaptive classifier for training.
word->BestChoiceToCorrectText(unicharset);
set_word_fonts(word, blob_choices);
LearnWord(NULL, rejmap, word);
}
......@@ -1164,14 +1180,14 @@ BOOL8 Tesseract::check_debug_pt(WERD_RES *word, int location) {
*/
static void find_modal_font( //good chars in word
STATS *fonts, //font stats
inT8 *font_out, //output font
inT16 *font_out, //output font
inT8 *font_count //output count
) {
inT8 font; //font index
inT16 font; //font index
inT32 count; //pile couat
if (fonts->get_total () > 0) {
font = (inT8) fonts->mode ();
font = (inT16) fonts->mode ();
*font_out = font;
count = fonts->pile_count (font);
*font_count = count < MAX_INT8 ? count : MAX_INT8;
......@@ -1216,68 +1232,57 @@ void Tesseract::set_word_fonts(WERD_RES *word,
if (word_ch_id >= PreTrainedTemplates->NumClasses)
return; // This must be a cube word.
choice_it.set_to_list(char_it.data());
if (tessedit_debug_fonts) {
tprintf("Examining fonts in %s\n", word->best_choice->debug_string(
getDict().getUnicharset()).string());
}
for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
choice_it.forward()) {
UNICHAR_ID blob_ch_id = choice_it.data()->unichar_id();
if (blob_ch_id == word_ch_id) {
int config = choice_it.data()->config();
int config2 = choice_it.data()->config2();
int font_set_id = PreTrainedTemplates->Class[blob_ch_id]->font_set_id;
if (font_set_id >= 0 && config >= 0 && font_set_id < fontset_size) {
FontSet font_set = get_fontset_table().get(font_set_id);
if (tessedit_debug_fonts) {
tprintf("%s(%d/%d)", unicharset.id_to_unichar(blob_ch_id),
config, config2);
const char* fontname;
if (config >= font_set.size) {
fontname = "Unknown";
} else {
fontname = get_fontinfo_table().get(
font_set.configs[config]).name;
}
tprintf("%s(%d,%d=%s)\n",
unicharset.id_to_unichar(choice_it.data()->unichar_id()),
font_set_id, config, fontname);
}
// 1st choice config gets 2 pts, 2nd choice 1 pt.
if (config < font_set.size) {
int fontinfo_id = font_set.configs[config];
if (fontinfo_id < fontinfo_size) {
fonts.add(fontinfo_id, 2);
}
}
if (config2 >= 0 && config2 < font_set.size) {
int fontinfo_id = font_set.configs[config2];
if (fontinfo_id < fontinfo_size) {
fonts.add(fontinfo_id, 1);
}
}
if (tessedit_debug_fonts) {
tprintf("%s font %s (%d) font2 %s (%d)\n",
getDict().getUnicharset().id_to_unichar(blob_ch_id),
choice_it.data()->fontinfo_id() < 0 ? "unknown" :
fontinfo_table_.get(choice_it.data()->fontinfo_id()).name,
choice_it.data()->fontinfo_id(),
choice_it.data()->fontinfo_id2() < 0 ? "unknown" :
fontinfo_table_.get(choice_it.data()->fontinfo_id2()).name,
choice_it.data()->fontinfo_id2());
}
// 1st choice font gets 2 pts, 2nd choice 1 pt.
if (choice_it.data()->fontinfo_id() >= 0) {
fonts.add(choice_it.data()->fontinfo_id(), 2);
}
if (choice_it.data()->fontinfo_id2() >= 0) {
fonts.add(choice_it.data()->fontinfo_id2(), 1);
}
break;
}
}
}
find_modal_font(&fonts, &word->font1, &word->font1_count);
find_modal_font(&fonts, &word->font2, &word->font2_count);
find_modal_font(&fonts, &word->fontinfo_id, &word->fontinfo_id_count);
find_modal_font(&fonts, &word->fontinfo_id2, &word->fontinfo_id2_count);
// All the blobs get the word's best choice font.
for (int i = 0; i < word->best_choice->length(); ++i) {
word->best_choice_fontinfo_ids.push_back(word->font1);
word->best_choice_fontinfo_ids.push_back(word->fontinfo_id);
}
if (word->font1_count > 0) {
FontInfo fi = fontinfo_table_.get(word->font1);
if (word->fontinfo_id_count > 0) {
FontInfo fi = fontinfo_table_.get(word->fontinfo_id);
if (tessedit_debug_fonts) {
if (word->font2_count > 0) {
if (word->fontinfo_id2_count > 0) {
tprintf("Word modal font=%s, score=%d, 2nd choice %s/%d\n",
fi.name, word->font1_count,
fontinfo_table_.get(word->font2).name, word->font2_count);
fi.name, word->fontinfo_id_count,
fontinfo_table_.get(word->fontinfo_id2).name,
word->fontinfo_id2_count);
} else {
tprintf("Word modal font=%s, score=%d. No 2nd choice\n",
fi.name, word->font1_count);
fi.name, word->fontinfo_id_count);
}
}
// 1st choices got 2 pts, so we need to halve the score for the mode.
word->italic = (fi.is_italic() ? 1 : -1) * (word->font1_count + 1) / 2;
word->bold = (fi.is_bold() ? 1 : -1) * (word->font1_count + 1) / 2;
word->italic = (fi.is_italic() ? 1 : -1) * (word->fontinfo_id_count + 1) / 2;
word->bold = (fi.is_bold() ? 1 : -1) * (word->fontinfo_id_count + 1) / 2;
}
}
......@@ -1292,7 +1297,7 @@ void Tesseract::font_recognition_pass( //good chars in word
PAGE_RES_IT &page_res_it) {
inT32 length; //of word
inT32 count; //of a feature
inT8 doc_font; //modal font
inT16 doc_font; //modal font
inT8 doc_font_count; //modal font
WERD_RES *word; //current word
STATS doc_fonts (0, get_fontinfo_table().size() ?
......@@ -1305,8 +1310,8 @@ void Tesseract::font_recognition_pass( //good chars in word
if (!save_best_choices) { // set_blob_choices() does a deep clear
word->best_choice->set_blob_choices(NULL);
}
doc_fonts.add(word->font1, word->font1_count);
doc_fonts.add(word->font2, word->font2_count);
doc_fonts.add(word->fontinfo_id, word->fontinfo_id_count);
doc_fonts.add(word->fontinfo_id2, word->fontinfo_id2_count);
page_res_it.forward();
}
find_modal_font(&doc_fonts, &doc_font, &doc_font_count);
......@@ -1320,11 +1325,11 @@ void Tesseract::font_recognition_pass( //good chars in word
length = word->best_choice->length();
// 1st choices got 2 pts, so we need to halve the score for the mode.
count = (word->font1_count + 1) / 2;
count = (word->fontinfo_id_count + 1) / 2;
if (!(count == length || (length > 3 && count >= length * 3 / 4))) {
word->font1 = doc_font;
word->fontinfo_id = doc_font;
// Counts only get 1 as it came from the doc.
word->font1_count = 1;
word->fontinfo_id_count = 1;
word->italic = fi.is_italic() ? 1 : -1;
word->bold = fi.is_bold() ? 1 : -1;
}
......
......@@ -196,7 +196,7 @@ static WERD_CHOICE *create_werd_choice(
/**********************************************************************
* init_cube_objects
*
* Instantitates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
* Instantiates Tesseract object's CubeRecoContext and TesseractCubeCombiner.
* Returns false if cube context could not be created or if load_combiner is
* true, but the combiner could not be loaded.
**********************************************************************/
......@@ -260,7 +260,10 @@ void Tesseract::run_cube(
page_res_it.forward()) {
WERD_RES* word = page_res_it.word();
TBOX word_box = word->word->bounding_box();
const BLOCK* block = word->denorm.block();
// TODO(rays): Instead of page_res_it.block()->block maybe use
// word->denorm.block() once TODO in
// Tesseract::recog_all_words() is addressed.
const BLOCK* block = page_res_it.block()->block;
if (block != NULL && (block->re_rotation().x() != 1.0f ||
block->re_rotation().y() != 0.0f)) {
// TODO(rays) We have to rotate the bounding box to get the true coords.
......
......@@ -737,12 +737,15 @@ void Tesseract::merge_tess_fails(WERD_RES *word_res) {
unicharset,
NewPermanentTessCallback(this, &Tesseract::BothSpaces), NULL,
word_res->best_choice->blob_choices())) {
tprintf("Post:bc len=%d, rejmap=%d, boxword=%d, chopword=%d, rebuild=%d\n",
word_res->best_choice->length(),
word_res->reject_map.length(),
word_res->box_word->length(),
word_res->chopped_word->NumBlobs(),
word_res->rebuild_word->NumBlobs());
if (crunch_debug) {
tprintf("Post:bc len=%d, rejmap=%d, boxword=%d, chopword=%d,"
" rebuild=%d\n",
word_res->best_choice->length(),
word_res->reject_map.length(),
word_res->box_word->length(),
word_res->chopped_word->NumBlobs(),
word_res->rebuild_word->NumBlobs());
}
int len = word_res->best_choice->length();
ASSERT_HOST(word_res->reject_map.length() == len);
ASSERT_HOST(word_res->box_word->length() == len);
......
......@@ -668,7 +668,6 @@ void Tesseract::fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row,
old_word_res->combination = TRUE; // Kludge to force deep copy
*new_word_res = *old_word_res; // deep copy
old_word_res->combination = FALSE; // Undo kludge
new_word_res->combination = FALSE; // Undo kludge
current_perm_it.add_to_end(new_word_res);
break_noisiest_blob_word(current_perm);
......@@ -754,7 +753,9 @@ void Tesseract::break_noisiest_blob_word(WERD_RES_LIST &words) {
new_rej_cblob_it.add_after_then_move(rej_cblob_it.extract());
}
worst_word_it.add_before_then_move(new WERD_RES(new_word));
WERD_RES* new_word_res = new WERD_RES(new_word);
new_word_res->combination = TRUE;
worst_word_it.add_before_then_move(new_word_res);
word_res->ClearResults();
}
......
......@@ -403,7 +403,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
int prev_id = -1;
int prev_script;
int prev_class_id = -1;
int prev_config = -1;
int prev_fontinfo_id = -1;
const char* prev_unichar = "";
const char* unichar = "";
float next_best_score = -1.0;
......@@ -427,7 +427,7 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
prev_script = choice->script_id();
prev_unichar = unichar;
prev_class_id = choice->unichar_id();
prev_config = choice->config();
prev_fontinfo_id = choice->fontinfo_id();
} else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
++script_count;
next_best_score = -choice->certainty();
......@@ -451,11 +451,9 @@ void ScriptDetector::detect_blob(BLOB_CHOICE_LIST* scores) {
// Workaround for Fraktur
if (prev_id == latin_id_) {
int font_set_id = tess_->PreTrainedTemplates->
Class[prev_class_id]->font_set_id;
if (font_set_id >= 0 && prev_config >= 0) {
FontInfo fi = tess_->get_fontinfo_table().get(
tess_->get_fontset_table().get(font_set_id).configs[prev_config]);
if (prev_fontinfo_id >= 0) {
const FontInfo &fi =
tess_->get_fontinfo_table().get(prev_fontinfo_id);
//printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
// fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
// fi.is_serif(), fi.is_fraktur(),
......
......@@ -102,8 +102,7 @@ static Pix* RemoveEnclosingCircle(Pix* pixs) {
/**
* Segment the page according to the current value of tessedit_pageseg_mode.
* If the pix_binary_ member is not NULL, it is used as the source image,
* and copied to image, otherwise it just uses image as the input.
* pix_binary_ is used as the source image and should not be NULL.
* On return the blocks list owns all the constructed page layout.
*/
int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
......@@ -169,7 +168,8 @@ int Tesseract::SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
}
if (blocks->empty()) {
tprintf("Empty page\n");
if (textord_debug_tabfind)
tprintf("Empty page\n");
return 0; // AutoPageSeg found an empty page.
}
......
......@@ -721,7 +721,7 @@ BOOL8 Tesseract::word_display(BLOCK* block, ROW* row, WERD_RES* word_res) {
if (color_mode != CM_RAINBOW && word_res->box_word != NULL) {
BoxWord* box_word = word_res->box_word;
int length = box_word->length();
int font_id = word_res->font1;
int font_id = word_res->fontinfo_id;
if (font_id < 0) font_id = 0;
const UnicityTable<FontInfo> &font_table = get_fontinfo_table();
FontInfo font_info = font_table.get(font_id);
......
......@@ -97,7 +97,9 @@ void Tesseract::read_config_file(const char *filename, bool init_only) {
bool Tesseract::init_tesseract_lang_data(
const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size,
bool configs_init_only) {
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params) {
// Set the basename, compute the data directory.
main_setup(arg0, textbase);
......@@ -129,7 +131,20 @@ bool Tesseract::init_tesseract_lang_data(
// language-specific variables from [lang].traineddata file, so that custom
// config files can override values in [lang].traineddata file.
for (int i = 0; i < configs_size; ++i) {
read_config_file(configs[i], configs_init_only);
read_config_file(configs[i], set_only_init_params);
}
// Set params specified in vars_vec (done after setting params from config
// files, so that params in vars_vec can override those from files).
if (vars_vec != NULL && vars_values != NULL) {
for (int i = 0; i < vars_vec->size(); ++i) {
if (!ParamUtils::SetParam((*vars_vec)[i].string(),
(*vars_values)[i].string(),
set_only_init_params, this->params())) {
tprintf("Error setting param %s\n", (*vars_vec)[i].string());
exit(1);
}
}
}
if (((STRING &)tessedit_write_params_to_file).length() > 0) {
......@@ -192,9 +207,12 @@ bool Tesseract::init_tesseract_lang_data(
int Tesseract::init_tesseract(
const char *arg0, const char *textbase, const char *language,
OcrEngineMode oem, char **configs, int configs_size,
bool configs_init_only) {
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params) {
if (!init_tesseract_lang_data(arg0, textbase, language, oem, configs,
configs_size, configs_init_only)) {
configs_size, vars_vec, vars_values,
set_only_init_params)) {
return -1;
}
// If only Cube will be used, skip loading Tesseract classifier's
......@@ -216,8 +234,8 @@ int Tesseract::init_tesseract(
int Tesseract::init_tesseract_lm(const char *arg0,
const char *textbase,
const char *language) {
if (!init_tesseract_lang_data(arg0, textbase, language,
OEM_TESSERACT_ONLY, NULL, 0, false))
if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY,
NULL, 0, NULL, NULL, false))
return -1;
getDict().Load();
tessdata_manager.End();
......
......@@ -68,6 +68,14 @@ Tesseract::Tesseract()
"Whitelist of chars to recognize", this->params()),
BOOL_INIT_MEMBER(tessedit_ambigs_training, false,
"Perform training for ambiguities", this->params()),
INT_MEMBER(pageseg_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing page-segmentation.", this->params()),
INT_MEMBER(ocr_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing ocr.", this->params()),
STRING_MEMBER(tessedit_write_params_to_file, "",
"Write all parameters to the given file.", this->params()),
BOOL_MEMBER(tessedit_adapt_to_char_fragments, true,
......@@ -383,6 +391,7 @@ void Tesseract::Clear() {
deskew_ = FCOORD(1.0f, 0.0f);
reskew_ = FCOORD(1.0f, 0.0f);
orig_image_changed_ = false;
splitter_.Clear();
}
void Tesseract::SetBlackAndWhitelist() {
......@@ -391,4 +400,61 @@ void Tesseract::SetBlackAndWhitelist() {
tessedit_char_whitelist.string());
}
// Perform steps to prepare underlying binary image/other data structures for
// page segmentation.
void Tesseract::PrepareForPageseg() {
// Perform shiro-rekha (top-line) splitting and replace the current image by
// the newly splitted image.
splitter_.set_orig_pix(pix_binary());
splitter_.set_pageseg_split_strategy(
(ShiroRekhaSplitter::SplitStrategy)
((inT32)pageseg_devanagari_split_strategy));
if (splitter_.Split(true)) {
ASSERT_HOST(splitter_.splitted_image());
splitter_.CopySplittedImageTo(NULL, &pix_binary_);
orig_image_changed_ = true;
}
}
// Prepares the underlying binary image and segmentation for Tesseract OCR.
// block_list is the current segmentation; it is handed to the splitter and,
// depending on the split strategies, either refreshed with new blobs or
// cleared and rebuilt. osd_tess and osr are forwarded unchanged to
// SegmentPage().
void Tesseract::PrepareForTessOCR(BLOCK_LIST* block_list,
                                  Tesseract* osd_tess, OSResults* osr) {
  // Give the splitter the existing segmentation and the OCR-time strategy.
  splitter_.set_segmentation_block_list(block_list);
  const ShiroRekhaSplitter::SplitStrategy ocr_strategy =
      static_cast<ShiroRekhaSplitter::SplitStrategy>(
          static_cast<inT32>(ocr_devanagari_split_strategy));
  splitter_.set_ocr_split_strategy(ocr_strategy);
  if (!splitter_.Split(false)) {
    return;  // Split() returned false: image and segmentation stay as they are.
  }

  ASSERT_HOST(splitter_.splitted_image());
  splitter_.CopySplittedImageTo(NULL, &pix_binary_);
  orig_image_changed_ = true;

  if (!splitter_.HasDifferentSplitStrategies()) {
    // Same strategy before pageseg and before ocr: the segmentation from this
    // second round can be used going forward, so rebuild block_list in place.
    block_list->clear();
    SegmentPage(NULL, block_list, osd_tess, osr);
    return;
  }

  // Different strategies: keep the first-round page segmentation (and,
  // importantly, its word segmentation) and only refresh it with the blobs
  // found by segmenting the newly split image.
  BLOCK_LIST resegmented_blocks;
  SegmentPage(NULL, &resegmented_blocks, osd_tess, osr);
  C_BLOB_LIST resegmented_blobs;
  ExtractBlobsFromSegmentation(&resegmented_blocks, &resegmented_blobs);
  splitter_.RefreshSegmentationWithNewBlobs(&resegmented_blobs);
}
// Prepares the underlying binary image for Cube OCR.
// If earlier processing replaced pix_binary_ (orig_image_changed_ is set),
// the original image is copied back from the splitter and the flag is reset.
void Tesseract::PrepareForCubeOCR() {
  if (!orig_image_changed_) {
    return;  // pix_binary_ was not replaced; nothing to revert.
  }
  // Revert to the original image as Cube likes them more.
  splitter_.CopyOriginalImageTo(NULL, &pix_binary_);
  orig_image_changed_ = false;
}
} // namespace tesseract
......@@ -22,11 +22,13 @@
#define TESSERACT_CCMAIN_TESSERACTCLASS_H__
#include "allheaders.h"
#include "genericvector.h"
#include "params.h"
#include "wordrec.h"
#include "ocrclass.h"
#include "control.h"
#include "docqual.h"
#include "devanagari_processing.h"
#include "textord.h"
class PAGE_RES;
......@@ -159,6 +161,12 @@ class Tesseract : public Wordrec {
return pixGetHeight(pix_binary_);
}
// Returns a read-only reference to the splitter_ member.
const ShiroRekhaSplitter& splitter() const {
return splitter_;
}
// Returns a pointer to the splitter_ member through which callers may
// modify it.
ShiroRekhaSplitter* mutable_splitter() {
return &splitter_;
}
const Textord& textord() const {
return textord_;
}
......@@ -172,6 +180,24 @@ class Tesseract : public Wordrec {
void SetBlackAndWhitelist();
// Perform steps to prepare underlying binary image/other data structures for
// page segmentation. Uses the strategy specified in the global variable
// pageseg_devanagari_split_strategy to perform splitting while preparing for
// page segmentation.
void PrepareForPageseg();
// Perform steps to prepare underlying binary image/other data structures for
// Tesseract OCR. The current segmentation is required by this method.
// Uses the strategy specified in the global variable
// ocr_devanagari_split_strategy for performing splitting while preparing for
// Tesseract ocr.
void PrepareForTessOCR(BLOCK_LIST* block_list,
Tesseract* osd_tess, OSResults* osr);
// Perform steps to prepare underlying binary image/other data structures for
// Cube OCR.
void PrepareForCubeOCR();
int SegmentPage(const STRING* input_file, BLOCK_LIST* blocks,
Tesseract* osd_tess, OSResults* osr);
void SetupWordScripts(BLOCK_LIST* blocks);
......@@ -228,11 +254,12 @@ class Tesseract : public Wordrec {
void fix_hyphens(WERD_RES* word_res,
BLOB_CHOICE_LIST_CLIST *blob_choices);
void set_word_fonts(
WERD_RES *word, // word to adapt to
WERD_RES *word, // set fonts of this word
BLOB_CHOICE_LIST_CLIST *blob_choices); // detailed results
void font_recognition_pass( //good chars in word
PAGE_RES_IT &page_res_it);
BOOL8 check_debug_pt(WERD_RES *word, int location);
//// cube_control.cpp ///////////////////////////////////////////////////
bool init_cube_objects(bool load_combiner,
TessdataManager *tessdata_manager);
......@@ -267,11 +294,14 @@ class Tesseract : public Wordrec {
OcrEngineMode oem,
char **configs,
int configs_size,
bool configs_init_only);
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
int init_tesseract(const char *datapath,
const char *language,
OcrEngineMode oem) {
return init_tesseract(datapath, NULL, language, oem, NULL, 0, false);
return init_tesseract(datapath, NULL, language, oem,
NULL, 0, NULL, NULL, false);
}
int init_tesseract_lm(const char *arg0,
......@@ -287,7 +317,9 @@ class Tesseract : public Wordrec {
OcrEngineMode oem,
char **configs,
int configs_size,
bool configs_init_only);
const GenericVector<STRING> *vars_vec,
const GenericVector<STRING> *vars_values,
bool set_only_init_params);
//// pgedit.h //////////////////////////////////////////////////////////
SVMenuNode *build_menu_new();
......@@ -555,6 +587,14 @@ class Tesseract : public Wordrec {
"Whitelist of chars to recognize");
BOOL_VAR_H(tessedit_ambigs_training, false,
"Perform training for ambiguities");
INT_VAR_H(pageseg_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing page-segmentation.");
INT_VAR_H(ocr_devanagari_split_strategy,
tesseract::ShiroRekhaSplitter::NO_SPLIT,
"Whether to use the top-line splitting process for Devanagari "
"documents while performing ocr.");
STRING_VAR_H(tessedit_write_params_to_file, "",
"Write all parameters to the given file.");
BOOL_VAR_H(tessedit_adapt_to_char_fragments, true,
......@@ -781,6 +821,9 @@ class Tesseract : public Wordrec {
STRING word_config_;
Pix* pix_binary_;
Pix* pix_grey_;
// The shiro-rekha splitter object which is used to split top-lines in
// Devanagari words to provide a better word and grapheme segmentation.
ShiroRekhaSplitter splitter_;
// Records whether the currently set pix_binary_ member has been modified by
// any processing, since such a modification may hurt Cube's recognition
// phase.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册