Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
a7660331a
tesseract
提交
5deebe6c
T
tesseract
项目概览
a7660331a
/
tesseract
与 Fork 源项目一致
从无法访问的项目Fork
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
T
tesseract
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5deebe6c
编写于
12月 05, 2016
作者:
R
Ray Smith
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fixed multilang for LSTM, pushed cube to one side without actually deleting it
上级
798d79aa
变更
14
隐藏空白更改
内联
并排
Showing 14 changed files with 139 additions and 124 deletions
+139
-124
api/tesseractmain.cpp
api/tesseractmain.cpp
+3
-4
ccmain/control.cpp
ccmain/control.cpp
+32
-29
ccmain/linerec.cpp
ccmain/linerec.cpp
+17
-23
ccmain/tessedit.cpp
ccmain/tessedit.cpp
+41
-34
ccmain/tesseract_cube_combiner.cpp
ccmain/tesseract_cube_combiner.cpp
+6
-6
ccmain/tesseractclass.cpp
ccmain/tesseractclass.cpp
+2
-2
ccmain/tesseractclass.h
ccmain/tesseractclass.h
+10
-10
ccstruct/pageres.cpp
ccstruct/pageres.cpp
+1
-0
ccstruct/publictypes.h
ccstruct/publictypes.h
+5
-10
ccstruct/ratngs.h
ccstruct/ratngs.h
+14
-0
dict/context.cpp
dict/context.cpp
+1
-1
dict/dict.h
dict/dict.h
+3
-3
dict/stopper.cpp
dict/stopper.cpp
+2
-2
lstm/lstmrecognizer.h
lstm/lstmrecognizer.h
+2
-0
未找到文件。
api/tesseractmain.cpp
浏览文件 @
5deebe6c
...
...
@@ -123,10 +123,9 @@ void PrintHelpForOEM() {
const
char
*
msg
=
"OCR Engine modes:
\n
"
" 0 Original Tesseract only.
\n
"
" 1 Cube only.
\n
"
" 2 Tesseract + cube.
\n
"
" 3 Default, based on what is available.
\n
"
" 4 Neural nets (LSTM) only.
\n
"
;
" 1 Neural nets LSTM only.
\n
"
" 2 Tesseract + LSTM.
\n
"
" 3 Default, based on what is available.
\n
"
;
printf
(
"%s"
,
msg
);
}
...
...
ccmain/control.cpp
浏览文件 @
5deebe6c
...
...
@@ -31,21 +31,22 @@
#include <errno.h>
#endif
#include <ctype.h>
#include "ocrclass.h"
#include "werdit.h"
#include "callcpp.h"
#include "control.h"
#include "docqual.h"
#include "drawfx.h"
#include "tessbox.h"
#include "tessvars.h"
#include "pgedit.h"
#include "reject.h"
#include "fixspace.h"
#include "docqual.h"
#include "control.h"
#include "output.h"
#include "callcpp.h"
#include "globals.h"
#include "lstmrecognizer.h"
#include "ocrclass.h"
#include "output.h"
#include "pgedit.h"
#include "reject.h"
#include "sorthelper.h"
#include "tessbox.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "werdit.h"
#define MIN_FONT_ROW_COUNT 8
#define MAX_XHEIGHT_DIFF 3
...
...
@@ -192,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
WERD_RES
*
word_res
=
new
WERD_RES
;
word_res
->
InitForRetryRecognition
(
*
word
->
word
);
word
->
lang_words
.
push_back
(
word_res
);
//
Cube
doesn't get setup for pass2.
if
(
pass_n
==
1
||
lang_t
->
tessedit_ocr_engine_mode
!=
OEM_
CUBE
_ONLY
)
{
//
LSTM
doesn't get setup for pass2.
if
(
pass_n
==
1
||
lang_t
->
tessedit_ocr_engine_mode
!=
OEM_
LSTM
_ONLY
)
{
word_res
->
SetupForRecognition
(
lang_t
->
unicharset
,
lang_t
,
BestPix
(),
lang_t
->
tessedit_ocr_engine_mode
,
NULL
,
...
...
@@ -301,16 +302,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
const
TBOX
*
target_word_box
,
const
char
*
word_config
,
int
dopasses
)
{
// PSM_RAW_LINE is a special-case mode in which the layout analysis is
// completely ignored and LSTM is run on the raw image. There is no hope
// of running normal tesseract in this situation or of integrating output.
#ifndef ANDROID_BUILD
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
&&
tessedit_pageseg_mode
==
PSM_RAW_LINE
)
{
RecogRawLine
(
page_res
);
return
true
;
}
#endif
PAGE_RES_IT
page_res_it
(
page_res
);
if
(
tessedit_minimal_rej_pass1
)
{
...
...
@@ -397,8 +388,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if
(
!
RecogAllWordsPassN
(
2
,
monitor
,
&
page_res_it
,
&
words
))
return
false
;
}
// The next passes can only be run if tesseract has been used, as cube
// doesn't set all the necessary outputs in WERD_RES.
// The next passes are only required for Tess-only.
if
(
AnyTessLang
()
&&
!
AnyLSTMLang
())
{
// ****************** Pass 3 *******************
// Fix fuzzy spaces.
...
...
@@ -451,8 +441,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
for
(
page_res_it
.
restart_page
();
page_res_it
.
word
()
!=
NULL
;
page_res_it
.
forward
())
{
WERD_RES
*
word
=
page_res_it
.
word
();
if
(
word
->
best_choice
==
NULL
||
word
->
best_choice
->
length
()
==
0
)
POLY_BLOCK
*
pb
=
page_res_it
.
block
()
->
block
!=
NULL
?
page_res_it
.
block
()
->
block
->
poly_block
()
:
NULL
;
if
(
word
->
best_choice
==
NULL
||
word
->
best_choice
->
length
()
==
0
||
(
word
->
best_choice
->
IsAllSpaces
()
&&
(
pb
==
NULL
||
pb
->
IsText
())))
{
page_res_it
.
DeleteCurrentWord
();
}
}
if
(
monitor
!=
NULL
)
{
...
...
@@ -1376,12 +1371,20 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
cube_word_pass1
(
block
,
row
,
*
in_word
);
return
;
}
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
)
{
if
(
!
(
*
in_word
)
->
odd_size
)
{
#endif
#ifndef ANDROID_BUILD
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
||
tessedit_ocr_engine_mode
==
OEM_TESSERACT_LSTM_COMBINED
)
{
if
(
!
(
*
in_word
)
->
odd_size
||
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
)
{
LSTMRecognizeWord
(
*
block
,
row
,
*
in_word
,
out_words
);
if
(
!
out_words
->
empty
())
return
;
// Successful lstm recognition.
}
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
)
{
// No fallback allowed, so use a fake.
(
*
in_word
)
->
SetupFake
(
lstm_recognizer_
->
GetUnicharset
());
return
;
}
// Fall back to tesseract for failed words or odd words.
(
*
in_word
)
->
SetupForRecognition
(
unicharset
,
this
,
BestPix
(),
OEM_TESSERACT_ONLY
,
NULL
,
...
...
@@ -1523,7 +1526,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
WERD_RES
**
in_word
,
PointerVector
<
WERD_RES
>*
out_words
)
{
// Return if we do not want to run Tesseract.
if
(
tessedit_ocr_engine_mode
==
OEM_
CUBE
_ONLY
)
{
if
(
tessedit_ocr_engine_mode
==
OEM_
LSTM
_ONLY
)
{
return
;
}
ROW
*
row
=
word_data
.
row
;
...
...
@@ -1908,7 +1911,7 @@ static void find_modal_font( //good chars in word
* Get the fonts for the word.
*/
void
Tesseract
::
set_word_fonts
(
WERD_RES
*
word
)
{
// Don't try to set the word fonts for a
cube
word, as the configs
// Don't try to set the word fonts for a
n lstm
word, as the configs
// will be meaningless.
if
(
word
->
chopped_word
==
NULL
)
return
;
ASSERT_HOST
(
word
->
best_choice
!=
NULL
);
...
...
ccmain/linerec.cpp
浏览文件 @
5deebe6c
...
...
@@ -219,19 +219,6 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
}
#ifndef ANDROID_BUILD
// Entry point for recognizing one raw text line. Runs LSTMRecognizeWord on
// the block, row and word found at the initial position of a PAGE_RES_IT over
// page_res, then replaces that word with the words the recognizer produced.
void Tesseract::RecogRawLine(PAGE_RES* page_res) {
  PAGE_RES_IT page_it(page_res);
  PointerVector<WERD_RES> line_words;
  LSTMRecognizeWord(*page_it.block()->block, page_it.row()->row,
                    page_it.word(), &line_words);
  // Word choices are dumped only when stopper debugging is switched on.
  const bool dump_choices = getDict().stopper_debug_level >= 1;
  for (int idx = 0; dump_choices && idx < line_words.size(); ++idx) {
    line_words[idx]->DebugWordChoices(true, NULL);
  }
  page_it.ReplaceCurrentWord(&line_words);
}
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void
Tesseract
::
LSTMRecognizeWord
(
const
BLOCK
&
block
,
ROW
*
row
,
WERD_RES
*
word
,
...
...
@@ -268,7 +255,17 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
// for each of the output words.
// If we drop a word as junk, then there is always a space in front of the
// next.
bool
deleted_prev
=
false
;
const
Dict
*
stopper_dict
=
lstm_recognizer_
->
GetDict
();
if
(
stopper_dict
==
nullptr
)
stopper_dict
=
&
getDict
();
bool
any_nonspace_delimited
=
false
;
for
(
int
w
=
0
;
w
<
words
->
size
();
++
w
)
{
WERD_RES
*
word
=
(
*
words
)[
w
];
if
(
word
->
best_choice
!=
nullptr
&&
word
->
best_choice
->
ContainsAnyNonSpaceDelimited
())
{
any_nonspace_delimited
=
true
;
break
;
}
}
for
(
int
w
=
0
;
w
<
words
->
size
();
++
w
)
{
WERD_RES
*
word
=
(
*
words
)[
w
];
if
(
word
->
best_choice
==
NULL
)
{
...
...
@@ -284,9 +281,7 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
}
if
(
word
->
best_choice
==
NULL
)
{
// It is a dud.
words
->
remove
(
w
);
--
w
;
deleted_prev
=
true
;
word
->
SetupFake
(
lstm_recognizer_
->
GetUnicharset
());
}
else
{
// Set the best state.
for
(
int
i
=
0
;
i
<
word
->
best_choice
->
length
();
++
i
)
{
...
...
@@ -314,22 +309,21 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
word
->
best_choice
->
print
();
}
// Discard words that are impossibly bad, but allow a bit more for
// dictionary words.
// dictionary words
, and keep bad words in non-space-delimited langs
.
if
(
word_certainty
>=
RecodeBeamSearch
::
kMinCertainty
||
any_nonspace_delimited
||
(
word_certainty
>=
kWorstDictCertainty
&&
Dict
::
valid_word_permuter
(
word
->
best_choice
->
permuter
(),
true
)))
{
word
->
best_choice
->
set_certainty
(
word_certainty
);
if
(
deleted_prev
)
word
->
word
->
set_blanks
(
1
);
word
->
tess_accepted
=
stopper_dict
->
AcceptableResult
(
word
);
}
else
{
if
(
getDict
().
stopper_debug_level
>=
1
)
{
tprintf
(
"Deleting word with certainty %g
\n
"
,
word_certainty
);
word
->
best_choice
->
print
();
}
// It is a dud.
words
->
remove
(
w
);
--
w
;
deleted_prev
=
true
;
word
->
SetupFake
(
lstm_recognizer_
->
GetUnicharset
());
}
word
->
best_choice
->
set_certainty
(
word_certainty
);
}
}
}
...
...
ccmain/tessedit.cpp
浏览文件 @
5deebe6c
...
...
@@ -161,7 +161,7 @@ bool Tesseract::init_tesseract_lang_data(
// Determine which ocr engine(s) should be loaded and used for recognition.
if
(
oem
!=
OEM_DEFAULT
)
tessedit_ocr_engine_mode
.
set_value
(
oem
);
if
(
tessdata_manager_debug_level
)
{
tprintf
(
"Loading Tesseract/
Cube
with tessedit_ocr_engine_mode %d
\n
"
,
tprintf
(
"Loading Tesseract/
LSTM
with tessedit_ocr_engine_mode %d
\n
"
,
static_cast
<
int
>
(
tessedit_ocr_engine_mode
));
}
...
...
@@ -174,9 +174,37 @@ bool Tesseract::init_tesseract_lang_data(
return
true
;
}
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as LSTM are optional.
#ifndef ANDROID_BUILD
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
||
tessedit_ocr_engine_mode
==
OEM_TESSERACT_LSTM_COMBINED
)
{
if
(
tessdata_manager
.
swap
())
{
tprintf
(
"Error: LSTM requested on big-endian hardware!!
\n
"
);
tprintf
(
"Big-endian not yet supported! Loading tesseract.
\n
"
);
tessedit_ocr_engine_mode
.
set_value
(
OEM_TESSERACT_ONLY
);
}
else
if
(
tessdata_manager
.
SeekToStart
(
TESSDATA_LSTM
))
{
lstm_recognizer_
=
new
LSTMRecognizer
;
TFile
fp
;
fp
.
Open
(
tessdata_manager
.
GetDataFilePtr
(),
-
1
);
ASSERT_HOST
(
lstm_recognizer_
->
DeSerialize
(
tessdata_manager
.
swap
(),
&
fp
));
if
(
lstm_use_matrix
)
lstm_recognizer_
->
LoadDictionary
(
tessdata_path
.
string
(),
language
);
}
else
{
tprintf
(
"Error: LSTM requested, but not present!! Loading tesseract.
\n
"
);
tessedit_ocr_engine_mode
.
set_value
(
OEM_TESSERACT_ONLY
);
}
}
#endif
// Load the unicharset
if
(
!
tessdata_manager
.
SeekToStart
(
TESSDATA_UNICHARSET
)
||
!
unicharset
.
load_from_file
(
tessdata_manager
.
GetDataFilePtr
()))
{
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
)
{
// Avoid requiring a unicharset when we aren't running base tesseract.
unicharset
.
CopyFrom
(
lstm_recognizer_
->
GetUnicharset
());
}
else
if
(
!
tessdata_manager
.
SeekToStart
(
TESSDATA_UNICHARSET
)
||
!
unicharset
.
load_from_file
(
tessdata_manager
.
GetDataFilePtr
()))
{
return
false
;
}
if
(
unicharset
.
size
()
>
MAX_NUM_CLASSES
)
{
...
...
@@ -203,11 +231,6 @@ bool Tesseract::init_tesseract_lang_data(
ambigs_debug_level
,
use_ambigs_for_adaption
,
&
unicharset
);
if
(
tessdata_manager_debug_level
)
tprintf
(
"Loaded ambigs
\n
"
);
}
// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
if
(
tessedit_ocr_engine_mode
==
OEM_CUBE_ONLY
)
{
ASSERT_HOST
(
init_cube_objects
(
false
,
&
tessdata_manager
));
...
...
@@ -217,22 +240,6 @@ bool Tesseract::init_tesseract_lang_data(
ASSERT_HOST
(
init_cube_objects
(
true
,
&
tessdata_manager
));
if
(
tessdata_manager_debug_level
)
tprintf
(
"Loaded Cube with combiner
\n
"
);
}
else
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM_ONLY
)
{
if
(
tessdata_manager
.
swap
())
{
tprintf
(
"Error: LSTM requested on big-endian hardware!!
\n
"
);
tprintf
(
"Big-endian not yet supported! Loading tesseract.
\n
"
);
tessedit_ocr_engine_mode
.
set_value
(
OEM_TESSERACT_ONLY
);
}
else
if
(
tessdata_manager
.
SeekToStart
(
TESSDATA_LSTM
))
{
lstm_recognizer_
=
new
LSTMRecognizer
;
TFile
fp
;
fp
.
Open
(
tessdata_manager
.
GetDataFilePtr
(),
-
1
);
ASSERT_HOST
(
lstm_recognizer_
->
DeSerialize
(
tessdata_manager
.
swap
(),
&
fp
));
if
(
lstm_use_matrix
)
lstm_recognizer_
->
LoadDictionary
(
tessdata_path
.
string
(),
language
);
}
else
{
tprintf
(
"Error: LSTM requested, but not present!! Loading tesseract.
\n
"
);
tessedit_ocr_engine_mode
.
set_value
(
OEM_TESSERACT_ONLY
);
}
}
#endif
// Init ParamsModel.
...
...
@@ -425,16 +432,16 @@ int Tesseract::init_tesseract_internal(
tessdata_manager
.
End
();
return
0
;
}
// If only
Cube
will be used, skip loading Tesseract classifier's
// pre-trained templates.
bool
init_tesseract
_classifier
=
tessedit_ocr_engine_mode
!=
OEM_CUBE_ONLY
;
// If only Cube will be used and if it has its own Unicharset,
// skip initializing permuter and loading Tesseract Dawgs.
bool
init_dict
=
!
(
tessedit_ocr_engine_mode
==
OEM_CUBE_ONLY
&&
tessdata_manager
.
SeekToStart
(
TESSDATA_CUBE_UNICHARSET
));
program_editup
(
textbase
,
init_tesseract
_classifier
,
init_dict
);
// If only
LSTM
will be used, skip loading Tesseract classifier's
// pre-trained templates
and dictionary
.
bool
init_tesseract
=
tessedit_ocr_engine_mode
!=
OEM_LSTM_ONLY
&&
tessedit_ocr_engine_mode
!=
OEM_CUBE_ONLY
;
bool
init_dict
=
init_tesseract
;
if
(
tessedit_ocr_engine_mode
==
OEM_CUBE_ONLY
&&
!
tessdata_manager
.
SeekToStart
(
TESSDATA_CUBE_UNICHARSET
))
{
init_dict
=
true
;
}
program_editup
(
textbase
,
init_tesseract
,
init_dict
);
tessdata_manager
.
End
();
return
0
;
//Normal exit
}
...
...
ccmain/tesseract_cube_combiner.cpp
浏览文件 @
5deebe6c
...
...
@@ -21,6 +21,8 @@
// the recognition results of Tesseract and Cube at the word level
#include <algorithm>
#include <string>
#include <vector>
#include <wctype.h>
#include "tesseract_cube_combiner.h"
...
...
@@ -125,12 +127,10 @@ bool TesseractCubeCombiner::ValidWord(const string &str) {
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// and false otherwise.
bool
TesseractCubeCombiner
::
ComputeCombinerFeatures
(
const
string
&
tess_str
,
int
tess_confidence
,
CubeObject
*
cube_obj
,
WordAltList
*
cube_alt_list
,
vector
<
double
>
*
features
,
bool
*
agreement
)
{
bool
TesseractCubeCombiner
::
ComputeCombinerFeatures
(
const
string
&
tess_str
,
int
tess_confidence
,
CubeObject
*
cube_obj
,
WordAltList
*
cube_alt_list
,
std
::
vector
<
double
>
*
features
,
bool
*
agreement
)
{
features
->
clear
();
*
agreement
=
false
;
if
(
cube_alt_list
==
NULL
||
cube_alt_list
->
AltCount
()
<=
0
)
...
...
ccmain/tesseractclass.cpp
浏览文件 @
5deebe6c
...
...
@@ -81,9 +81,9 @@ Tesseract::Tesseract()
" (Values from PageSegMode enum in publictypes.h)"
,
this
->
params
()),
INT_INIT_MEMBER
(
tessedit_ocr_engine_mode
,
tesseract
::
OEM_TESSERACT_ONLY
,
"Which OCR engine(s) to run (Tesseract,
Cube
, both)."
"Which OCR engine(s) to run (Tesseract,
LSTM
, both)."
" Defaults to loading and running only Tesseract"
" (no
Cube
,no combiner)."
" (no
LSTM
,no combiner)."
" Values from OcrEngineMode enum in tesseractclass.h)"
,
this
->
params
()),
STRING_MEMBER
(
tessedit_char_blacklist
,
""
,
...
...
ccmain/tesseractclass.h
浏览文件 @
5deebe6c
...
...
@@ -210,6 +210,9 @@ class Tesseract : public Wordrec {
// Stores original_pix as this instance's original page image. Whatever image
// was stored before is released with pixDestroy first. Every sub-language
// instance then receives the result of pixClone(original_pix) through its own
// set_pix_original call.
// original_pix may be null (to drop the image); in that case null is passed
// on to the sub-languages as well instead of being handed to pixClone.
void set_pix_original(Pix* original_pix) {
  pixDestroy(&pix_original_);
  pix_original_ = original_pix;
  // Clone to sublangs as well. pixClone() reports an error when given a null
  // source, so only clone when there actually is an image to share.
  for (int i = 0; i < sub_langs_.size(); ++i) {
    sub_langs_[i]->set_pix_original(
        original_pix != nullptr ? pixClone(original_pix) : nullptr);
  }
}
// Returns a pointer to a Pix representing the best available (original) image
// of the page. Can be of any bit depth, but never color-mapped, as that has
...
...
@@ -261,20 +264,19 @@ class Tesseract : public Wordrec {
Tesseract
*
get_sub_lang
(
int
index
)
const
{
return
sub_langs_
[
index
];
}
// Returns true if any language uses Tesseract (as opposed to
cube
).
// Returns true if any language uses Tesseract (as opposed to
LSTM
).
bool
AnyTessLang
()
const
{
if
(
tessedit_ocr_engine_mode
!=
OEM_
CUBE
_ONLY
)
return
true
;
if
(
tessedit_ocr_engine_mode
!=
OEM_
LSTM
_ONLY
)
return
true
;
for
(
int
i
=
0
;
i
<
sub_langs_
.
size
();
++
i
)
{
if
(
sub_langs_
[
i
]
->
tessedit_ocr_engine_mode
!=
OEM_CUBE_ONLY
)
return
true
;
if
(
sub_langs_
[
i
]
->
tessedit_ocr_engine_mode
!=
OEM_LSTM_ONLY
)
return
true
;
}
return
false
;
}
// Returns true if any language uses the LSTM.
bool
AnyLSTMLang
()
const
{
if
(
tessedit_ocr_engine_mode
==
OEM_LSTM
_ONLY
)
return
true
;
if
(
tessedit_ocr_engine_mode
!=
OEM_TESSERACT
_ONLY
)
return
true
;
for
(
int
i
=
0
;
i
<
sub_langs_
.
size
();
++
i
)
{
if
(
sub_langs_
[
i
]
->
tessedit_ocr_engine_mode
==
OEM_LSTM
_ONLY
)
if
(
sub_langs_
[
i
]
->
tessedit_ocr_engine_mode
!=
OEM_TESSERACT
_ONLY
)
return
true
;
}
return
false
;
...
...
@@ -340,8 +342,6 @@ class Tesseract : public Wordrec {
// is also returned to enable calculation of output bounding boxes.
ImageData
*
GetRectImage
(
const
TBOX
&
box
,
const
BLOCK
&
block
,
int
padding
,
TBOX
*
revised_box
)
const
;
// Top-level function recognizes a single raw line.
void
RecogRawLine
(
PAGE_RES
*
page_res
);
// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void
LSTMRecognizeWord
(
const
BLOCK
&
block
,
ROW
*
row
,
WERD_RES
*
word
,
...
...
@@ -850,8 +850,8 @@ class Tesseract : public Wordrec {
" 5=line, 6=word, 7=char"
" (Values from PageSegMode enum in publictypes.h)"
);
INT_VAR_H
(
tessedit_ocr_engine_mode
,
tesseract
::
OEM_TESSERACT_ONLY
,
"Which OCR engine(s) to run (Tesseract,
Cube
, both). Defaults"
" to loading and running only Tesseract (no
Cube
, no combiner)."
"Which OCR engine(s) to run (Tesseract,
LSTM
, both). Defaults"
" to loading and running only Tesseract (no
LSTM
, no combiner)."
" (Values from OcrEngineMode enum in tesseractclass.h)"
);
STRING_VAR_H
(
tessedit_char_blacklist
,
""
,
"Blacklist of chars not to recognize"
);
...
...
ccstruct/pageres.cpp
浏览文件 @
5deebe6c
...
...
@@ -884,6 +884,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) {
}
FakeWordFromRatings
(
TOP_CHOICE_PERM
);
reject_map
.
initialise
(
blob_count
);
best_state
.
init_to_size
(
blob_count
,
1
);
done
=
true
;
}
...
...
ccstruct/publictypes.h
浏览文件 @
5deebe6c
...
...
@@ -255,8 +255,9 @@ enum ParagraphJustification {
*/
enum
OcrEngineMode
{
OEM_TESSERACT_ONLY
,
// Run Tesseract only - fastest
OEM_CUBE_ONLY
,
// Run Cube only - better accuracy, but slower
OEM_TESSERACT_CUBE_COMBINED
,
// Run both and combine results - best accuracy
OEM_LSTM_ONLY
,
// Run just the LSTM line recognizer.
OEM_TESSERACT_LSTM_COMBINED
,
// Run the LSTM recognizer, but allow fallback
// to Tesseract when things get difficult.
OEM_DEFAULT
,
// Specify this mode when calling init_*(),
// to indicate that any of the above modes
// should be automatically inferred from the
...
...
@@ -264,14 +265,8 @@ enum OcrEngineMode {
// command-line configs, or if not specified
// in any of the above should be set to the
// default OEM_TESSERACT_ONLY.
// OEM_LSTM_ONLY will fall back (with a warning) to OEM_TESSERACT_ONLY where
// there is no network model available. This allows use of a mix of languages,
// some of which contain a network model, and some of which do not. Since the
// tesseract model is required for the LSTM to fall back to for "difficult"
// words anyway, this seems like a reasonable approach, but leaves the danger
// of not noticing that it is using the wrong engine if the warning is
// ignored.
OEM_LSTM_ONLY
,
// Run just the LSTM line recognizer.
OEM_CUBE_ONLY
,
// Run Cube only - better accuracy, but slower
OEM_TESSERACT_CUBE_COMBINED
,
// Run both and combine results - best accuracy
};
}
// namespace tesseract.
...
...
ccstruct/ratngs.h
浏览文件 @
5deebe6c
...
...
@@ -508,6 +508,20 @@ class WERD_CHOICE : public ELIST_LINK {
}
return
word_str
;
}
// Walks the unichar ids of this word and reports whether at least one of
// them fails unicharset_->IsSpaceDelimited(). A word of zero length yields
// false.
bool ContainsAnyNonSpaceDelimited() const {
  int pos = 0;
  while (pos < length_) {
    if (!unicharset_->IsSpaceDelimited(unichar_ids_[pos])) {
      return true;
    }
    ++pos;
  }
  return false;
}
// Reports whether every unichar id in the word is UNICHAR_SPACE. The scan
// stops at the first id that differs. A word of zero length yields true.
bool IsAllSpaces() const {
  int pos = 0;
  while (pos < length_) {
    if (unichar_ids_[pos] != UNICHAR_SPACE) {
      return false;
    }
    ++pos;
  }
  return true;
}
// Call this to override the default (strict left to right graphemes)
// with the fact that some engine produces a "reading order" set of
...
...
dict/context.cpp
浏览文件 @
5deebe6c
...
...
@@ -49,7 +49,7 @@ const int case_state_table[6][4] = {
5
,
-
1
,
2
,
-
1
},
};
int
Dict
::
case_ok
(
const
WERD_CHOICE
&
word
,
const
UNICHARSET
&
unicharset
)
{
int
Dict
::
case_ok
(
const
WERD_CHOICE
&
word
,
const
UNICHARSET
&
unicharset
)
const
{
int
state
=
0
;
int
x
;
for
(
x
=
0
;
x
<
word
.
length
();
++
x
)
{
...
...
dict/dict.h
浏览文件 @
5deebe6c
...
...
@@ -260,7 +260,7 @@ class Dict {
MATRIX
*
ratings
);
/// Returns the length of the shortest alpha run in WordChoice.
int
LengthOfShortestAlphaRun
(
const
WERD_CHOICE
&
WordChoice
);
int
LengthOfShortestAlphaRun
(
const
WERD_CHOICE
&
WordChoice
)
const
;
/// Returns true if the certainty of the BestChoice word is within a
/// reasonable range of the average certainties for the best choices for
/// each character in the segmentation. This test is used to catch words
...
...
@@ -275,7 +275,7 @@ class Dict {
/// Returns false if the best choice for the current word is questionable
/// and should be tried again on the second pass or should be flagged to
/// the user.
bool
AcceptableResult
(
WERD_RES
*
word
)
;
bool
AcceptableResult
(
WERD_RES
*
word
)
const
;
void
EndDangerousAmbigs
();
/// Prints the current choices for this word to stdout.
void
DebugWordChoices
();
...
...
@@ -285,7 +285,7 @@ class Dict {
void
SettupStopperPass2
();
/* context.cpp *************************************************************/
/// Check a string to see if it matches a set of lexical rules.
int
case_ok
(
const
WERD_CHOICE
&
word
,
const
UNICHARSET
&
unicharset
);
int
case_ok
(
const
WERD_CHOICE
&
word
,
const
UNICHARSET
&
unicharset
)
const
;
/// Returns true if the word looks like an absolute garbage
/// (e.g. image mistakenly recognized as text).
bool
absolute_garbage
(
const
WERD_CHOICE
&
word
,
const
UNICHARSET
&
unicharset
);
...
...
dict/stopper.cpp
浏览文件 @
5deebe6c
...
...
@@ -107,7 +107,7 @@ bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
}
}
bool
Dict
::
AcceptableResult
(
WERD_RES
*
word
)
{
bool
Dict
::
AcceptableResult
(
WERD_RES
*
word
)
const
{
if
(
word
->
best_choice
==
NULL
)
return
false
;
float
CertaintyThreshold
=
stopper_nondict_certainty_base
-
reject_offset_
;
int
WordSize
;
...
...
@@ -448,7 +448,7 @@ void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
}
}
int
Dict
::
LengthOfShortestAlphaRun
(
const
WERD_CHOICE
&
WordChoice
)
{
int
Dict
::
LengthOfShortestAlphaRun
(
const
WERD_CHOICE
&
WordChoice
)
const
{
int
shortest
=
MAX_INT32
;
int
curr_len
=
0
;
for
(
int
w
=
0
;
w
<
WordChoice
.
length
();
++
w
)
{
...
...
lstm/lstmrecognizer.h
浏览文件 @
5deebe6c
...
...
@@ -141,6 +141,8 @@ class LSTMRecognizer {
// Returns the result of testing the NF_ADA_GRAD flag on network_.
bool IsUsingAdaGrad() const {
  return network_->TestFlag(NF_ADA_GRAD);
}
// Read-only accessor for the UNICHARSET this classifier works with: hands
// back a const reference to the unicharset member of ccutil_.
const UNICHARSET& GetUnicharset() const {
  return ccutil_.unicharset;
}
// Read-only accessor for the Dict this classifier works with: hands back the
// dict_ pointer as a pointer-to-const.
// NOTE(review): dict_ may be null; SearchWords checks the result and falls
// back to getDict() in that case.
const Dict* GetDict() const {
  return dict_;
}
// Sets the sample iteration to the given value. The sample_iteration_
// determines the seed for the random number generator. The training
// iteration is incremented only by a successful training iteration.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录