提交 0d9fa6a0 编写于 作者: T theraysmith

Fixed portability problems with VC++ 6 and VC++ express.

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@83 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 1943de9a
......@@ -139,6 +139,7 @@ void re_estimate_x_ht( //improve for 1 word
const char *word_str;
INT16 i;
INT16 offset;
STATS all_blobs_ht (0, 300); //every blob in word
STATS x_ht (0, 300); //confirmed pts in wd
......@@ -174,8 +175,9 @@ void re_estimate_x_ht( //improve for 1 word
Cycle blobs, allocating to one of the stats sets when possible.
*/
blob_it.set_to_list (word_res->outword->blob_list ());
for (blob_it.mark_cycle_pt (), i = 0;
!blob_it.cycled_list (); blob_it.forward (), i++) {
for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
!blob_it.cycled_list (); blob_it.forward (),
offset += word_res->best_choice->lengths()[i++]) {
if (!dodgy_blob (blob_it.data ())) {
blob_box = blob_it.data ()->bounding_box ();
blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
......@@ -189,22 +191,22 @@ void re_estimate_x_ht( //improve for 1 word
rej_blobs_max_area = blob_box.area ();
}
else {
if (STRING (chs_non_ambig_x_ht).contains (word_str[i]))
if (STRING (chs_non_ambig_x_ht).contains (word_str[offset]))
x_ht.add (blob_ht_above_baseline, 1);
if (STRING (chs_non_ambig_caps_ht).contains (word_str[i]))
if (STRING (chs_non_ambig_caps_ht).contains (word_str[offset]))
caps_ht.add (blob_ht_above_baseline, 1);
if (STRING (chs_ambig_caps_x).contains (word_str[i])) {
if (STRING (chs_ambig_caps_x).contains (word_str[offset])) {
case_ambig.add (blob_ht_above_baseline, 1);
if (STRING (chs_x_ht).contains (word_str[i]))
if (STRING (chs_x_ht).contains (word_str[offset]))
x_ht_ambigs++;
else
caps_ht_ambigs++;
}
if (STRING (chs_bl_ambig_caps_x).contains (word_str[i])) {
if (STRING (chs_x_ht).contains (word_str[i])) {
if (STRING (chs_bl_ambig_caps_x).contains (word_str[offset])) {
if (STRING (chs_x_ht).contains (word_str[offset])) {
/* confirm x_height provided > 15% total height below baseline */
if ((bln_baseline_offset - blob_box.bottom ()) /
(float) blob_box.height () > 0.15)
......@@ -498,16 +500,22 @@ void re_estimate_x_ht( //improve for 1 word
void check_block_occ(WERD_RES *word_res) {
PBLOB_IT blob_it;
STRING new_string;
STRING new_string_lengths(word_res->best_choice->lengths());
// char new_string_lengths[word_res->best_choice->lengths().length() + 1];
REJMAP new_map = word_res->reject_map;
WERD_CHOICE *new_choice;
const char *word_str = word_res->best_choice->string ().string ();
INT16 i;
INT16 offset;
INT16 reject_count = 0;
char confirmed_char;
char confirmed_char[UNICHAR_LEN + 1];
char temp_char[UNICHAR_LEN + 1];
float x_ht;
float caps_ht;
new_string_lengths[0] = 0;
if (word_res->x_height > 0)
x_ht = word_res->x_height * word_res->denorm.scale ();
else
......@@ -520,24 +528,31 @@ void check_block_occ(WERD_RES *word_res) {
blob_it.set_to_list (word_res->outword->blob_list ());
for (blob_it.mark_cycle_pt (), i = 0;
!blob_it.cycled_list (); blob_it.forward (), i++) {
new_string += word_str[i]; //default copy
for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
!blob_it.cycled_list (); blob_it.forward (),
offset += word_res->best_choice->lengths()[i++]) {
strncpy(temp_char, word_str + offset,
word_res->best_choice->lengths()[i]); //default copy
temp_char[word_res->best_choice->lengths()[i]] = '\0';
if (word_res->reject_map[i].accepted ()) {
confirmed_char = check_blob_occ (word_str[i],
check_blob_occ (temp_char,
blob_it.data ()->bounding_box ().
top () - bln_baseline_offset, x_ht,
caps_ht);
caps_ht, confirmed_char);
if (confirmed_char == '\0') {
if (strcmp(confirmed_char, "") == 0) {
if (rej_use_check_block_occ) {
new_map[i].setrej_xht_fixup ();
reject_count++;
}
}
else
new_string[i] = confirmed_char;
strcpy(temp_char, confirmed_char);
}
new_string += temp_char;
new_string_lengths[i] = strlen(temp_char);
new_string_lengths[i + 1] = 0;
}
if ((reject_count > 0) || (new_string != word_str)) {
if (debug_x_ht_level >= 2) {
......@@ -548,6 +563,7 @@ void check_block_occ(WERD_RES *word_res) {
tprintf ("\n");
}
new_choice = new WERD_CHOICE (new_string.string (),
new_string_lengths.string(),
word_res->best_choice->rating (),
word_res->best_choice->certainty (),
word_res->best_choice->permuter ());
......@@ -562,13 +578,14 @@ void check_block_occ(WERD_RES *word_res) {
* check_blob_occ()
*
* Checks blob for position relative to position above baseline
* Returns 0 for reject, or (possibly case shifted) confirmed char
* Return 0 for reject, or (possibly case shifted) confirmed char
*************************************************************************/
char check_blob_occ(char proposed_char,
void check_blob_occ(char* proposed_char,
INT16 blob_ht_above_baseline,
float x_ht,
float caps_ht) {
float caps_ht,
char* confirmed_char) {
BOOL8 blob_definite_x_ht;
BOOL8 blob_definite_caps_ht;
float acceptable_variation;
......@@ -593,37 +610,47 @@ char check_blob_occ(char proposed_char,
blob_definite_caps_ht = blob_ht_above_baseline >=
caps_ht - acceptable_variation;
if (STRING (chs_ambig_caps_x).contains (proposed_char)) {
if (STRING (chs_ambig_caps_x).contains (*proposed_char)) {
if ((!blob_definite_x_ht && !blob_definite_caps_ht) ||
(proposed_char == '0' && !blob_definite_caps_ht) ||
(proposed_char == 'o' && !blob_definite_x_ht))
return '\0';
((strcmp(proposed_char, "0") == 0) && !blob_definite_caps_ht) ||
((strcmp(proposed_char, "o") == 0) && !blob_definite_x_ht)) {
strcpy(confirmed_char, "");
return;
}
else if (blob_definite_caps_ht &&
STRING (chs_x_ht).contains (proposed_char)) {
if (x_ht_case_flip)
STRING (chs_x_ht).contains (*proposed_char)) {
if (x_ht_case_flip) {
//flip to upper case
return (char) toupper (proposed_char);
else
return '\0';
proposed_char[0] = (char) toupper (*proposed_char);
return;
} else {
strcpy(confirmed_char, "");
return;
}
}
else if (blob_definite_x_ht &&
!STRING (chs_x_ht).contains (proposed_char)) {
if (x_ht_case_flip)
!STRING (chs_x_ht).contains (*proposed_char)) {
if (x_ht_case_flip) {
//flip to lower case
return (char) tolower (proposed_char);
else
return '\0';
proposed_char[0] = (char) tolower (*proposed_char);
} else {
strcpy(confirmed_char, "");
return;
}
}
}
else
if ((STRING (chs_non_ambig_x_ht).contains (proposed_char)
if ((STRING (chs_non_ambig_x_ht).contains (*proposed_char)
&& !blob_definite_x_ht)
|| (STRING (chs_non_ambig_caps_ht).contains (proposed_char)
&& !blob_definite_caps_ht))
return '\0';
return proposed_char;
|| (STRING (chs_non_ambig_caps_ht).contains (*proposed_char)
&& !blob_definite_caps_ht)) {
strcpy(confirmed_char, "");
return;
}
strcpy(confirmed_char, proposed_char);
return;
}
......@@ -647,8 +674,10 @@ void improve_estimate(WERD_RES *word_res,
const char *word_str;
INT16 i;
INT16 offset;
BOX blob_box; //blob bounding box
char confirmed_char;
char confirmed_char[UNICHAR_LEN + 1];
char temp_char[UNICHAR_LEN + 1];
float new_val;
/* IMPROVE estimates here - if good estimates, and case ambig chars,
......@@ -658,17 +687,21 @@ void improve_estimate(WERD_RES *word_res,
blob_it.set_to_list (word_res->outword->blob_list ());
word_str = word_res->best_choice->string ().string ();
for (blob_it.mark_cycle_pt (), i = 0;
!blob_it.cycled_list (); blob_it.forward (), i++) {
if ((STRING (chs_ambig_caps_x).contains (word_str[i])) &&
for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
!blob_it.cycled_list (); blob_it.forward (),
offset += word_res->best_choice->lengths()[i++]) {
if ((STRING (chs_ambig_caps_x).contains (word_str[offset])) &&
(!dodgy_blob (blob_it.data ()))) {
blob_box = blob_it.data ()->bounding_box ();
blob_ht_above_baseline = blob_box.top () - bln_baseline_offset;
confirmed_char = check_blob_occ (word_str[i],
strncpy(temp_char, word_str + offset,
word_res->best_choice->lengths()[i]);
temp_char[word_res->best_choice->lengths()[i]] = '\0';
check_blob_occ (temp_char,
blob_ht_above_baseline,
est_x_ht, est_caps_ht);
if (confirmed_char != '\0')
if (STRING (chs_x_ht).contains (confirmed_char))
est_x_ht, est_caps_ht, confirmed_char);
if (strcmp(confirmed_char, "") != 0)
if (STRING (chs_x_ht).contains (*confirmed_char))
x_ht.add (blob_ht_above_baseline, 1);
else
caps_ht.add (blob_ht_above_baseline, 1);
......@@ -692,8 +725,7 @@ void reject_ambigs( //rej any accepted xht ambig chars
while (*word_str != '\0') {
if (STRING (chs_ambig_caps_x).contains (*word_str))
word->reject_map[i].setrej_xht_fixup ();
word_str++;
i++;
word_str += word->best_choice->lengths()[i++];
}
}
......@@ -713,6 +745,7 @@ void est_ambigs( //xht ambig ht stats
const char *word_str;
INT16 i;
INT16 offset;
float min; //min ambig ch ht
float max; //max ambig ch ht
float short_limit; // for lower case
......@@ -738,10 +771,11 @@ void est_ambigs( //xht ambig ht stats
tall_limit = max - (max - min) * x_ht_variation;
word_str = word_res->best_choice->string ().string ();
blob_it.set_to_list (word_res->outword->blob_list ());
for (blob_it.mark_cycle_pt (), i = 0;
!blob_it.cycled_list (); blob_it.forward (), i++) {
for (blob_it.mark_cycle_pt (), i = 0, offset = 0;
!blob_it.cycled_list (); blob_it.forward (),
offset += word_res->best_choice->lengths()[i++]) {
if (word_res->reject_map[i].accepted () &&
STRING (chs_ambig_caps_x).contains (word_str[i]) &&
STRING (chs_ambig_caps_x).contains (word_str[offset]) &&
(!dodgy_blob (blob_it.data ()))) {
blob_box = blob_it.data ()->bounding_box ();
blob_ht_above_baseline =
......
......@@ -5,10 +5,10 @@
// For Unix and mac the file does nothing. It needs to be included in all cpp
// files for compatibility with the PC pre-compiled header mechanism.
#ifdef __MSW32__
#ifdef __IPEREGDLL
#ifndef _AFXDLL
#define WIN32_LEAN_AND_MEAN
#define STRICT 1
#include <windows.h>
//#include <windows.h>
#include <stdlib.h>
#else
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
......
......@@ -107,7 +107,6 @@ void DoError(int Error, const char *Message) {
(*ProcTrapStack[CurrentTrapDepth - 1]) ();
longjmp (ErrorTrapStack[CurrentTrapDepth - 1], 1);
assert(FALSE);
} /* DoError */
......
......@@ -35,21 +35,59 @@
/*----------------------------------------------------------------------
T y p e s
----------------------------------------------------------------------*/
#define MAX_WERD_LENGTH (INT32) 40
#define MAX_NODE_EDGES (INT32) 100
#define LAST_FLAG (INT32) 1
#define DIRECTION_FLAG (INT32) 2
#define WERD_END_FLAG (INT32) 4
/* #define MAX_WERD_LENGTH (INT32) 40 */
/* #define MAX_NODE_EDGES_DISPLAY (INT32) 100 */
/* #define LAST_FLAG (INT32) 1 */
/* #define DIRECTION_FLAG (INT32) 2 */
/* #define WERD_END_FLAG (INT32) 4 */
/* #define LETTER_START_BIT 0 */
/* #define FLAG_START_BIT 8 */
/* #define NEXT_EDGE_START_BIT 11 */
/* #define NO_EDGE (INT32) 0x001fffff */
/* #define NEXT_EDGE_MASK (INT32) 0xfffff800 */
/* #define FLAGS_MASK (INT32) 0x00000700 */
/* #define LETTER_MASK (INT32) 0x000000ff */
/* #define REFFORMAT "%d" */
/* typedef UINT32 EDGE_RECORD; */
/* typedef EDGE_RECORD *EDGE_ARRAY; */
/* typedef INT32 EDGE_REF; */
/* typedef INT32 NODE_REF; */
#define MAX_WERD_LENGTH (INT64) 40
#define MAX_NODE_EDGES_DISPLAY (INT64) 100
#define LAST_FLAG (INT64) 1
#define DIRECTION_FLAG (INT64) 2
#define WERD_END_FLAG (INT64) 4
#define LETTER_START_BIT 0
#define FLAG_START_BIT 8
#define NEXT_EDGE_START_BIT 11
#ifdef __MSW32__
#define NO_EDGE (INT64) 0x001fffffffffffffi64
#define NEXT_EDGE_MASK (INT64) 0xfffffffffffff800i64
#define FLAGS_MASK (INT64) 0x0000000000000700i64
#define LETTER_MASK (INT64) 0x00000000000000ffi64
#else
#define NO_EDGE (INT64) 0x001fffffffffffffll
#define NEXT_EDGE_MASK (INT64) 0xfffffffffffff800ll
#define FLAGS_MASK (INT64) 0x0000000000000700ll
#define LETTER_MASK (INT64) 0x00000000000000ffll
#endif
#define FLAG_START_BIT 21
#define LETTER_START_BIT 24
#define MAX_NUM_EDGES_IN_SQUISHED_DAWG_FILE 2000000
#define NO_EDGE (INT32) 0x1fffff
#define REFFORMAT "%lld"
typedef UINT32 EDGE_RECORD;
typedef UINT64 EDGE_RECORD;
typedef EDGE_RECORD *EDGE_ARRAY;
typedef INT32 EDGE_REF;
typedef INT32 NODE_REF;
typedef INT64 EDGE_REF;
typedef INT64 NODE_REF;
/*---------------------------------------------------------------------
V a r i a b l e s
......@@ -60,6 +98,28 @@ extern INT32 debug;
/*----------------------------------------------------------------------
M a c r o s
----------------------------------------------------------------------*/
/**********************************************************************
* edge_of
*
* Yield the edge record stored at position e of the edge array edges.
* The expansion is a plain subscript expression, so it can be read
* from or assigned to.
**********************************************************************/
#define edge_of(edges,e) ((edges)[(e)])
/**********************************************************************
* print_edge
*
* Print one edge entry of the DAWG on a single line: the edge number,
* the value of next_node for it, its edge_letter, and the strings
* chosen by forward_edge, last_edge and end_of_word.
*
* The edge number and the next node are cast to INT64 and printed
* with %lld (the conversion REFFORMAT uses), because the edge record
* and reference types are 64 bits wide and %d would misread them.
* NOTE(review): assumes "%lld" matches INT64 on every supported
* compiler runtime - confirm for older MSVC.
*
* The last line deliberately carries no trailing backslash, so the
* macro ends here and cannot absorb the source line that follows it.
**********************************************************************/
#define print_edge(dawg,edge) \
printf ("%7lld : next = %7lld, char = '%c', %s %s %s\n", \
(INT64) (edge), (INT64) next_node (dawg, edge), edge_letter (dawg, edge), \
(forward_edge (dawg, edge) ? "FORWARD" : " "), \
(last_edge (dawg, edge) ? "LAST" : " "), \
(end_of_word (dawg, edge) ? "EOW" : ""))
/**********************************************************************
* next_node
*
......@@ -67,7 +127,7 @@ extern INT32 debug;
**********************************************************************/
#define next_node(edges,e) \
((edges)[e] & NO_EDGE)
(((edges)[e] & NEXT_EDGE_MASK) >> NEXT_EDGE_START_BIT)
/**********************************************************************
* set_next_edge
......@@ -76,8 +136,17 @@ extern INT32 debug;
**********************************************************************/
#define set_next_edge(edges,e,value) \
((edges)[e] = ((edges)[e] & (INT32) 0xffe00000) |\
(value & NO_EDGE))
((edges)[e] = ((edges)[e] & (~NEXT_EDGE_MASK)) |\
((value << NEXT_EDGE_START_BIT) & NEXT_EDGE_MASK))
/**********************************************************************
* empty_edge_spot
*
* Evaluate to TRUE when the record at position e of edges is equal to
* NEXT_EDGE_MASK, the value that marks a spot as unoccupied.
**********************************************************************/
#define empty_edge_spot(edges,e) (NEXT_EDGE_MASK == (edges)[e])
/**********************************************************************
* set_empty_edge
......@@ -86,7 +155,7 @@ extern INT32 debug;
**********************************************************************/
#define set_empty_edge(edges,e) \
((edges)[e] = NO_EDGE)
((edges)[e] = NEXT_EDGE_MASK)
/**********************************************************************
* clear_all_edges
......@@ -105,7 +174,16 @@ for (edge=0; edge<max_num_edges; edge++) \
**********************************************************************/
#define edge_occupied(edges,e) \
((edges)[e] != NO_EDGE)
((edges)[e] != NEXT_EDGE_MASK)
/**********************************************************************
* edge_flags
*
* The flag bits stored in this edge of the DAWG: the record at
* position e is masked with FLAGS_MASK and shifted down by
* FLAG_START_BIT.
**********************************************************************/
#define edge_flags(edges,e) ((FLAGS_MASK & (edges)[e]) >> FLAG_START_BIT)
/**********************************************************************
* edge_letter
......@@ -114,7 +192,16 @@ for (edge=0; edge<max_num_edges; edge++) \
**********************************************************************/
#define edge_letter(edges,e) \
((edges)[e] >> LETTER_START_BIT)
((char)(((edges)[e] & LETTER_MASK) >> LETTER_START_BIT))
/**********************************************************************
* letter_of_edge
*
* The letter choice held in an edge record value. Unlike edge_letter,
* the argument is the edge value itself rather than an edge array and
* an index: the value is masked with LETTER_MASK, shifted down by
* LETTER_START_BIT and converted to char.
*
* The argument is parenthesized in the expansion so that an
* expression argument is masked as a whole instead of being split by
* the precedence of the & operator.
**********************************************************************/
#define letter_of_edge(edge) \
((char)(((edge) & LETTER_MASK) >> LETTER_START_BIT))
/**********************************************************************
* last_edge
......@@ -171,7 +258,9 @@ while (! last_edge (edges,e++))
* Check the case of this character in the character string to make
* sure that there is not a problem with the case.
**********************************************************************/
// TODO(tkielbus) Replace isalpha, islower & isupper by unicode versions.
// However the lengths information is not available at this point in the
// code. We will probably get rid of the dictionaries at some point anyway.
#define case_is_okay(word,i) \
(i ? \
((isupper(word[i]) && islower(word[i-1])) ? \
......
......@@ -23,7 +23,7 @@ make_int_var (LearningDebugLevel, 0, MakeLearningDebugLevel,
make_int_var (NormMethod, character, MakeNormMethod,
15, 10, SetNormMethod, "Normalization Method ...")
char *demodir; /*demo home directory */
//char *demodir; /*demo home directory */
void cprintf( //Trace printf
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册