提交 2f4a43b4 编写于 作者: T theraysmith

Improved consistency of results from floating point calculations

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@79 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 d33938c0
此差异已折叠。
......@@ -22,6 +22,7 @@
#include "intmatcher.h"
#include "tordvars.h"
#include "callcpp.h"
#include "globals.h"
#include <math.h>
#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \
......@@ -335,7 +336,7 @@ make_int_var (IntThetaFudge, 128, MakeIntThetaFudge,
16, 23, SetIntThetaFudge,
"Integer Matcher Theta Fudge 0-255: ");
make_float_var (CPCutoffStrength, 0.15, MakeCPCutoffStrength,
make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength,
16, 24, SetCPCutoffStrength,
"Class Pruner CutoffStrength: ");
......@@ -422,10 +423,10 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumPruners;
INT32 feature_index; //current feature
static INT32 ClassCount[MAX_NUM_CLASSES - 1];
static INT16 NormCount[MAX_NUM_CLASSES - 1];
static INT16 SortKey[MAX_NUM_CLASSES];
static UINT8 SortIndex[MAX_NUM_CLASSES];
static INT32 ClassCount[MAX_NUM_CLASSES];
static INT16 NormCount[MAX_NUM_CLASSES];
static INT16 SortKey[MAX_NUM_CLASSES + 1];
static UINT8 SortIndex[MAX_NUM_CLASSES + 1];
CLASS_INDEX Class;
int out_class;
int MaxNumClasses;
......@@ -433,7 +434,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumClasses;
FLOAT32 max_rating; //max allowed rating
INT32 *ClassCountPtr;
INT8 classch;
CLASS_ID classch;
MaxNumClasses = NumClassesIn (IntTemplates);
......@@ -497,12 +498,11 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
/* Adjust Class Counts for Number of Expected Features */
for (Class = 0; Class < MaxNumClasses; Class++)
if (NumFeatures < ExpectedNumFeatures[Class])
ClassCount[Class] =
(int) (((FLOAT32) (ClassCount[Class] * NumFeatures)) /
(NumFeatures +
CPCutoffStrength * (ExpectedNumFeatures[Class] -
NumFeatures)));
if (NumFeatures < ExpectedNumFeatures[Class]) {
int deficit = ExpectedNumFeatures[Class] - NumFeatures;
ClassCount[Class] -= ClassCount[Class] * deficit /
(NumFeatures*CPCutoffStrength + deficit);
}
/* Adjust Class Counts for Normalization Factors */
MaxCount = 0;
......@@ -535,17 +535,14 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
if (display_ratings > 1) {
cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures);
for (Class = 0; Class < NumClasses; Class++) {
classch =
ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
cprintf ("%c:C=%d, E=%d, N=%d, Rat=%d\n", classch,
ClassCount[SortIndex[NumClasses - Class]],
ExpectedNumFeatures[SortIndex[NumClasses - Class]],
SortKey[NumClasses - Class],
(int) (10 +
1000 * (1.0f -
SortKey[NumClasses -
Class] / ((float) cp_maps[3] *
NumFeatures))));
classch = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n",
unicharset.id_to_unichar(classch),
ClassCount[SortIndex[NumClasses - Class]],
ExpectedNumFeatures[SortIndex[NumClasses - Class]],
SortKey[NumClasses - Class],
1010 - 1000 * SortKey[NumClasses - Class] /
(cp_maps[3] * NumFeatures));
}
if (display_ratings > 2) {
NumPruners = NumClassPrunersIn (IntTemplates);
......@@ -569,9 +566,9 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
PrunerWord = *BasePrunerAddress++;
for (Class = 0; Class < 16; Class++, class_index++) {
if (NormCount[class_index] >= MaxCount)
cprintf (" %c=%d,",
ClassIdForIndex (IntTemplates,
class_index),
cprintf (" %s=%d,",
unicharset.id_to_unichar(ClassIdForIndex (IntTemplates,
class_index)),
PrunerWord & 3);
PrunerWord >>= 2;
}
......@@ -582,8 +579,8 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cprintf ("Adjustments:");
for (Class = 0; Class < MaxNumClasses; Class++) {
if (NormCount[Class] > MaxCount)
cprintf (" %c=%d,",
ClassIdForIndex (IntTemplates, Class),
cprintf (" %s=%d,",
unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, Class)),
-((ClassPrunerMultiplier *
NormalizationFactors[Class]) >> 8) * cp_maps[3] /
3);
......@@ -640,7 +637,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cp_bestconf = -1;
for (Class = 0; Class < NumClasses; Class++) {
classch = Results[Class].Class;
if (classch == blob_answer) {
if (strcmp(unicharset.id_to_unichar(classch), blob_answer) == 0) {
cp_bestindex = Class;
cp_bestrating = (int) (1000 * Results[Class].Rating + 10);
cp_bestconf = (int) (1000 * Results[Class].Rating2 + 10);
......@@ -1191,7 +1188,6 @@ int FindGoodProtos(INT_CLASS ClassTemplate,
if (MatchDebuggingOn (Debug))
cprintf ("Match Complete --------------------------------------------\n");
return NumGoodProtos;
}
......
......@@ -134,7 +134,7 @@ void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) {
}
#ifndef GRAPHICS_DISABLED
if (chop_debug)
if (chop_debug > 2)
mark_outline(point);
#endif
}
......@@ -162,7 +162,8 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
length = length_product (vector1, vector2);
if ((int) length == 0)
return (0);
angle = (int) (asin (CROSS (vector1, vector2) / length) / PI * 180.0);
angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
length) / PI * 180.0 + 0.5));
/* Use dot product */
if (SCALAR (vector1, vector2) < 0)
......
......@@ -73,7 +73,8 @@ double_VAR (tessedit_certainty_threshold, -2.25, "Good blob limit");
* Set the fields in this choice to default (deliberately bad) initial values.
**********************************************************************/
#define set_null_choice(choice) \
(class_string (choice) = NULL, \
(class_string (choice) = NULL, \
class_lengths (choice) = NULL, \
class_probability (choice) = MAX_FLOAT32, \
class_certainty (choice) = -MAX_FLOAT32) \
......@@ -225,7 +226,8 @@ SEAM *attempt_blob_chop(TWERD *word, INT32 blob_number, SEAMS seam_list) {
delete_seam(seam);
#ifndef GRAPHICS_DISABLED
if (chop_debug) {
display_blob(blob, Red);
if (chop_debug >2)
display_blob(blob, Red);
cprintf ("\n** seam being removed ** \n");
}
#endif
......@@ -437,7 +439,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
}
bit_count = index - 1;
permute_characters(char_choices, rating_limit, best_choice, raw_choice);
set_n_ones (&state, array_count (char_choices) - 1);
if (matcher_fp != NULL) {
if (matcher_pass == 0) {
......@@ -474,7 +475,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
if (chop_debug)
print_seams ("Final seam list:", seam_list);
if (enable_assoc &&
!AcceptableChoice (char_choices, best_choice, raw_choice, NULL)
|| (tester || trainer)
......
......@@ -370,7 +370,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
INT16 num_points = 0;
#ifndef GRAPHICS_DISABLED
if (chop_debug)
if (chop_debug > 2)
display_splits = TRUE;
draw_blob_edges(blob);
......@@ -417,7 +417,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
mark_split (seam->split2);
if (seam->split3)
mark_split (seam->split3);
if (chop_debug > 1) {
if (chop_debug > 2) {
update_edge_window();
edge_window_wait();
}
......
......@@ -42,22 +42,22 @@
* Split this blob into two blobs by applying the splits included in
* the seam description.
**********************************************************************/
void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
check_outline_mem();
void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
check_outline_mem();
if (seam->split1 == NULL) {
divide_blobs (blob, other_blob, seam->location);
}
else if (seam->split2 == NULL) {
make_split_blobs(blob, other_blob, seam);
make_split_blobs(blob, other_blob, seam);
}
else if (seam->split3 == NULL) {
make_double_split(blob, other_blob, seam);
make_double_split(blob, other_blob, seam);
}
else {
make_triple_split(blob, other_blob, seam);
make_triple_split(blob, other_blob, seam);
}
check_outline_mem();
check_outline_mem();
}
......@@ -69,7 +69,7 @@ void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* other blob. The ones whose x location is less than that point are
* retained in the original blob.
**********************************************************************/
void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
TESSLINE *outline;
TESSLINE *outline1 = NULL;
TESSLINE *outline2 = NULL;
......@@ -115,23 +115,23 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Group the outlines from the first blob into both of them. Do so
* according to the information about the split.
**********************************************************************/
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
setup_blob_outlines(blob);
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
setup_blob_outlines(blob);
divide_blobs(blob, other_blob, location);
divide_blobs(blob, other_blob, location);
eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob);
eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob);
correct_blob_order(blob, other_blob);
correct_blob_order(blob, other_blob);
#ifndef GRAPHICS_DISABLED
if (chop_debug) {
display_blob(blob, Red);
if (chop_debug > 2) {
display_blob(blob, Red);
#ifdef __UNIX__
sleep (1);
#endif
display_blob(other_blob, Cyan);
display_blob(other_blob, Cyan);
}
#endif
}
......@@ -143,7 +143,7 @@ void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification.
**********************************************************************/
void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2);
form_two_blobs (blob, other_blob, seam->location);
......@@ -156,7 +156,7 @@ void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Create two outlines out of one by splitting the original one in half.
* Return the resultant outlines.
**********************************************************************/
void make_single_split(TESSLINE *outlines, SPLIT *split) {
void make_single_split(TESSLINE *outlines, SPLIT *split) {
assert (outlines != NULL);
split_outline (split->point1, split->point2);
......@@ -186,7 +186,7 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) {
* Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification.
**********************************************************************/
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
form_two_blobs (blob, other_blob, seam->location);
......@@ -201,7 +201,7 @@ void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* the outlines. Three of the starting outlines will produce two ending
* outlines. Return the resultant blobs for classification.
**********************************************************************/
void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2);
make_single_split (blob->outlines, seam->split3);
......@@ -217,7 +217,7 @@ void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* result. The seam may consist of one, two, or three splits. Each
* of these splits must be removed from the outlines.
**********************************************************************/
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
TESSLINE *outline;
if (!seam)
......@@ -231,7 +231,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
while (outline->next)
outline = outline->next;
outline->next = other_blob->outlines;
oldblob(other_blob);
oldblob(other_blob);
if (seam->split1 == NULL) {
}
......@@ -248,10 +248,10 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
undo_single_split (blob, seam->split1);
}
setup_blob_outlines(blob);
eliminate_duplicate_outlines(blob);
setup_blob_outlines(blob);
eliminate_duplicate_outlines(blob);
check_outline_mem();
check_outline_mem();
}
......@@ -261,7 +261,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Undo a seam that is made by a single split. Perform the correct
* magic to reconstruct the appropriate set of outline data structures.
**********************************************************************/
void undo_single_split(TBLOB *blob, SPLIT *split) {
void undo_single_split(TBLOB *blob, SPLIT *split) {
TESSLINE *outline1;
TESSLINE *outline2;
/* Modify edge points */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册