提交 2f4a43b4 编写于 作者: T theraysmith

Improved consistency of results from floating point calculations

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@79 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 d33938c0
此差异已折叠。
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "intmatcher.h" #include "intmatcher.h"
#include "tordvars.h" #include "tordvars.h"
#include "callcpp.h" #include "callcpp.h"
#include "globals.h"
#include <math.h> #include <math.h>
#define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \ #define CLASS_MASK_SIZE ((MAX_NUM_CLASSES*NUM_BITS_PER_CLASS \
...@@ -335,7 +336,7 @@ make_int_var (IntThetaFudge, 128, MakeIntThetaFudge, ...@@ -335,7 +336,7 @@ make_int_var (IntThetaFudge, 128, MakeIntThetaFudge,
16, 23, SetIntThetaFudge, 16, 23, SetIntThetaFudge,
"Integer Matcher Theta Fudge 0-255: "); "Integer Matcher Theta Fudge 0-255: ");
make_float_var (CPCutoffStrength, 0.15, MakeCPCutoffStrength, make_int_var (CPCutoffStrength, 7, MakeCPCutoffStrength,
16, 24, SetCPCutoffStrength, 16, 24, SetCPCutoffStrength,
"Class Pruner CutoffStrength: "); "Class Pruner CutoffStrength: ");
...@@ -422,10 +423,10 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -422,10 +423,10 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumPruners; int NumPruners;
INT32 feature_index; //current feature INT32 feature_index; //current feature
static INT32 ClassCount[MAX_NUM_CLASSES - 1]; static INT32 ClassCount[MAX_NUM_CLASSES];
static INT16 NormCount[MAX_NUM_CLASSES - 1]; static INT16 NormCount[MAX_NUM_CLASSES];
static INT16 SortKey[MAX_NUM_CLASSES]; static INT16 SortKey[MAX_NUM_CLASSES + 1];
static UINT8 SortIndex[MAX_NUM_CLASSES]; static UINT8 SortIndex[MAX_NUM_CLASSES + 1];
CLASS_INDEX Class; CLASS_INDEX Class;
int out_class; int out_class;
int MaxNumClasses; int MaxNumClasses;
...@@ -433,7 +434,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -433,7 +434,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
int NumClasses; int NumClasses;
FLOAT32 max_rating; //max allowed rating FLOAT32 max_rating; //max allowed rating
INT32 *ClassCountPtr; INT32 *ClassCountPtr;
INT8 classch; CLASS_ID classch;
MaxNumClasses = NumClassesIn (IntTemplates); MaxNumClasses = NumClassesIn (IntTemplates);
...@@ -497,12 +498,11 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -497,12 +498,11 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
/* Adjust Class Counts for Number of Expected Features */ /* Adjust Class Counts for Number of Expected Features */
for (Class = 0; Class < MaxNumClasses; Class++) for (Class = 0; Class < MaxNumClasses; Class++)
if (NumFeatures < ExpectedNumFeatures[Class]) if (NumFeatures < ExpectedNumFeatures[Class]) {
ClassCount[Class] = int deficit = ExpectedNumFeatures[Class] - NumFeatures;
(int) (((FLOAT32) (ClassCount[Class] * NumFeatures)) / ClassCount[Class] -= ClassCount[Class] * deficit /
(NumFeatures + (NumFeatures*CPCutoffStrength + deficit);
CPCutoffStrength * (ExpectedNumFeatures[Class] - }
NumFeatures)));
/* Adjust Class Counts for Normalization Factors */ /* Adjust Class Counts for Normalization Factors */
MaxCount = 0; MaxCount = 0;
...@@ -535,17 +535,14 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -535,17 +535,14 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
if (display_ratings > 1) { if (display_ratings > 1) {
cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures); cprintf ("CP:%d classes, %d features:\n", NumClasses, NumFeatures);
for (Class = 0; Class < NumClasses; Class++) { for (Class = 0; Class < NumClasses; Class++) {
classch = classch = ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]);
ClassIdForIndex (IntTemplates, SortIndex[NumClasses - Class]); cprintf ("%s:C=%d, E=%d, N=%d, Rat=%d\n",
cprintf ("%c:C=%d, E=%d, N=%d, Rat=%d\n", classch, unicharset.id_to_unichar(classch),
ClassCount[SortIndex[NumClasses - Class]], ClassCount[SortIndex[NumClasses - Class]],
ExpectedNumFeatures[SortIndex[NumClasses - Class]], ExpectedNumFeatures[SortIndex[NumClasses - Class]],
SortKey[NumClasses - Class], SortKey[NumClasses - Class],
(int) (10 + 1010 - 1000 * SortKey[NumClasses - Class] /
1000 * (1.0f - (cp_maps[3] * NumFeatures));
SortKey[NumClasses -
Class] / ((float) cp_maps[3] *
NumFeatures))));
} }
if (display_ratings > 2) { if (display_ratings > 2) {
NumPruners = NumClassPrunersIn (IntTemplates); NumPruners = NumClassPrunersIn (IntTemplates);
...@@ -569,9 +566,9 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -569,9 +566,9 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
PrunerWord = *BasePrunerAddress++; PrunerWord = *BasePrunerAddress++;
for (Class = 0; Class < 16; Class++, class_index++) { for (Class = 0; Class < 16; Class++, class_index++) {
if (NormCount[class_index] >= MaxCount) if (NormCount[class_index] >= MaxCount)
cprintf (" %c=%d,", cprintf (" %s=%d,",
ClassIdForIndex (IntTemplates, unicharset.id_to_unichar(ClassIdForIndex (IntTemplates,
class_index), class_index)),
PrunerWord & 3); PrunerWord & 3);
PrunerWord >>= 2; PrunerWord >>= 2;
} }
...@@ -582,8 +579,8 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -582,8 +579,8 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cprintf ("Adjustments:"); cprintf ("Adjustments:");
for (Class = 0; Class < MaxNumClasses; Class++) { for (Class = 0; Class < MaxNumClasses; Class++) {
if (NormCount[Class] > MaxCount) if (NormCount[Class] > MaxCount)
cprintf (" %c=%d,", cprintf (" %s=%d,",
ClassIdForIndex (IntTemplates, Class), unicharset.id_to_unichar(ClassIdForIndex (IntTemplates, Class)),
-((ClassPrunerMultiplier * -((ClassPrunerMultiplier *
NormalizationFactors[Class]) >> 8) * cp_maps[3] / NormalizationFactors[Class]) >> 8) * cp_maps[3] /
3); 3);
...@@ -640,7 +637,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates, ...@@ -640,7 +637,7 @@ int ClassPruner(INT_TEMPLATES IntTemplates,
cp_bestconf = -1; cp_bestconf = -1;
for (Class = 0; Class < NumClasses; Class++) { for (Class = 0; Class < NumClasses; Class++) {
classch = Results[Class].Class; classch = Results[Class].Class;
if (classch == blob_answer) { if (strcmp(unicharset.id_to_unichar(classch), blob_answer) == 0) {
cp_bestindex = Class; cp_bestindex = Class;
cp_bestrating = (int) (1000 * Results[Class].Rating + 10); cp_bestrating = (int) (1000 * Results[Class].Rating + 10);
cp_bestconf = (int) (1000 * Results[Class].Rating2 + 10); cp_bestconf = (int) (1000 * Results[Class].Rating2 + 10);
...@@ -1191,7 +1188,6 @@ int FindGoodProtos(INT_CLASS ClassTemplate, ...@@ -1191,7 +1188,6 @@ int FindGoodProtos(INT_CLASS ClassTemplate,
if (MatchDebuggingOn (Debug)) if (MatchDebuggingOn (Debug))
cprintf ("Match Complete --------------------------------------------\n"); cprintf ("Match Complete --------------------------------------------\n");
return NumGoodProtos; return NumGoodProtos;
} }
......
...@@ -134,7 +134,7 @@ void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) { ...@@ -134,7 +134,7 @@ void add_point_to_list(POINT_GROUP point_list, EDGEPT *point) {
} }
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
if (chop_debug) if (chop_debug > 2)
mark_outline(point); mark_outline(point);
#endif #endif
} }
...@@ -162,7 +162,8 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) { ...@@ -162,7 +162,8 @@ int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) {
length = length_product (vector1, vector2); length = length_product (vector1, vector2);
if ((int) length == 0) if ((int) length == 0)
return (0); return (0);
angle = (int) (asin (CROSS (vector1, vector2) / length) / PI * 180.0); angle = static_cast<int>(floor(asin(CROSS (vector1, vector2) /
length) / PI * 180.0 + 0.5));
/* Use dot product */ /* Use dot product */
if (SCALAR (vector1, vector2) < 0) if (SCALAR (vector1, vector2) < 0)
......
...@@ -73,7 +73,8 @@ double_VAR (tessedit_certainty_threshold, -2.25, "Good blob limit"); ...@@ -73,7 +73,8 @@ double_VAR (tessedit_certainty_threshold, -2.25, "Good blob limit");
* Set the fields in this choice to default (deliberately bad) initial values. * Set the fields in this choice to default (deliberately bad) initial values.
**********************************************************************/ **********************************************************************/
#define set_null_choice(choice) \ #define set_null_choice(choice) \
(class_string (choice) = NULL, \ (class_string (choice) = NULL, \
class_lengths (choice) = NULL, \
class_probability (choice) = MAX_FLOAT32, \ class_probability (choice) = MAX_FLOAT32, \
class_certainty (choice) = -MAX_FLOAT32) \ class_certainty (choice) = -MAX_FLOAT32) \
...@@ -225,7 +226,8 @@ SEAM *attempt_blob_chop(TWERD *word, INT32 blob_number, SEAMS seam_list) { ...@@ -225,7 +226,8 @@ SEAM *attempt_blob_chop(TWERD *word, INT32 blob_number, SEAMS seam_list) {
delete_seam(seam); delete_seam(seam);
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
if (chop_debug) { if (chop_debug) {
display_blob(blob, Red); if (chop_debug >2)
display_blob(blob, Red);
cprintf ("\n** seam being removed ** \n"); cprintf ("\n** seam being removed ** \n");
} }
#endif #endif
...@@ -437,7 +439,6 @@ CHOICES_LIST chop_word_main(register TWERD *word, ...@@ -437,7 +439,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
} }
bit_count = index - 1; bit_count = index - 1;
permute_characters(char_choices, rating_limit, best_choice, raw_choice); permute_characters(char_choices, rating_limit, best_choice, raw_choice);
set_n_ones (&state, array_count (char_choices) - 1); set_n_ones (&state, array_count (char_choices) - 1);
if (matcher_fp != NULL) { if (matcher_fp != NULL) {
if (matcher_pass == 0) { if (matcher_pass == 0) {
...@@ -474,7 +475,6 @@ CHOICES_LIST chop_word_main(register TWERD *word, ...@@ -474,7 +475,6 @@ CHOICES_LIST chop_word_main(register TWERD *word,
if (chop_debug) if (chop_debug)
print_seams ("Final seam list:", seam_list); print_seams ("Final seam list:", seam_list);
if (enable_assoc && if (enable_assoc &&
!AcceptableChoice (char_choices, best_choice, raw_choice, NULL) !AcceptableChoice (char_choices, best_choice, raw_choice, NULL)
|| (tester || trainer) || (tester || trainer)
......
...@@ -370,7 +370,7 @@ SEAM *pick_good_seam(TBLOB *blob) { ...@@ -370,7 +370,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
INT16 num_points = 0; INT16 num_points = 0;
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
if (chop_debug) if (chop_debug > 2)
display_splits = TRUE; display_splits = TRUE;
draw_blob_edges(blob); draw_blob_edges(blob);
...@@ -417,7 +417,7 @@ SEAM *pick_good_seam(TBLOB *blob) { ...@@ -417,7 +417,7 @@ SEAM *pick_good_seam(TBLOB *blob) {
mark_split (seam->split2); mark_split (seam->split2);
if (seam->split3) if (seam->split3)
mark_split (seam->split3); mark_split (seam->split3);
if (chop_debug > 1) { if (chop_debug > 2) {
update_edge_window(); update_edge_window();
edge_window_wait(); edge_window_wait();
} }
......
...@@ -42,22 +42,22 @@ ...@@ -42,22 +42,22 @@
* Split this blob into two blobs by applying the splits included in * Split this blob into two blobs by applying the splits included in
* the seam description. * the seam description.
**********************************************************************/ **********************************************************************/
void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
check_outline_mem(); check_outline_mem();
if (seam->split1 == NULL) { if (seam->split1 == NULL) {
divide_blobs (blob, other_blob, seam->location); divide_blobs (blob, other_blob, seam->location);
} }
else if (seam->split2 == NULL) { else if (seam->split2 == NULL) {
make_split_blobs(blob, other_blob, seam); make_split_blobs(blob, other_blob, seam);
} }
else if (seam->split3 == NULL) { else if (seam->split3 == NULL) {
make_double_split(blob, other_blob, seam); make_double_split(blob, other_blob, seam);
} }
else { else {
make_triple_split(blob, other_blob, seam); make_triple_split(blob, other_blob, seam);
} }
check_outline_mem(); check_outline_mem();
} }
...@@ -69,7 +69,7 @@ void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -69,7 +69,7 @@ void apply_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* other blob. The ones whose x location is less than that point are * other blob. The ones whose x location is less than that point are
* retained in the original blob. * retained in the original blob.
**********************************************************************/ **********************************************************************/
void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) { void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
TESSLINE *outline; TESSLINE *outline;
TESSLINE *outline1 = NULL; TESSLINE *outline1 = NULL;
TESSLINE *outline2 = NULL; TESSLINE *outline2 = NULL;
...@@ -115,23 +115,23 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) { ...@@ -115,23 +115,23 @@ void divide_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Group the outlines from the first blob into both of them. Do so * Group the outlines from the first blob into both of them. Do so
* according to the information about the split. * according to the information about the split.
**********************************************************************/ **********************************************************************/
void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) { void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
setup_blob_outlines(blob); setup_blob_outlines(blob);
divide_blobs(blob, other_blob, location); divide_blobs(blob, other_blob, location);
eliminate_duplicate_outlines(blob); eliminate_duplicate_outlines(blob);
eliminate_duplicate_outlines(other_blob); eliminate_duplicate_outlines(other_blob);
correct_blob_order(blob, other_blob); correct_blob_order(blob, other_blob);
#ifndef GRAPHICS_DISABLED #ifndef GRAPHICS_DISABLED
if (chop_debug) { if (chop_debug > 2) {
display_blob(blob, Red); display_blob(blob, Red);
#ifdef __UNIX__ #ifdef __UNIX__
sleep (1); sleep (1);
#endif #endif
display_blob(other_blob, Cyan); display_blob(other_blob, Cyan);
} }
#endif #endif
} }
...@@ -143,7 +143,7 @@ void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) { ...@@ -143,7 +143,7 @@ void form_two_blobs(TBLOB *blob, TBLOB *other_blob, INT32 location) {
* Create two blobs out of one by splitting the original one in half. * Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification. * Return the resultant blobs for classification.
**********************************************************************/ **********************************************************************/
void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1); make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2); make_single_split (blob->outlines, seam->split2);
form_two_blobs (blob, other_blob, seam->location); form_two_blobs (blob, other_blob, seam->location);
...@@ -156,7 +156,7 @@ void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -156,7 +156,7 @@ void make_double_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Create two outlines out of one by splitting the original one in half. * Create two outlines out of one by splitting the original one in half.
* Return the resultant outlines. * Return the resultant outlines.
**********************************************************************/ **********************************************************************/
void make_single_split(TESSLINE *outlines, SPLIT *split) { void make_single_split(TESSLINE *outlines, SPLIT *split) {
assert (outlines != NULL); assert (outlines != NULL);
split_outline (split->point1, split->point2); split_outline (split->point1, split->point2);
...@@ -186,7 +186,7 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) { ...@@ -186,7 +186,7 @@ void make_single_split(TESSLINE *outlines, SPLIT *split) {
* Create two blobs out of one by splitting the original one in half. * Create two blobs out of one by splitting the original one in half.
* Return the resultant blobs for classification. * Return the resultant blobs for classification.
**********************************************************************/ **********************************************************************/
void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1); make_single_split (blob->outlines, seam->split1);
form_two_blobs (blob, other_blob, seam->location); form_two_blobs (blob, other_blob, seam->location);
...@@ -201,7 +201,7 @@ void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -201,7 +201,7 @@ void make_split_blobs(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* the outlines. Three of the starting outlines will produce two ending * the outlines. Three of the starting outlines will produce two ending
* outlines. Return the resultant blobs for classification. * outlines. Return the resultant blobs for classification.
**********************************************************************/ **********************************************************************/
void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
make_single_split (blob->outlines, seam->split1); make_single_split (blob->outlines, seam->split1);
make_single_split (blob->outlines, seam->split2); make_single_split (blob->outlines, seam->split2);
make_single_split (blob->outlines, seam->split3); make_single_split (blob->outlines, seam->split3);
...@@ -217,7 +217,7 @@ void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -217,7 +217,7 @@ void make_triple_split(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* result. The seam may consist of one, two, or three splits. Each * result. The seam may consist of one, two, or three splits. Each
* of these splits must be removed from the outlines. * of these splits must be removed from the outlines.
**********************************************************************/ **********************************************************************/
void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
TESSLINE *outline; TESSLINE *outline;
if (!seam) if (!seam)
...@@ -231,7 +231,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -231,7 +231,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
while (outline->next) while (outline->next)
outline = outline->next; outline = outline->next;
outline->next = other_blob->outlines; outline->next = other_blob->outlines;
oldblob(other_blob); oldblob(other_blob);
if (seam->split1 == NULL) { if (seam->split1 == NULL) {
} }
...@@ -248,10 +248,10 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -248,10 +248,10 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
undo_single_split (blob, seam->split1); undo_single_split (blob, seam->split1);
} }
setup_blob_outlines(blob); setup_blob_outlines(blob);
eliminate_duplicate_outlines(blob); eliminate_duplicate_outlines(blob);
check_outline_mem(); check_outline_mem();
} }
...@@ -261,7 +261,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) { ...@@ -261,7 +261,7 @@ void undo_seam(TBLOB *blob, TBLOB *other_blob, SEAM *seam) {
* Undo a seam that is made by a single split. Perform the correct * Undo a seam that is made by a single split. Perform the correct
* magic to reconstruct the appropriate set of outline data structures. * magic to reconstruct the appropriate set of outline data structures.
**********************************************************************/ **********************************************************************/
void undo_single_split(TBLOB *blob, SPLIT *split) { void undo_single_split(TBLOB *blob, SPLIT *split) {
TESSLINE *outline1; TESSLINE *outline1;
TESSLINE *outline2; TESSLINE *outline2;
/* Modify edge points */ /* Modify edge points */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册