Fixed various internationalization issues, mostly for training

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@106 d0cd1f9f-072b-0410-8dd7-cf729c803f20

Fixed various internationalization issues, mostly for training
git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@106 d0cd1f9f-072b-0410-8dd7-cf729c803f20
f382fb56 · theraysmith · 100942d7 · f382fb56 · f382fb56 · f382fb56
11 changed files
--- a/ccmain/applybox.cpp
+++ b/ccmain/applybox.cpp
@@ -31,6 +31,7 @@ what measures we are interested in.
 #include <assert.h>
 #include <errno.h>
 #endif
+#include "boxread.h"
 #include "mainblk.h"
 #include "genblob.h"
 #include "fixxht.h"
@@ -207,51 +208,26 @@ void clear_any_old_text(                        //remove correct text
 BOOL8 read_next_box(FILE* box_file,  //
                    BOX *box,
                    UNICHAR_ID *uch_id) {
-  char buff[256];                //boxfile read buffer
-  char *buffptr = buff;
-  STRING box_filename;
-  static INT16 line = 0;
-  INT32 x_min;
-  INT32 y_min;
-  INT32 x_max;
-  INT32 y_max;
-  INT32 count = 0;
-  char uch[256];
-
-  while (!feof (box_file)) {
-    fgets (buff, sizeof (buff) - 1, box_file);
-    line++;
-
-    buffptr = buff;
-    const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
-    if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
-      buffptr += 3;  // Skip unicode file designation.
-    /* Check for blank lines in box file */
-    while (isspace (*buffptr))
-      buffptr++;
-    if (*buffptr != '\0') {
-      count =
-        sscanf (buffptr,
-        "%s " INT32FORMAT " " INT32FORMAT " " INT32FORMAT " "
-        INT32FORMAT, uch, &x_min, &y_min, &x_max, &y_max);
-      if (count != 5) {
-        tprintf ("Box file format error on line %i ignored\n", line);
-      }
-      else {
-        if (!unicharset_boxes.contains_unichar(uch))
-        {
-          unicharset_boxes.unichar_insert(uch);
-          if (unicharset_boxes.size() > MAX_NUM_CLASSES) {
-            tprintf("Error: Size of unicharset of boxes is \
-greater than MAX_NUM_CLASSES\n");
-            exit(1);
-          }
-        }
-        *uch_id = unicharset_boxes.unichar_to_id(uch);
-        *box = BOX (ICOORD (x_min, y_min), ICOORD (x_max, y_max));
-        return TRUE;             //read a box ok
+  int x_min;
+  int y_min;
+  int x_max;
+  int y_max;
+  char uch[kBufSize];
+
+  while (read_next_box(box_file, uch, &x_min, &y_min, &x_max, &y_max)) {
+    if (!unicharset_boxes.contains_unichar(uch))
+    {
+      unicharset_boxes.unichar_insert(uch);
+      if (unicharset_boxes.size() > MAX_NUM_CLASSES) {
+        tprintf("Error: Size of unicharset of boxes is "
+                "greater than MAX_NUM_CLASSES (%d)\n",
+                MAX_NUM_CLASSES);
+        exit(1);
      }
    }
+    *uch_id = unicharset_boxes.unichar_to_id(uch);
+    *box = BOX (ICOORD (x_min, y_min), ICOORD (x_max, y_max));
+    return TRUE;             //read a box ok
  }
  return FALSE;                  //EOF
 }

--- a/ccmain/output.cpp
+++ b/ccmain/output.cpp
@@ -857,7 +857,7 @@ void write_shm_text(                    //write output
                         lineend ? OCR_NL_NEWLINE : OCR_NL_NONE);
        } else {
          for (int suboffset = 0; suboffset < text_lengths[index]; ++suboffset)
-            ocr_append_char (text[offset + suboffset],
+            ocr_append_char (static_cast<unsigned char>(text[offset+suboffset]),
                             blob_box.left (), blob_box.right (),
                             page_image.get_ysize () - 1 - blob_box.top (),
                             page_image.get_ysize () - 1 - blob_box.bottom (),

--- a/ccutil/boxread.cpp
+++ b/ccutil/boxread.cpp
+/**********************************************************************
+ * File:        boxread.cpp
+ * Description: Read data from a box file.
+ * Author:		Ray Smith
+ * Created:		Fri Aug 24 17:47:23 PDT 2007
+ *
+ * (C) Copyright 2007, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "mfcpch.h"
+#include <string.h>
+#include "boxread.h"
+#include "unichar.h"
+#include "tprintf.h"
+
+// Reads lines from box_file until one parses as a valid box entry
+// ("<utf8> <x_min> <y_min> <x_max> <y_max>"), copies its utf-8 string into
+// utf8_str and its coordinates into the four outputs, and returns true.
+// Lines that fail to parse, contain invalid utf-8, or whose string is longer
+// than UNICHAR_LEN bytes are reported via tprintf and skipped.
+// When no further line can be read the file is closed, the line counter is
+// reset and false is returned.
+// utf8_str must have room for at least kBufSize bytes.
+bool read_next_box(FILE* box_file, char* utf8_str,
+                   int* x_min, int* y_min, int* x_max, int* y_max) {
+  static int line = 0;                // Line number, kept across calls.
+  int count = 0;
+  char buff[kBufSize];                //boxfile read buffer
+  char uch[kBufSize];
+  char *buffptr = buff;
+
+  // Loop on the result of fgets rather than on feof: after a failed read
+  // buff does not hold a freshly read line and must not be parsed, and a
+  // read error would otherwise never terminate the loop.
+  while (fgets(buff, sizeof(buff) - 1, box_file) != NULL) {
+    line++;
+
+    buffptr = buff;
+    const unsigned char *ubuf = reinterpret_cast<const unsigned char*>(buffptr);
+    if (ubuf[0] == 0xef && ubuf[1] == 0xbb && ubuf[2] == 0xbf)
+      buffptr += 3;  // Skip unicode file designation.
+    /* Skip leading spaces and tabs; ignore lines with nothing after them. */
+    while (*buffptr == ' ' || *buffptr == '\t')
+      buffptr++;
+    if (*buffptr != '\0') {
+      count = sscanf(buffptr, "%s " INT32FORMAT " " INT32FORMAT " "
+                     INT32FORMAT " " INT32FORMAT,
+                     uch, x_min, y_min, x_max, y_max);
+      if (count == 5) {
+        // Validate UTF8 by making unichars with it.
+        int used = 0;
+        int uch_len = strlen(uch);
+        while (used < uch_len) {
+          UNICHAR ch(uch + used, uch_len - used);
+          int new_used = ch.utf8_len();
+          if (new_used == 0) {
+            // Cast so a byte >= 0x80 is not sign-extended by %x, and pass
+            // the arguments in the order the message names them.
+            tprintf("Bad utf-8 char starting with 0x%x at line %d, col %d, \n",
+                    static_cast<unsigned char>(uch[used]), line, used + 1);
+            count = 0;
+            break;
+          }
+          used += new_used;
+        }
+        if (uch_len > UNICHAR_LEN) {
+          tprintf("utf-8 string too long at line %d\n", line);
+          count = 0;
+        }
+      }
+      if (count != 5) {
+        tprintf("Box file format error on line %i ignored\n", line);
+      } else {
+        strcpy(utf8_str, uch);
+        return true;             //read a box ok
+      }
+    }
+  }
+  // End of input (or read failure): release the file and restart the line
+  // numbering for the next file.
+  fclose(box_file);
+  line = 0;
+  return false;                  //EOF
+}
--- a/ccutil/boxread.h
+++ b/ccutil/boxread.h
+/**********************************************************************
+ * File:        boxread.h
+ * Description: Read data from a box file.
+ * Author:		Ray Smith
+ * Created:		Fri Aug 24 17:47:23 PDT 2007
+ *
+ * (C) Copyright 2007, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
+#define THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
+
+#include <stdio.h>
+
+const int kBufSize = 256;
+// read_next_box factors out the code to interpret a line of a box
+// file so that applybox and unicharset_extractor interpret it the same way.
+// This function returns the next valid box file utf8 string and coords
+// and returns true, or false on eof (and closes the file).
+// It ignores the utf-8 file signature, checks for valid utf-8 and allows
+// space or tab between fields.
+// utf8_str must be at least kBufSize in length.
+bool read_next_box(FILE* box_file, char* utf8_str,
+                   int* x_min, int* y_min, int* x_max, int* y_max);
+
+#endif  // THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
+
--- a/ccutil/unichar.cpp
+++ b/ccutil/unichar.cpp
@@ -134,8 +134,8 @@ int UNICHAR::utf8_step(const char* utf8_str) {
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0
  };

--- a/ccutil/unichar.h
+++ b/ccutil/unichar.h
@@ -24,7 +24,7 @@

 // Maximum number of characters that can be stored in a UNICHAR. Must be
 // at least 4. Must not exceed 31 without changing the coding of length.
-#define UNICHAR_LEN 4
+#define UNICHAR_LEN 8

 // A UNICHAR_ID is the unique id of a unichar.
 typedef int UNICHAR_ID;

--- a/dict/dawg.cpp
+++ b/dict/dawg.cpp
@@ -123,8 +123,11 @@ INT32 def_letter_is_okay(EDGE_ARRAY dawg,
  const char *ptr;

  for (ptr = word; *ptr != '\0';) {
-    word_single_lengths += UNICHAR::utf8_step(ptr);
-    ptr += UNICHAR::utf8_step(ptr);
+    int step = UNICHAR::utf8_step(ptr);
+    if (step == 0)
+      return FALSE;
+    word_single_lengths += step;
+    ptr += step;
  }

  if (*node == NO_EDGE) {        /* Trailing punctuation */
@@ -174,10 +177,11 @@ INT32 def_letter_is_okay(EDGE_ARRAY dawg,
  if (edge != NO_EDGE) {         /* Normal edge in DAWG */
    if (case_sensative || case_is_okay (dummy_word, char_index)) {
                                 //next_node (dawg, edge);
-    *node = next_node(dawg, edge);
+      *node = next_node(dawg, edge);
+      if (*node == 0)
+        *node = NO_EDGE;
      return (TRUE);
-    }
-    else {
+    } else {
      return (FALSE);
    }
  }

--- a/dict/permdawg.cpp
+++ b/dict/permdawg.cpp
@@ -43,7 +43,7 @@
 ----------------------------------------------------------------------*/
 #define FREQ_WERD     1.0
 #define GOOD_WERD     1.1
-#define OK_WERD       1.25
+#define OK_WERD       1.3125
 #define MAX_FREQ_EDGES    1500
 #define NO_RATING              -1


--- a/training/unicharset_extractor.cpp
+++ b/training/unicharset_extractor.cpp
@@ -24,14 +24,63 @@
 // unichar per line.

 #include <stdio.h>
+/*
+** Include automatically generated configuration file if running autoconf
+*/
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+#if defined(HAVE_WCHAR_T) || defined(__MSW32__) || defined(GOOGLE3)
+#include <wchar.h>
+#include <wctype.h>
+#define USING_WCTYPE
+#endif

 #include "unichar.h"
 #include "unicharset.h"
 #include "strngs.h"
+#include "boxread.h"
 #include "tessopt.h"

 static const char* const kUnicharsetFileName = "unicharset";

+// Sets the alpha/lower/upper/digit properties of c_string's entry in
+// unicharset from the wctype class of each of its code points (no-op
+// without USING_WCTYPE). Contributed by piggy@gmail.com; modified by Ray
+// to use UNICHAR for conversion and autoconf/windows to detect wctype.
+void set_properties(UNICHARSET *unicharset, const char* const c_string) {
+#ifdef USING_WCTYPE
+  UNICHAR_ID id;
+  int wc;
+
+  // Convert the string to a unichar id.
+  id = unicharset->unichar_to_id(c_string);
+
+  int step = 0;
+  int len = strlen(c_string);
+  for (int offset = 0; offset < len; offset += step) {
+    step = UNICHAR::utf8_step(c_string + offset);
+    if (step == 0)
+      break; // Invalid utf-8.
+
+    // Get the next Unicode code point in the string.
+    UNICHAR ch(c_string + offset, step);
+    wc = ch.first_uni();
+
+    /* Copy the properties; case is only recorded for alphabetic chars. */
+    if (iswalpha(wc)) {
+      unicharset->set_isalpha(id, 1);
+      if (iswlower(wc))
+        unicharset->set_islower(id, 1);
+      if (iswupper(wc))
+        unicharset->set_isupper(id, 1);
+    }
+    if (iswdigit(wc))
+      unicharset->set_isdigit(id, 1);
+  }
+#endif
+}
+
 int main(int argc, char** argv) {
  int option;
  const char* output_directory = ".";
@@ -73,18 +122,12 @@ int main(int argc, char** argv) {
      return -1;
    }

-    while (!feof(box_file)) {
-      int x_min, y_min, x_max, y_max;
-      char buffer[256];
-      char c_string[256];
-
-      fgets(buffer, sizeof (buffer), box_file);
-      sscanf(buffer, "%s %d %d %d %d",
-             c_string, &x_min, &y_min, &x_max, &y_max);
-
+    int x_min, y_min, x_max, y_max;
+    char c_string[kBufSize];
+    while (read_next_box(box_file, c_string, &x_min, &y_min, &x_max, &y_max)) {
      unicharset.unichar_insert(c_string);
+      set_properties(&unicharset, c_string);
    }
-    fclose(box_file);
  }

  // Write unicharset file

--- a/wordrec/bestfirst.cpp
+++ b/wordrec/bestfirst.cpp
@@ -41,7 +41,7 @@
 #include "structures.h"
 #include "wordclass.h"

-void call_caller(); 
+void call_caller();

 /*----------------------------------------------------------------------
          V a r i a b l e s
@@ -65,9 +65,9 @@ make_float_var (worst_state, 1, make_worst_state,
 * Create and initialize references to debug variables that control
 * operations in this file.
 **********************************************************************/
-void init_bestfirst_vars() { 
-  make_seg_states(); 
-  make_worst_state(); 
+void init_bestfirst_vars() {
+  make_seg_states();
+  make_worst_state();
 }


@@ -93,9 +93,14 @@ void best_first_search(CHUNKS_RECORD *chunks_record,
    best_choice, raw_choice, state);

 #ifndef GRAPHICS_DISABLED
-  save_best_state(chunks_record); 
+  save_best_state(chunks_record);
 #endif
-  start_recording(); 
+  start_recording();
+  FLOAT32 worst_priority = 2.0f * prioritize_state(chunks_record,
+                                                   the_search,
+                                                   best_state);
+  if (worst_priority < worst_state)
+    worst_priority = worst_state;

  guided_state = *state;
  do {
@@ -109,7 +114,7 @@ void best_first_search(CHUNKS_RECORD *chunks_record,

      guided_state = *(the_search->this_state);
      keep_going =
-        evaluate_state(chunks_record, the_search, fixpt, best_state, pass); 
+        evaluate_state(chunks_record, the_search, fixpt, best_state, pass);

      hash_add (the_search->closed_states, the_search->this_state);

@@ -119,7 +124,7 @@ void best_first_search(CHUNKS_RECORD *chunks_record,
        break;
      }

-      expand_node(chunks_record, the_search); 
+      expand_node(worst_priority, chunks_record, the_search);
    }

    free_state (the_search->this_state);
@@ -130,8 +135,8 @@ void best_first_search(CHUNKS_RECORD *chunks_record,

  state->part1 = the_search->best_state->part1;
  state->part2 = the_search->best_state->part2;
-  stop_recording(); 
-  delete_search(the_search); 
+  stop_recording();
+  delete_search(the_search);
 }


@@ -141,7 +146,7 @@ void best_first_search(CHUNKS_RECORD *chunks_record,
 * Return the width of several of the chunks (if they were joined to-
 * gether.
 **********************************************************************/
-int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk) { 
+int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk) {
  int result = 0;
  int x;

@@ -157,7 +162,7 @@ int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk) {
 *
 * Terminate the current search and free all the memory involved.
 **********************************************************************/
-void delete_search(SEARCH_RECORD *the_search) { 
+void delete_search(SEARCH_RECORD *the_search) {
  float closeness;

  closeness = (the_search->num_joints ?
@@ -174,7 +179,7 @@ void delete_search(SEARCH_RECORD *the_search) {
  free_hash_table (the_search->closed_states);
  FreeHeapData (the_search->open_states, (void_dest) free_state);

-  memfree(the_search); 
+  memfree(the_search);
 }


@@ -204,7 +209,7 @@ CHOICES_LIST evaluate_chunks(CHUNKS_RECORD *chunks_record,
      y = x + search_state[i];

    if (blob_skip) {
-      array_free(char_choices); 
+      array_free(char_choices);
      return (NULL);
    }                            /* Process one square */
    /* Classify if needed */
@@ -216,7 +221,7 @@ CHOICES_LIST evaluate_chunks(CHUNKS_RECORD *chunks_record,
      this_state, best_state, pass, i - 1);

    if (this_choice == NIL) {
-      array_free(char_choices); 
+      array_free(char_choices);
      return (NULL);
    }
    /* Add permuted ratings */
@@ -256,7 +261,7 @@ INT16 evaluate_state(CHUNKS_RECORD *chunks_record,
  chunk_groups = bin_to_chunks (the_search->this_state,
    the_search->num_joints);
  bin_to_pieces (the_search->this_state, the_search->num_joints, widths);
-  LogNewSegmentation(widths); 
+  LogNewSegmentation(widths);

  rating_limit = class_probability (the_search->best_choice);

@@ -270,14 +275,14 @@ INT16 evaluate_state(CHUNKS_RECORD *chunks_record,
    if (AcceptableChoice (char_choices, the_search->best_choice,
      the_search->raw_choice, fixpt))
      keep_going = FALSE;
-    array_free(char_choices); 
+    array_free(char_choices);
  }

 #ifndef GRAPHICS_DISABLED
  if (display_segmentations) {
    display_segmentation (chunks_record->chunks, chunk_groups);
    if (display_segmentations > 1)
-      window_wait(segm_window); 
+      window_wait(segm_window);
  }
 #endif

@@ -285,12 +290,12 @@ INT16 evaluate_state(CHUNKS_RECORD *chunks_record,
    the_search->before_best = the_search->num_states;
    the_search->best_state->part1 = the_search->this_state->part1;
    the_search->best_state->part2 = the_search->this_state->part2;
-    replace_char_widths(chunks_record, chunk_groups); 
+    replace_char_widths(chunks_record, chunk_groups);
  }
  else if (char_choices != NULL)
    fixpt->index = -1;

-  memfree(chunk_groups); 
+  memfree(chunk_groups);

  return (keep_going);
 }
@@ -337,7 +342,7 @@ CHOICES_LIST rebuild_current_state(TBLOB *blobs,
      array_value (old_choices, x) = NULL;
    }
    else {
-      join_pieces(blobs, seam_list, x, y); 
+      join_pieces(blobs, seam_list, x, y);
      for (blob = blobs, blobindex = 0, p_blob = NULL; blobindex < x;
      blobindex++) {
        p_blob = blob;
@@ -358,8 +363,8 @@ CHOICES_LIST rebuild_current_state(TBLOB *blobs,
    x = y - search_state[i];
  }

-  memfree(search_state); 
-  free_all_choices(old_choices, x); 
+  memfree(search_state);
+  free_all_choices(old_choices, x);
  return (char_choices);

 }
@@ -372,7 +377,8 @@ CHOICES_LIST rebuild_current_state(TBLOB *blobs,
 * each one has not already been visited.  If not add it to the priority
 * queue.
 **********************************************************************/
-void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) { 
+void expand_node(FLOAT32 worst_priority,
+                 CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) {
  STATE old_state;
  int x;
  int mask = 1 << (the_search->num_joints - 1 - 32);
@@ -383,9 +389,9 @@ void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) {
  for (x = the_search->num_joints; x > 32; x--) {
    the_search->this_state->part1 = mask ^ old_state.part1;
    if (!hash_lookup (the_search->closed_states, the_search->this_state))
-      push_queue (the_search->open_states,
-        the_search->this_state,
-        prioritize_state (chunks_record, the_search, &old_state));
+      push_queue (the_search->open_states, the_search->this_state,
+                  worst_priority,
+                  prioritize_state (chunks_record, the_search, &old_state));
    mask >>= 1;
  }

@@ -399,9 +405,9 @@ void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) {
  while (x--) {
    the_search->this_state->part2 = mask ^ old_state.part2;
    if (!hash_lookup (the_search->closed_states, the_search->this_state))
-      push_queue (the_search->open_states,
-        the_search->this_state,
-        prioritize_state (chunks_record, the_search, &old_state));
+      push_queue (the_search->open_states, the_search->this_state,
+                  worst_priority,
+                  prioritize_state (chunks_record, the_search, &old_state));
    mask >>= 1;
  }
 }
@@ -449,7 +455,7 @@ SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
 * Get this state from the priority queue.  It should be the state that
 * has the greatest urgency to be evaluated.
 **********************************************************************/
-STATE *pop_queue(HEAP *queue) { 
+STATE *pop_queue(HEAP *queue) {
  HEAPENTRY entry;

  if (GetTopOfHeap (queue, &entry) == OK) {
@@ -472,14 +478,15 @@ STATE *pop_queue(HEAP *queue) {
 *
 * Add this state into the priority queue.
 **********************************************************************/
-void push_queue(HEAP *queue, STATE *state, FLOAT32 priority) { 
+void push_queue(HEAP *queue, STATE *state, FLOAT32 worst_priority,
+                FLOAT32 priority) {
  HEAPENTRY entry;

-  if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_state) {
+  if (SizeOfHeap (queue) < MaxSizeOfHeap (queue) && priority < worst_priority) {
    entry.Data = (char *) new_state (state);
    num_pushed++;
    entry.Key = priority;
-    HeapStore(queue, &entry); 
+    HeapStore(queue, &entry);
  }
 }

@@ -490,7 +497,7 @@ void push_queue(HEAP *queue, STATE *state, FLOAT32 priority) {
 * Replace the value of the char_width field in the chunks_record with
 * the updated width measurements from the last_segmentation.
 **********************************************************************/
-void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) { 
+void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state) {
  WIDTH_RECORD *width_record;
  int num_blobs;
  int i;

--- a/wordrec/bestfirst.h
+++ b/wordrec/bestfirst.h
@@ -80,7 +80,7 @@ extern int num_popped;
 /*----------------------------------------------------------------------
              F u n c t i o n s
 ----------------------------------------------------------------------*/
-void init_bestfirst_vars(); 
+void init_bestfirst_vars();

 void best_first_search(CHUNKS_RECORD *chunks_record,
                       A_CHOICE *best_choice,
@@ -90,9 +90,9 @@ void best_first_search(CHUNKS_RECORD *chunks_record,
                       STATE *best_state,
                       INT32 pass);

-int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk); 
+int chunks_width(WIDTH_RECORD *width_record, int start_chunk, int last_chunk);

-void delete_search(SEARCH_RECORD *the_search); 
+void delete_search(SEARCH_RECORD *the_search);

 CHOICES_LIST evaluate_chunks(CHUNKS_RECORD *chunks_record,
                             SEARCH_STATE search_state,
@@ -112,7 +112,9 @@ CHOICES_LIST rebuild_current_state(TBLOB *blobs,
                                   CHOICES_LIST old_choices,
                                   int fx);

-void expand_node(CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search); 
+void expand_node(FLOAT32 worst_priority,
+                 CHUNKS_RECORD *chunks_record,
+                 SEARCH_RECORD *the_search);

 SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
                          int num_joints,
@@ -120,11 +122,12 @@ SEARCH_RECORD *new_search(CHUNKS_RECORD *chunks_record,
                          A_CHOICE *raw_choice,
                          STATE *state);

-STATE *pop_queue(HEAP *queue); 
+STATE *pop_queue(HEAP *queue);

-void push_queue(HEAP *queue, STATE *state, FLOAT32 priority); 
+void push_queue(HEAP *queue, STATE *state,
+                FLOAT32 worst_priority, FLOAT32 priority);

-void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state); 
+void replace_char_widths(CHUNKS_RECORD *chunks_record, SEARCH_STATE state);

 /*
 #if defined(__STDC__) || defined(__cplusplus)