Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and...

Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.

Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and...
Misc fixes, mostly clang formatting, but some bug fixes in matrix, werd, and tesstrain_utils. Also updates unicharset to match traineddata files.
a303ab9d · Ray Smith · d00d833b · a303ab9d · a303ab9d · a303ab9d
16 changed files
--- a/api/pdfrenderer.cpp
+++ b/api/pdfrenderer.cpp
@@ -636,9 +636,9 @@ bool TessPDFRenderer::BeginDocumentHandler() {
               "  /Length1 %ld\n"
               ">>\n"
               "stream\n", size, size);
-  if (n >= sizeof(buf)) { 
-  	delete[] buffer;
-	return false;
+  if (n >= sizeof(buf)) {
+    delete[] buffer;
+    return false;
  }
  AppendString(buf);
  objsize  = strlen(buf);

--- a/ccmain/pgedit.cpp
+++ b/ccmain/pgedit.cpp
@@ -314,6 +314,7 @@ void Tesseract::do_re_display(
    image_win->Image(pix_binary_, 0, 0);
  }

+  image_win->Brush(ScrollView::NONE);
  PAGE_RES_IT pr_it(current_page_res);
  for (WERD_RES* word = pr_it.word(); word != NULL; word = pr_it.forward()) {
    (this->*word_painter)(&pr_it);

--- a/ccmain/tessedit.cpp
+++ b/ccmain/tessedit.cpp
 /**********************************************************************
 * File:        tessedit.cpp  (Formerly tessedit.c)
- * Description: Main program for merge of tess and editor.
- * Author:					Ray Smith
- * Created:					Tue Jan 07 15:21:46 GMT 1992
+ * Description: (Previously) Main program for merge of tess and editor.
+ *              Now just code to load the language model and various
+ *              engine-specific data files.
+ * Author:      Ray Smith
+ * Created:     Tue Jan 07 15:21:46 GMT 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");

--- a/ccstruct/matrix.cpp
+++ b/ccstruct/matrix.cpp
@@ -96,11 +96,11 @@ MATRIX* MATRIX::DeepCopy() const {
  int band_width = bandwidth();
  MATRIX* result = new MATRIX(dim, band_width);
  for (int col = 0; col < dim; ++col) {
-    for (int row = col; row < col + band_width; ++row) {
+    for (int row = col; row < dim && row < col + band_width; ++row) {
      BLOB_CHOICE_LIST* choices = get(col, row);
      if (choices != NULL) {
        BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
-        choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
+        copy_choices->deep_copy(choices, &BLOB_CHOICE::deep_copy);
        result->put(col, row, copy_choices);
      }
    }

--- a/ccstruct/werd.cpp
+++ b/ccstruct/werd.cpp
@@ -50,17 +50,14 @@ WERD::WERD(C_BLOB_LIST *blob_list, uinT8 blank_count, const char *text)
    flags(0),
    script_id_(0),
    correct(text) {
-  C_BLOB_IT start_it = blob_list;
-  C_BLOB_IT end_it = blob_list;
+  C_BLOB_IT start_it = &cblobs;
  C_BLOB_IT rej_cblob_it = &rej_cblobs;
  C_OUTLINE_IT c_outline_it;
  inT16 inverted_vote = 0;
  inT16 non_inverted_vote = 0;

  // Move blob_list's elements into cblobs.
-  while (!end_it.at_last())
-    end_it.forward();
-  cblobs.assign_to_sublist(&start_it, &end_it);
+  start_it.add_list_after(blob_list);

  /*
    Set white on black flag for the WERD, moving any duff blobs onto the

--- a/ccutil/unicharset.cpp
+++ b/ccutil/unicharset.cpp
@@ -99,12 +99,12 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesOpen() {
  max_bottom = MAX_UINT8;
  min_top = 0;
  max_top = MAX_UINT8;
-  min_width = 0;
-  max_width = MAX_INT16;
-  min_bearing = 0;
-  max_bearing = MAX_INT16;
-  min_advance = 0;
-  max_advance = MAX_INT16;
+  width = 0.0f;
+  width_sd = 0.0f;
+  bearing = 0.0f;
+  bearing_sd = 0.0f;
+  advance = 0.0f;
+  advance_sd = 0.0f;
 }

 // Sets all ranges to empty. Used before expanding with font-based data.
@@ -113,20 +113,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::SetRangesEmpty() {
  max_bottom = 0;
  min_top = MAX_UINT8;
  max_top = 0;
-  min_width = MAX_INT16;
-  max_width = 0;
-  min_bearing = MAX_INT16;
-  max_bearing = 0;
-  min_advance = MAX_INT16;
-  max_advance = 0;
+  width = 0.0f;
+  width_sd = 0.0f;
+  bearing = 0.0f;
+  bearing_sd = 0.0f;
+  advance = 0.0f;
+  advance_sd = 0.0f;
 }

-// Returns true if any of the top/bottom/width/bearing/advance ranges is
-// emtpy.
+// Returns true if the width or advance stat is empty (zero). The top/bottom
+// ranges and the bearing stat are not checked.
 bool UNICHARSET::UNICHAR_PROPERTIES::AnyRangeEmpty() const {
-  return min_bottom > max_bottom || min_top > max_top ||
-      min_width > max_width || min_bearing > max_bearing ||
-      min_advance > max_advance;
+  return width == 0.0f || advance == 0.0f;
 }

 // Expands the ranges with the ranges from the src properties.
@@ -136,12 +134,18 @@ void UNICHARSET::UNICHAR_PROPERTIES::ExpandRangesFrom(
  UpdateRange(src.max_bottom, &min_bottom, &max_bottom);
  UpdateRange(src.min_top, &min_top, &max_top);
  UpdateRange(src.max_top, &min_top, &max_top);
-  UpdateRange(src.min_width, &min_width, &max_width);
-  UpdateRange(src.max_width, &min_width, &max_width);
-  UpdateRange(src.min_bearing, &min_bearing, &max_bearing);
-  UpdateRange(src.max_bearing, &min_bearing, &max_bearing);
-  UpdateRange(src.min_advance, &min_advance, &max_advance);
-  UpdateRange(src.max_advance, &min_advance, &max_advance);
+  if (src.width_sd > width_sd) {
+    width = src.width;
+    width_sd = src.width_sd;
+  }
+  if (src.bearing_sd > bearing_sd) {
+    bearing = src.bearing;
+    bearing_sd = src.bearing_sd;
+  }
+  if (src.advance_sd > advance_sd) {
+    advance = src.advance;
+    advance_sd = src.advance_sd;
+  }
 }

 // Copies the properties from src into this.
@@ -430,8 +434,6 @@ void UNICHARSET::PartialSetPropertiesFromOther(int start_index,
      }
      unichars[ch].properties.CopyFrom(properties);
      set_normed_ids(ch);
-    } else {
-      tprintf("Failed to get properties for index %d = %s\n", ch, utf8);
    }
  }
 }
@@ -473,15 +475,15 @@ void UNICHARSET::AppendOtherUnicharset(const UNICHARSET& src) {
  for (int ch = 0; ch < src.size_used; ++ch) {
    const UNICHAR_PROPERTIES& src_props = src.unichars[ch].properties;
    const char* utf8 = src.id_to_unichar(ch);
-    if (strcmp(utf8, " ") != 0 && src_props.AnyRangeEmpty()) {
+    if (ch >= SPECIAL_UNICHAR_CODES_COUNT && src_props.AnyRangeEmpty()) {
      // Only use fully valid entries.
      tprintf("Bad properties for index %d, char %s: "
-              "%d,%d %d,%d %d,%d %d,%d %d,%d\n",
+              "%d,%d %d,%d %g,%g %g,%g %g,%g\n",
              ch, utf8, src_props.min_bottom, src_props.max_bottom,
              src_props.min_top, src_props.max_top,
-              src_props.min_width, src_props.max_width,
-              src_props.min_bearing, src_props.max_bearing,
-              src_props.min_advance, src_props.max_advance);
+              src_props.width, src_props.width_sd,
+              src_props.bearing, src_props.bearing_sd,
+              src_props.advance, src_props.advance_sd);
      continue;
    }
    int id = size_used;
@@ -564,8 +566,6 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
                                  UNICHAR_PROPERTIES* props) const {
  props->Init();
  props->SetRangesEmpty();
-  props->min_advance = 0;
-  props->max_advance = 0;
  int total_unicodes = 0;
  GenericVector<UNICHAR_ID> encoding;
  if (!encode_string(utf8_str, true, &encoding, NULL, NULL))
@@ -586,21 +586,16 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
    UpdateRange(src_props.max_bottom, &props->min_bottom, &props->max_bottom);
    UpdateRange(src_props.min_top, &props->min_top, &props->max_top);
    UpdateRange(src_props.max_top, &props->min_top, &props->max_top);
-    int bearing = ClipToRange(props->min_advance + src_props.min_bearing,
-                              -MAX_INT16, MAX_INT16);
-    if (total_unicodes == 0 || bearing < props->min_bearing)
-      props->min_bearing = bearing;
-    bearing = ClipToRange(props->max_advance + src_props.max_bearing,
-                          -MAX_INT16, MAX_INT16);
-    if (total_unicodes == 0 || bearing < props->max_bearing)
-      props->max_bearing = bearing;
-    props->min_advance = ClipToRange(props->min_advance + src_props.min_advance,
-                                     -MAX_INT16, MAX_INT16);
-    props->max_advance = ClipToRange(props->max_advance + src_props.max_advance,
-                                     -MAX_INT16, MAX_INT16);
+    float bearing = props->advance + src_props.bearing;
+    if (total_unicodes == 0 || bearing < props->bearing) {
+      props->bearing = bearing;
+      props->bearing_sd = props->advance_sd + src_props.bearing_sd;
+    }
+    props->advance += src_props.advance;
+    props->advance_sd += src_props.advance_sd;
    // With a single width, just use the widths stored in the unicharset.
-    props->min_width = src_props.min_width;
-    props->max_width = src_props.max_width;
+    props->width = src_props.width;
+    props->width_sd = src_props.width_sd;
    // Use the first script id, other_case, mirror, direction.
    // Note that these will need translation, except direction.
    if (total_unicodes == 0) {
@@ -616,10 +611,8 @@ bool UNICHARSET::GetStrProperties(const char* utf8_str,
  }
  if (total_unicodes > 1) {
    // Estimate the total widths from the advance - bearing.
-    props->min_width = ClipToRange(props->min_advance - props->max_bearing,
-                                   -MAX_INT16, MAX_INT16);
-    props->max_width = ClipToRange(props->max_advance - props->min_bearing,
-                                   -MAX_INT16, MAX_INT16);
+    props->width = props->advance - props->bearing;
+    props->width_sd = props->advance_sd + props->bearing_sd;
  }
  return total_unicodes > 0;
 }
@@ -707,12 +700,12 @@ bool UNICHARSET::save_to_string(STRING *str) const {
  for (UNICHAR_ID id = 0; id < this->size(); ++id) {
    int min_bottom, max_bottom, min_top, max_top;
    get_top_bottom(id, &min_bottom, &max_bottom, &min_top, &max_top);
-    int min_width, max_width;
-    get_width_range(id, &min_width, &max_width);
-    int min_bearing, max_bearing;
-    get_bearing_range(id, &min_bearing, &max_bearing);
-    int min_advance, max_advance;
-    get_advance_range(id, &min_advance, &max_advance);
+    float width, width_sd;
+    get_width_stats(id, &width, &width_sd);
+    float bearing, bearing_sd;
+    get_bearing_stats(id, &bearing, &bearing_sd);
+    float advance, advance_sd;
+    get_advance_stats(id, &advance, &advance_sd);
    unsigned int properties = this->get_properties(id);
    if (strcmp(this->id_to_unichar(id), " ") == 0) {
      snprintf(buffer, kFileBufSize, "%s %x %s %d\n", "NULL", properties,
@@ -720,10 +713,10 @@ bool UNICHARSET::save_to_string(STRING *str) const {
              this->get_other_case(id));
    } else {
      snprintf(buffer, kFileBufSize,
-              "%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %s %d %d %d %s\t# %s\n",
+              "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %s %d %d %d %s\t# %s\n",
              this->id_to_unichar(id), properties,
-              min_bottom, max_bottom, min_top, max_top, min_width, max_width,
-              min_bearing, max_bearing, min_advance, max_advance,
+              min_bottom, max_bottom, min_top, max_top, width, width_sd,
+              bearing, bearing_sd, advance, advance_sd,
              this->get_script_from_script_id(this->get_script(id)),
              this->get_other_case(id), this->get_direction(id),
              this->get_mirror(id), this->get_normed_unichar(id),
@@ -821,12 +814,12 @@ bool UNICHARSET::load_via_fgets(
    int max_bottom = MAX_UINT8;
    int min_top = 0;
    int max_top = MAX_UINT8;
-    int min_width = 0;
-    int max_width = MAX_INT16;
-    int min_bearing = 0;
-    int max_bearing = MAX_INT16;
-    int min_advance = 0;
-    int max_advance = MAX_INT16;
+    float width = 0.0f;
+    float width_sd = 0.0f;
+    float bearing = 0.0f;
+    float bearing_sd = 0.0f;
+    float advance = 0.0f;
+    float advance_sd = 0.0f;
    // TODO(eger): check that this default it ok
    // after enabling BiDi iterator for Arabic+Cube.
    int direction = UNICHARSET::U_LEFT_TO_RIGHT;
@@ -836,19 +829,19 @@ bool UNICHARSET::load_via_fgets(
    int v = -1;
    if (fgets_cb->Run(buffer, sizeof (buffer)) == NULL ||
        ((v = sscanf(buffer,
-                     "%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d %63s",
+                     "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d %63s",
                     unichar, &properties,
                     &min_bottom, &max_bottom, &min_top, &max_top,
-                     &min_width, &max_width, &min_bearing, &max_bearing,
-                     &min_advance, &max_advance, script, &other_case,
+                     &width, &width_sd, &bearing, &bearing_sd,
+                     &advance, &advance_sd, script, &other_case,
                     &direction, &mirror, normed)) != 17 &&
         (v = sscanf(buffer,
-                     "%s %x %d,%d,%d,%d,%d,%d,%d,%d,%d,%d %63s %d %d %d",
+                     "%s %x %d,%d,%d,%d,%g,%g,%g,%g,%g,%g %63s %d %d %d",
                     unichar, &properties,
                     &min_bottom, &max_bottom, &min_top, &max_top,
-                     &min_width, &max_width, &min_bearing, &max_bearing,
-                     &min_advance, &max_advance,
-                     script, &other_case, &direction, &mirror)) != 16 &&
+                     &width, &width_sd, &bearing, &bearing_sd,
+                     &advance, &advance_sd, script, &other_case,
+                     &direction, &mirror)) != 16 &&
          (v = sscanf(buffer, "%s %x %d,%d,%d,%d %63s %d %d %d",
                      unichar, &properties,
                      &min_bottom, &max_bottom, &min_top, &max_top,
@@ -888,9 +881,9 @@ bool UNICHARSET::load_via_fgets(
    this->set_script(id, script);
    this->unichars[id].properties.enabled = true;
    this->set_top_bottom(id, min_bottom, max_bottom, min_top, max_top);
-    this->set_width_range(id, min_width, max_width);
-    this->set_bearing_range(id, min_bearing, max_bearing);
-    this->set_advance_range(id, min_advance, max_advance);
+    this->set_width_stats(id, width, width_sd);
+    this->set_bearing_stats(id, bearing, bearing_sd);
+    this->set_advance_stats(id, advance, advance_sd);
    this->set_direction(id, static_cast<UNICHARSET::Direction>(direction));
    ASSERT_HOST(other_case < unicharset_size);
    this->set_other_case(id, (v>3) ? other_case : id);

--- a/ccutil/unicharset.h
+++ b/ccutil/unicharset.h
@@ -554,68 +554,56 @@ class UNICHARSET {
    unichars[unichar_id].properties.max_top =
        static_cast<uinT8>(ClipToRange(max_top, 0, MAX_UINT8));
  }
-  // Returns the width range of the given unichar in baseline-normalized
-  // coordinates, ie, where the baseline is kBlnBaselineOffset and the
-  // meanline is kBlnBaselineOffset + kBlnXHeight.
-  // (See normalis.h for the definitions).
-  void get_width_range(UNICHAR_ID unichar_id,
-                       int* min_width, int* max_width) const {
+  // Returns the width stats (as mean, sd) of the given unichar relative to the
+  // median advance of all characters in the character set.
+  void get_width_stats(UNICHAR_ID unichar_id,
+                       float* width, float* width_sd) const {
    if (INVALID_UNICHAR_ID == unichar_id) {
-      *min_width = 0;
-      *max_width = 256;  // kBlnCellHeight;
+      *width = 0.0f;
+      *width_sd = 0.0f;;
      return;
    }
    ASSERT_HOST(contains_unichar_id(unichar_id));
-    *min_width = unichars[unichar_id].properties.min_width;
-    *max_width = unichars[unichar_id].properties.max_width;
+    *width = unichars[unichar_id].properties.width;
+    *width_sd = unichars[unichar_id].properties.width_sd;
  }
-  void set_width_range(UNICHAR_ID unichar_id, int min_width, int max_width) {
-    unichars[unichar_id].properties.min_width =
-        static_cast<inT16>(ClipToRange(min_width, 0, MAX_INT16));
-    unichars[unichar_id].properties.max_width =
-        static_cast<inT16>(ClipToRange(max_width, 0, MAX_INT16));
+  void set_width_stats(UNICHAR_ID unichar_id, float width, float width_sd) {
+    unichars[unichar_id].properties.width = width;
+    unichars[unichar_id].properties.width_sd = width_sd;
  }
-  // Returns the range of the x-bearing of the given unichar in
-  // baseline-normalized coordinates, ie, where the baseline is
-  // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
-  // (See normalis.h for the definitions).
-  void get_bearing_range(UNICHAR_ID unichar_id,
-                         int* min_bearing, int* max_bearing) const {
+  // Returns the stats of the x-bearing (as mean, sd) of the given unichar
+  // relative to the median advance of all characters in the character set.
+  void get_bearing_stats(UNICHAR_ID unichar_id,
+                         float* bearing, float* bearing_sd) const {
    if (INVALID_UNICHAR_ID == unichar_id) {
-      *min_bearing = *max_bearing = 0;
+      *bearing = *bearing_sd = 0.0f;
      return;
    }
    ASSERT_HOST(contains_unichar_id(unichar_id));
-    *min_bearing = unichars[unichar_id].properties.min_bearing;
-    *max_bearing = unichars[unichar_id].properties.max_bearing;
-  }
-  void set_bearing_range(UNICHAR_ID unichar_id,
-                         int min_bearing, int max_bearing) {
-    unichars[unichar_id].properties.min_bearing =
-        static_cast<inT16>(ClipToRange(min_bearing, 0, MAX_INT16));
-    unichars[unichar_id].properties.max_bearing =
-        static_cast<inT16>(ClipToRange(max_bearing, 0, MAX_INT16));
-  }
-  // Returns the range of the x-advance of the given unichar in
-  // baseline-normalized coordinates, ie, where the baseline is
-  // kBlnBaselineOffset and the meanline is kBlnBaselineOffset + kBlnXHeight.
-  // (See normalis.h for the definitions).
-  void get_advance_range(UNICHAR_ID unichar_id,
-                         int* min_advance, int* max_advance) const {
+    *bearing = unichars[unichar_id].properties.bearing;
+    *bearing_sd = unichars[unichar_id].properties.bearing_sd;
+  }
+  void set_bearing_stats(UNICHAR_ID unichar_id,
+                         float bearing, float bearing_sd) {
+    unichars[unichar_id].properties.bearing = bearing;
+    unichars[unichar_id].properties.bearing_sd = bearing_sd;
+  }
+  // Returns the stats of the x-advance of the given unichar (as mean, sd)
+  // relative to the median advance of all characters in the character set.
+  void get_advance_stats(UNICHAR_ID unichar_id,
+                         float* advance, float* advance_sd) const {
    if (INVALID_UNICHAR_ID == unichar_id) {
-      *min_advance = *max_advance = 0;
+      *advance = *advance_sd = 0;
      return;
    }
    ASSERT_HOST(contains_unichar_id(unichar_id));
-    *min_advance = unichars[unichar_id].properties.min_advance;
-    *max_advance = unichars[unichar_id].properties.max_advance;
+    *advance = unichars[unichar_id].properties.advance;
+    *advance_sd = unichars[unichar_id].properties.advance_sd;
  }
-  void set_advance_range(UNICHAR_ID unichar_id,
-                         int min_advance, int max_advance) {
-    unichars[unichar_id].properties.min_advance =
-        static_cast<inT16>(ClipToRange(min_advance, 0, MAX_INT16));
-    unichars[unichar_id].properties.max_advance =
-        static_cast<inT16>(ClipToRange(max_advance, 0, MAX_INT16));
+  void set_advance_stats(UNICHAR_ID unichar_id,
+                         float advance, float advance_sd) {
+    unichars[unichar_id].properties.advance = advance;
+    unichars[unichar_id].properties.advance_sd = advance_sd;
  }
  // Returns true if the font metrics properties are empty.
  bool PropertiesIncomplete(UNICHAR_ID unichar_id) const {
@@ -873,8 +861,8 @@ class UNICHARSET {
    void SetRangesOpen();
    // Sets all ranges to empty. Used before expanding with font-based data.
    void SetRangesEmpty();
-    // Returns true if any of the top/bottom/width/bearing/advance ranges is
-    // emtpy.
+    // Returns true if the width or advance stat is empty (zero). The
+    // top/bottom ranges and the bearing stat are not checked.
    bool AnyRangeEmpty() const;
    // Expands the ranges with the ranges from the src properties.
    void ExpandRangesFrom(const UNICHAR_PROPERTIES& src);
@@ -896,14 +884,14 @@ class UNICHARSET {
    uinT8 max_bottom;
    uinT8 min_top;
    uinT8 max_top;
-    // Limits on the widths of bounding box, also in baseline-normalized coords.
-    inT16 min_width;
-    inT16 max_width;
-    // Limits on the x-bearing and advance, also in baseline-normalized coords.
-    inT16 min_bearing;
-    inT16 max_bearing;
-    inT16 min_advance;
-    inT16 max_advance;
+    // Statistics of the bounding box width, relative to the median advance.
+    float width;
+    float width_sd;
+    // Stats of the x-bearing and advance, also relative to the median advance.
+    float bearing;
+    float bearing_sd;
+    float advance;
+    float advance_sd;
    int   script_id;
    UNICHAR_ID other_case;  // id of the corresponding upper/lower case unichar
    Direction direction;  // direction of this unichar

--- a/ccutil/universalambigs.cpp
+++ b/ccutil/universalambigs.cpp
--- a/classify/mfoutline.cpp
+++ b/classify/mfoutline.cpp
@@ -30,8 +30,6 @@
 #include <math.h>
 #include <stdio.h>

-#define MIN_INERTIA (0.00001)
-
 /*----------------------------------------------------------------------------
              Public Code
 ----------------------------------------------------------------------------*/
@@ -475,71 +473,6 @@ void ComputeDirection(MFEDGEPT *Start,
  Finish->PreviousDirection = Start->Direction;
 }                                /* ComputeDirection */

-
-/*---------------------------------------------------------------------------*/
-void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) {
-/*
- ** Parameters:
- **   OutlineStats  statistics about a set of outlines
- ** Globals: none
- ** Operation: Use the preliminary statistics accumulated in OutlineStats
- **   to compute the final statistics.
- **   (see Dan Johnson's Tesseract lab
- **   notebook #2, pgs. 74-78).
- ** Return: none
- ** Exceptions: none
- ** History: Fri Dec 14 10:13:36 1990, DSJ, Created.
- */
-  OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L;
-  OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L;
-
-  OutlineStats->Ix = (OutlineStats->Ix / 3.0 -
-    OutlineStats->y * OutlineStats->Mx +
-    OutlineStats->y * OutlineStats->y * OutlineStats->L);
-
-  OutlineStats->Iy = (OutlineStats->Iy / 3.0 -
-    OutlineStats->x * OutlineStats->My +
-    OutlineStats->x * OutlineStats->x * OutlineStats->L);
-
-  /* Ix and/or Iy could possibly be negative due to roundoff error */
-  if (OutlineStats->Ix < 0.0)
-    OutlineStats->Ix = MIN_INERTIA;
-  if (OutlineStats->Iy < 0.0)
-    OutlineStats->Iy = MIN_INERTIA;
-
-  OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L);
-  OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L);
-
-  OutlineStats->Mx *= 0.5;
-  OutlineStats->My *= 0.5;
-
-}                                /* FinishOutlineStats */
-
-
-/*---------------------------------------------------------------------------*/
-void InitOutlineStats(OUTLINE_STATS *OutlineStats) {
-/*
- ** Parameters:
- **   OutlineStats  stats data structure to be initialized
- ** Globals: none
- ** Operation: Initialize the outline statistics data structure so
- **   that it is ready to start accumulating statistics.
- ** Return: none
- ** Exceptions: none
- ** History: Fri Dec 14 08:55:22 1990, DSJ, Created.
- */
-  OutlineStats->Mx = 0.0;
-  OutlineStats->My = 0.0;
-  OutlineStats->L = 0.0;
-  OutlineStats->x = 0.0;
-  OutlineStats->y = 0.0;
-  OutlineStats->Ix = 0.0;
-  OutlineStats->Iy = 0.0;
-  OutlineStats->Rx = 0.0;
-  OutlineStats->Ry = 0.0;
-}                                /* InitOutlineStats */
-
-
 /*---------------------------------------------------------------------------*/
 MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {
 /*
@@ -569,51 +502,3 @@ MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) {

  return (EdgePoint);
 }                                /* NextDirectionChange */
-
-
-/*---------------------------------------------------------------------------*/
-void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
-                        register FLOAT32 x1,
-                        register FLOAT32 x2,
-                        register FLOAT32 y1,
-                        register FLOAT32 y2) {
-/*
- ** Parameters:
- **   OutlineStats  statistics to add this segment to
- **   x1, y1, x2, y2  segment to be added to statistics
- ** Globals: none
- ** Operation: This routine adds the statistics for the specified
- **   line segment to OutlineStats.  The statistics that are
- **   kept are:
- **     sum of length of all segments
- **     sum of 2*Mx for all segments
- **     sum of 2*My for all segments
- **     sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments
- **     sum of 2*My*(x1+x2) - L*x1*x2 for all segments
- **   These numbers, once collected can later be used to easily
- **   compute the center of mass, first and second moments,
- **   and radii of gyration.  (see Dan Johnson's Tesseract lab
- **   notebook #2, pgs. 74-78).
- ** Return: none
- ** Exceptions: none
- ** History: Fri Dec 14 08:59:17 1990, DSJ, Created.
- */
-  register FLOAT64 L;
-  register FLOAT64 Mx2;
-  register FLOAT64 My2;
-
-  /* compute length of segment */
-  L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1));
-  OutlineStats->L += L;
-
-  /* compute 2Mx and 2My components */
-  Mx2 = L * (y1 + y2);
-  My2 = L * (x1 + x2);
-  OutlineStats->Mx += Mx2;
-  OutlineStats->My += My2;
-
-  /* compute second moment component */
-  OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2;
-  OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2;
-
-}                                /* UpdateOutlineStats */
--- a/classify/mfoutline.h
+++ b/classify/mfoutline.h
@@ -50,14 +50,6 @@ typedef enum {
  outer, hole
 } OUTLINETYPE;

-typedef struct {
-  FLOAT64 Mx, My;                /* first moment of all outlines */
-  FLOAT64 L;                     /* total length of all outlines */
-  FLOAT64 x, y;                  /* center of mass of all outlines */
-  FLOAT64 Ix, Iy;                /* second moments about center of mass axes */
-  FLOAT64 Rx, Ry;                /* radius of gyration about center of mass axes */
-} OUTLINE_STATS;
-
 typedef enum {
  baseline, character
 } NORM_METHOD;
@@ -127,16 +119,6 @@ void ComputeDirection(MFEDGEPT *Start,
                      FLOAT32 MinSlope,
                      FLOAT32 MaxSlope);

-void FinishOutlineStats(register OUTLINE_STATS *OutlineStats);
-
-void InitOutlineStats(OUTLINE_STATS *OutlineStats);
-
 MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint);

-void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats,
-                        register FLOAT32 x1,
-                        register FLOAT32 x2,
-                        register FLOAT32 y1,
-                        register FLOAT32 y2);
-
 #endif
--- a/cube/tess_lang_mod_edge.h
+++ b/cube/tess_lang_mod_edge.h
@@ -176,10 +176,9 @@ class TessLangModEdge : public LangModEdge {
  // returns the Hash value of the edge. Used by the SearchNode hash table
  // to quickly lookup exisiting edges to converge during search
  inline unsigned int Hash() const {
-    return static_cast<unsigned int>(((start_edge_ | end_edge_) ^
-      ((reinterpret_cast<uintptr_t>(dawg_)))) ^
-      ((unsigned int)edge_mask_) ^
-      class_id_);
+    return static_cast<unsigned int>(
+        ((start_edge_ | end_edge_) ^ ((reinterpret_cast<uintptr_t>(dawg_)))) ^
+        ((unsigned int)edge_mask_) ^ class_id_);
  }

  // A verbal description of the edge: Used by visualizers

--- a/opencl/openclwrapper.cpp
+++ b/opencl/openclwrapper.cpp
@@ -2669,7 +2669,8 @@ PERF_COUNT_START("HistogramRectOCL")
    int numThreads = block_size*numWorkGroups;
    size_t local_work_size[] = {static_cast<size_t>(block_size)};
    size_t global_work_size[] = {static_cast<size_t>(numThreads)};
-    size_t red_global_work_size[] = {static_cast<size_t>(block_size*kHistogramSize*bytes_per_pixel)};
+    size_t red_global_work_size[] = {
+        static_cast<size_t>(block_size * kHistogramSize * bytes_per_pixel)};

    /* map histogramAllChannels as write only */
    int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups;

--- a/training/fileio.cpp
+++ b/training/fileio.cpp
@@ -152,11 +152,11 @@ InputBuffer::~InputBuffer() {
  }
 }

-bool InputBuffer::Read(string *out) {
-  char buf[BUFSIZ+1];
+bool InputBuffer::Read(string* out) {
+  char buf[BUFSIZ + 1];
  int l;
-  while((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
-    if(ferror(stream_)) {
+  while ((l = fread(buf, 1, BUFSIZ, stream_)) > 0) {
+    if (ferror(stream_)) {
      clearerr(stream_);
      return false;
    }

--- a/training/stringrenderer.cpp
+++ b/training/stringrenderer.cpp
@@ -140,6 +140,14 @@ void StringRenderer::set_resolution(const int resolution) {
  font_.set_resolution(resolution);
 }

+void StringRenderer::set_underline_start_prob(const double frac) {
+  underline_start_prob_ = min(max(frac, 0.0), 1.0);
+}
+
+void StringRenderer::set_underline_continuation_prob(const double frac) {
+  underline_continuation_prob_ = min(max(frac, 0.0), 1.0);
+}
+
 StringRenderer::~StringRenderer() {
  ClearBoxes();
  FreePangoCairo();

--- a/training/stringrenderer.h
+++ b/training/stringrenderer.h
@@ -83,14 +83,10 @@ class StringRenderer {
  // Sets the probability (value in [0, 1]) of starting to render a word with an
  // underline. This implementation consider words to be space-delimited
  // sequences of characters.
-  void set_underline_start_prob(const double frac) {
-    underline_start_prob_ = std::min(std::max(frac, 0.0), 1.0);
-  }
+  void set_underline_start_prob(const double frac);
  // Set the probability (value in [0, 1]) of continuing a started underline to
  // the next word.
-  void set_underline_continuation_prob(const double frac) {
-    underline_continuation_prob_ = std::min(std::max(frac, 0.0), 1.0);
-  }
+  void set_underline_continuation_prob(const double frac);
  void set_underline_style(const PangoUnderline style) {
    underline_style_ = style;
  }

--- a/training/tesstrain_utils.sh
+++ b/training/tesstrain_utils.sh
@@ -184,9 +184,9 @@ parse_flags() {
        TRAINING_TEXT=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.training_text
    fi
    if [[ -z ${WORDLIST_FILE} ]]; then
-        WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist.clean
+        WORDLIST_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.wordlist
    fi
-    WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams.clean
+    WORD_BIGRAMS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.word.bigrams
    NUMBERS_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.numbers
    PUNC_FILE=${LANGDATA_ROOT}/${LANG_CODE}/${LANG_CODE}.punc
    BIGRAM_FREQS_FILE=${TRAINING_TEXT}.bigram_freqs