diff --git a/ccutil/Makefile.am b/ccutil/Makefile.am index 53980e93130e940ede5d6884a57a17cd2f29b7ea..9d3d83b22b2a09e2edaa5b0e174d01bd99038d7b 100644 --- a/ccutil/Makefile.am +++ b/ccutil/Makefile.am @@ -18,7 +18,7 @@ include_HEADERS = \ noinst_HEADERS = \ ambigs.h bits16.h bitvector.h ccutil.h clst.h doubleptr.h elst2.h \ - elst.h genericheap.h globaloc.h hashfn.h indexmapbidi.h kdpair.h lsterr.h \ + elst.h genericheap.h globaloc.h indexmapbidi.h kdpair.h lsterr.h \ nwmain.h object_cache.h qrsequence.h sorthelper.h stderr.h \ scanutils.h tessdatamanager.h tprintf.h unicity_table.h unicodes.h \ universalambigs.h diff --git a/ccutil/hashfn.h b/ccutil/hashfn.h deleted file mode 100644 index 73e15be9a8406cd1b8e4d6ed887bc25e0f224644..0000000000000000000000000000000000000000 --- a/ccutil/hashfn.h +++ /dev/null @@ -1,80 +0,0 @@ -/********************************************************************** - * File: hashfn.h (Formerly hash.h) - * Description: Portability hacks for hash_map, hash_set and unique_ptr. - * Author: Ray Smith - * Created: Wed Jan 08 14:08:25 PST 2014 - * - * (C) Copyright 2014, Google Inc. - ** Licensed under the Apache License, Version 2.0 (the "License"); - ** you may not use this file except in compliance with the License. - ** You may obtain a copy of the License at - ** http://www.apache.org/licenses/LICENSE-2.0 - ** Unless required by applicable law or agreed to in writing, software - ** distributed under the License is distributed on an "AS IS" BASIS, - ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - ** See the License for the specific language governing permissions and - ** limitations under the License. - * - **********************************************************************/ - -#ifndef HASHFN_H -#define HASHFN_H - -#if (__cplusplus >= 201103L) || defined(_MSC_VER) // Visual Studio -#include -#include -#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1600) // VS 2008 -#define TessHashMap std::tr1::unordered_map -#define TessHashSet std::tr1::unordered_set -#else // _MSC_VER -#define TessHashMap std::unordered_map -#define TessHashSet std::unordered_set -#include -#define SmartPtr std::unique_ptr -#define HAVE_UNIQUE_PTR -#endif // _MSC_VER -#elif (defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ > 0)) || \ - __GNUC__ >= 4)) // gcc -// hash_set is deprecated in gcc -#include -#include -using __gnu_cxx::hash_map; -using __gnu_cxx::hash_set; -#define TessHashMap __gnu_cxx::hash_map -#define TessHashSet __gnu_cxx::hash_set -#else -#include -#include -#define TessHashMap hash_map -#define TessHashSet :hash_set -#endif // gcc - -#ifndef HAVE_UNIQUE_PTR -// Trivial smart ptr. Expand to add features of std::unique_ptr as required. -template class SmartPtr { - public: - SmartPtr() : ptr_(NULL) {} - explicit SmartPtr(T* ptr) : ptr_(ptr) {} - ~SmartPtr() { - delete ptr_; - } - - T* get() const { - return ptr_; - } - void reset(T* ptr) { - delete ptr_; - ptr_ = ptr; - } - bool operator==(const T* ptr) const { - return ptr_ == ptr; - } - T* operator->() const { - return ptr_; - } - private: - T* ptr_; -}; -#endif // HAVE_UNIQUE_PTR - -#endif // HASHFN_H diff --git a/ccutil/unicharcompress.cpp b/ccutil/unicharcompress.cpp index a9437ed4cf5639a7800cdac058e8e9bf191e61b3..084e6c4386d09a11c13ff7320a1269879cbf95ba 100644 --- a/ccutil/unicharcompress.cpp +++ b/ccutil/unicharcompress.cpp @@ -57,9 +57,9 @@ struct RadicalStrokedHash { }; // A hash map to convert unicodes to radical,stroke pair. -typedef TessHashMap RSMap; +typedef std::unordered_map RSMap; // A hash map to count occurrences of each radical,stroke pair. -typedef TessHashMap RSCounts; +typedef std::unordered_map RSCounts; // Helper function builds the RSMap from the radical-stroke file, which has // already been read into a STRING. Returns false on error. diff --git a/ccutil/unicharcompress.h b/ccutil/unicharcompress.h index 6efc46fdc7002e51bce6f0fea7b3a3a8424567d9..680c5aeb871b820559c5d85792d24bbc62f23537 100644 --- a/ccutil/unicharcompress.h +++ b/ccutil/unicharcompress.h @@ -22,7 +22,8 @@ #ifndef TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ #define TESSERACT_CCUTIL_UNICHARCOMPRESS_H_ -#include "hashfn.h" +#include + #include "serialis.h" #include "strngs.h" #include "unicharset.h" @@ -236,17 +237,19 @@ class UnicharCompress { // encoder_ is the only part that is serialized. The rest is computed on load. GenericVector encoder_; // Decoder converts the output of encoder back to a unichar-id. - TessHashMap decoder_; + std::unordered_map + decoder_; // True if the index is a valid single or start code. GenericVector is_valid_start_; // Maps a prefix code to a list of valid next codes. // The map owns the vectors. - TessHashMap*, + std::unordered_map*, RecodedCharID::RecodedCharIDHash> next_codes_; // Maps a prefix code to a list of valid final codes. // The map owns the vectors. - TessHashMap*, + std::unordered_map*, RecodedCharID::RecodedCharIDHash> final_codes_; // Max of any value in encoder_ + 1. diff --git a/lstm/lstmtrainer.cpp b/lstm/lstmtrainer.cpp index b3958d343133b917c162e5ad374f6c53cad0dc12..0cbb026d0745903b16038ad72f9a243cb239ffef 100644 --- a/lstm/lstmtrainer.cpp +++ b/lstm/lstmtrainer.cpp @@ -1211,7 +1211,7 @@ double LSTMTrainer::ComputeCharError(const GenericVector& truth_str, // Computes word recall error rate using a very simple bag of words algorithm. // NOTE that this is destructive on both input strings. double LSTMTrainer::ComputeWordError(STRING* truth_str, STRING* ocr_str) { - typedef TessHashMap > StrMap; + typedef std::unordered_map > StrMap; GenericVector truth_words, ocr_words; truth_str->split(' ', &truth_words); if (truth_words.empty()) return 0.0; diff --git a/textord/bbgrid.h b/textord/bbgrid.h index 4d035211af0072358b9e9e05b5abc80934ac1162..fb175efb175032f135c9e8bcd2bd82fe6d6cdafb 100644 --- a/textord/bbgrid.h +++ b/textord/bbgrid.h @@ -21,9 +21,10 @@ #ifndef TESSERACT_TEXTORD_BBGRID_H_ #define TESSERACT_TEXTORD_BBGRID_H_ +#include + #include "clst.h" #include "coutln.h" -#include "hashfn.h" #include "rect.h" #include "scrollview.h" @@ -364,7 +365,7 @@ template class GridSearch { // An iterator over the list at (x_, y_) in the grid_. BBC_C_IT it_; // Set of unique returned elements used when unique_mode_ is true. - TessHashSet > returns_; + std::unordered_set > returns_; }; // Sort function to sort a BBC by bounding_box().left(). diff --git a/training/ligature_table.cpp b/training/ligature_table.cpp index f072b46950725728e5801659594ce63a12fc7a7b..54c51748d548f053f9ea07974a8091f739b61f60 100644 --- a/training/ligature_table.cpp +++ b/training/ligature_table.cpp @@ -46,7 +46,7 @@ const int kMinLigature = 0xfb00; const int kMaxLigature = 0xfb17; // Don't put the wide Hebrew letters in. /* static */ -SmartPtr LigatureTable::instance_; +std::unique_ptr LigatureTable::instance_; /* static */ LigatureTable* LigatureTable::Get() { diff --git a/training/ligature_table.h b/training/ligature_table.h index 83e7dc3c4d165983975893bb750073474d782ac3..62b1f86a8cb0719fdd398ed4f970f33f288e3c57 100644 --- a/training/ligature_table.h +++ b/training/ligature_table.h @@ -23,8 +23,9 @@ #define TRAININGDATA_LIGATURE_TABLE_H_ #include +#include +#include -#include "hashfn.h" #include "util.h" namespace tesseract { @@ -32,7 +33,7 @@ namespace tesseract { class PangoFontInfo; // defined in pango_font_info.h // Map to substitute strings for ligatures. -typedef TessHashMap LigHash; +typedef std::unordered_map LigHash; class LigatureTable { public: @@ -61,7 +62,7 @@ class LigatureTable { // corresponding ligature characters. void Init(); - static SmartPtr instance_; + static std::unique_ptr instance_; LigHash norm_to_lig_table_; LigHash lig_to_norm_table_; int min_lig_length_; diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index 5e2848b29a09df1325b30908bbb2a6c2575f1b7d..1e1daed2cf6c5cea4f4521d6265d4c7a1e4c6b6f 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -688,7 +688,7 @@ void FontUtils::GetAllRenderableCharacters(const vector& fonts, // Utilities written to be backward compatible with StringRender /* static */ -int FontUtils::FontScore(const TessHashMap& ch_map, +int FontUtils::FontScore(const std::unordered_map& ch_map, const string& fontname, int* raw_score, vector* ch_flags) { PangoFontInfo font_info; @@ -704,7 +704,7 @@ int FontUtils::FontScore(const TessHashMap& ch_map, } *raw_score = 0; int ok_chars = 0; - for (TessHashMap::const_iterator it = ch_map.begin(); + for (std::unordered_map::const_iterator it = ch_map.begin(); it != ch_map.end(); ++it) { bool covered = (IsWhitespace(it->first) || (pango_coverage_get(coverage, it->first) @@ -722,7 +722,7 @@ int FontUtils::FontScore(const TessHashMap& ch_map, /* static */ -string FontUtils::BestFonts(const TessHashMap& ch_map, +string FontUtils::BestFonts(const std::unordered_map& ch_map, vector > >* fonts) { const double kMinOKFraction = 0.99; // Weighted fraction of characters that must be renderable in a font to make diff --git a/training/pango_font_info.h b/training/pango_font_info.h index a1ce809943efdeb73d225fdbd75f55521d7604ba..74a824998724f5f7e839f12a45b17d3190185056 100644 --- a/training/pango_font_info.h +++ b/training/pango_font_info.h @@ -23,9 +23,9 @@ #include #include #include +#include #include "commandlineflags.h" -#include "hashfn.h" #include "host.h" #include "pango/pango-font.h" #include "pango/pango.h" @@ -203,7 +203,7 @@ class FontUtils { // corresponding character (in order of iterating ch_map) can be rendered. // The return string is a list of the acceptable fonts that were used. static string BestFonts( - const TessHashMap& ch_map, + const std::unordered_map& ch_map, std::vector > >* font_flag); // FontScore returns the weighted renderability score of the given @@ -211,7 +211,7 @@ class FontUtils { // is also returned in raw_score. // The values in the bool vector ch_flags correspond to whether the // corresponding character (in order of iterating ch_map) can be rendered. - static int FontScore(const TessHashMap& ch_map, + static int FontScore(const std::unordered_map& ch_map, const string& fontname, int* raw_score, std::vector* ch_flags); diff --git a/training/stringrenderer.h b/training/stringrenderer.h index b4646f71b1f99348ca2222ed884bf23b4986332d..6e2bee072d2416dca251e7429414c638663e4a9d 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -31,8 +31,8 @@ #include #include +#include -#include "hashfn.h" #include "host.h" #include "pango_font_info.h" #include "pango/pango-layout.h" @@ -210,7 +210,7 @@ class StringRenderer { Boxa* page_boxes_; // Objects cached for subsequent calls to RenderAllFontsToImage() - TessHashMap char_map_; // Time-saving char histogram. + std::unordered_map char_map_; // Time-saving char histogram. int total_chars_; // Number in the string to be rendered. int font_index_; // Index of next font to use in font list. int last_offset_; // Offset returned from last successful rendering