提交 a59e5dc7 编写于 作者: T theraysmith

Preparations for unicodization

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@56 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 c7e9ec8f
......@@ -25,6 +25,11 @@
/*----------------------------------------------------------------------
I n c l u d e s
----------------------------------------------------------------------*/
#ifdef __MSW32__
#include <windows.h>
#else
#include <netinet/in.h>
#endif
#include "dawg.h"
#include "cutil.h"
#include "callcpp.h"
......@@ -91,18 +96,24 @@ INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node) {
}
/*
* Initialize letter_is_okay to point to the default implementation (a main
* program can override this).
*/
LETTER_OK_FUNC letter_is_okay = &def_letter_is_okay;
/**********************************************************************
* letter_is_okay
* def_letter_is_okay
*
* Check this letter in light of the current state. If everything is
* still OK then return TRUE;
* Default way to check this letter in light of the current state. If
* everything is still OK then return TRUE.
**********************************************************************/
INT32 letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end) {
INT32 def_letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end) {
EDGE_REF edge;
STRING dummy_word(word); // Auto-deleting string fixes memory leak.
......@@ -267,7 +278,8 @@ void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node) {
*
* Read the DAWG in from a file
**********************************************************************/
void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges) {
void read_squished_dawg(const char *filename, EDGE_ARRAY dawg,
INT32 max_num_edges) {
FILE *file;
EDGE_REF edge;
INT32 num_edges = 0;
......@@ -282,28 +294,12 @@ void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges) {
#else
file = open_file (filename, "rb");
#endif
fseek(file, 0, SEEK_END);
long fsize = ftell(file);
rewind(file);
fread (&num_edges, sizeof (int), 1, file);
// Auto-detect relative endianness of file and OS as future DAWG
// files may be little-endian.
long diff1 = sizeof(EDGE_RECORD)*num_edges + sizeof(int) - fsize;
reverse32(&num_edges);
long diff2 = sizeof(EDGE_RECORD)*num_edges + sizeof(int) - fsize;
reverse32(&num_edges);
// One of diff1 and diff2 should now be 0, but find the smallest
// just in case.
if (diff1 < 0) diff1 = -diff1;
if (diff2 < 0) diff2 = -diff2;
bool swap = diff2 < diff1;
if (swap)
reverse32(&num_edges);
num_edges = ntohl(num_edges);
fread (&dawg[0], sizeof (EDGE_RECORD), num_edges, file);
fclose(file);
if (swap)
for (edge=0;edge<num_edges;edge++)
reverse32(&dawg[edge]);
for (edge=0;edge<num_edges;edge++)
dawg[edge] = ntohl(dawg[edge]);
for (edge=0; edge<max_num_edges; edge++)
if (last_edge (dawg, edge)) node_count++;
......
......@@ -225,24 +225,34 @@ EDGE_REF edge_char_of(EDGE_ARRAY dawg,
int character,
int word_end);
INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
INT32 edges_in_node(EDGE_ARRAY dawg, NODE_REF node);
INT32 letter_is_okay(EDGE_ARRAY dawg,
INT32 def_letter_is_okay(EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
char prevchar,
const char *word,
INT32 word_end);
INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
/*
* Allow for an externally provided letter_is_okay.
*
* LETTER_OK_FUNC is a pointer-to-function type with the same parameter
* list and INT32 return type as def_letter_is_okay: (dawg, node,
* char_index, prevchar, word, word_end).
*
* letter_is_okay is only declared here (extern); callers invoke the
* check through this pointer, so assigning a different function to it
* replaces the letter check that is used.
*/
typedef INT32 (*LETTER_OK_FUNC)(EDGE_ARRAY, NODE_REF*, INT32, char, const char*,
INT32);
extern LETTER_OK_FUNC letter_is_okay;
INT32 num_forward_edges(EDGE_ARRAY dawg, NODE_REF node);
void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
void print_dawg_node(EDGE_ARRAY dawg, NODE_REF node);
void read_squished_dawg(char *filename, EDGE_ARRAY dawg, INT32 max_num_edges);
void read_squished_dawg(const char *filename, EDGE_ARRAY dawg,
INT32 max_num_edges);
INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);
INT32 verify_trailing_punct(EDGE_ARRAY dawg, char *word, INT32 char_index);
INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
INT32 word_in_dawg(EDGE_ARRAY dawg, const char *string);
/*
#if defined(__STDC__) || defined(__cplusplus) || MAC_OR_DOS
......@@ -262,7 +272,7 @@ INT32 edges_in_node
_ARGS((EDGE_ARRAY dawg,
NODE_REF node));
INT32 letter_is_okay
INT32 def_letter_is_okay
_ARGS((EDGE_ARRAY dawg,
NODE_REF *node,
INT32 char_index,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册