提交 93c8e5d2 编写于 作者: T theraysmith

Harmless improvements from 3.00 going in to 2.04

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@209 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 de462485
/**********************************************************************
* File: boxread.cpp
* Description: Read data from a box file.
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
* Author: Ray Smith
* Created: Fri Aug 24 17:47:23 PDT 2007
*
* (C) Copyright 2007, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
......@@ -23,12 +23,25 @@
#include "unichar.h"
#include "tprintf.h"
// Box files are used ONLY DURING TRAINING, but by both processes of
// creating tr files with tesseract, and unicharset_extractor.
// read_next_box factors out the code to interpret a line of a box
// file so that applybox and unicharset_extractor interpret the same way.
// This function returns the next valid box file utf8 string and coords
// and returns true, or false on eof (and closes the file).
// It ignores the utf8 file signature, checks for valid utf-8 and allows
// space or tab between fields.
// utf8_str must be at least kBoxReadBufSize in length.
// If there are page numbers in the file, it reads them all.
bool read_next_box(FILE* box_file, char* utf8_str,
                   int* x_min, int* y_min, int* x_max, int* y_max) {
  // -1 asks the page-aware overload to accept boxes from any page.
  const int any_page = -1;
  // Forward every argument unchanged; the result is passed straight back.
  return read_next_box(any_page, box_file, utf8_str,
                       x_min, y_min, x_max, y_max);
}
// As read_next_box above, but get a specific page number. (0-based)
// Use -1 to read any page number. Files without page number all
// read as if they are page 0.
bool read_next_box(int target_page, FILE* box_file, char* utf8_str,
int* x_min, int* y_min, int* x_max, int* y_max) {
static int line = 0;
......
......@@ -17,8 +17,8 @@
*
**********************************************************************/
#ifndef THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
#ifndef TESSERACT_CCUTIL_BOXREAD_H__
#define TESSERACT_CCUTIL_BOXREAD_H__
#include <stdio.h>
......@@ -41,5 +41,4 @@ bool read_next_box(FILE* box_file, char* utf8_str,
bool read_next_box(int page, FILE* box_file, char* utf8_str,
int* x_min, int* y_min, int* x_max, int* y_max);
#endif // THIRD_PARTY_TESSERACT_CCUTIL_BOXREAD_H__
#endif // TESSERACT_CCUTIL_BOXREAD_H__
......@@ -31,7 +31,6 @@
const ERRCODE BADERRACTION = "Illegal error action";
#define MAX_MSG 1024
extern inT16 global_abort_code;
/**********************************************************************
* error
......@@ -76,27 +75,29 @@ const char *format, ... //special message
//no specific
msgptr += sprintf (msgptr, "\n");
tprintf(msg);
if ((strstr (message, "File") != NULL) ||
fprintf(stderr, msg);
/*if ((strstr (message, "File") != NULL) ||
(strstr (message, "file") != NULL))
global_abort_code = FILE_ABORT;
else if ((strstr (message, "List") != NULL) ||
(strstr (message, "list") != NULL))
global_abort_code = LIST_ABORT;
else if ((strstr (message, "Memory") != NULL) ||
(strstr (message, "memory") != NULL))
global_abort_code = MEMORY_ABORT;
else
global_abort_code = NO_ABORT_CODE;
*/
int* p = NULL;
switch (action) {
case DBG:
case TESSLOG:
return; //report only
case EXIT:
err_exit();
//err_exit();
case ABORT:
abort();
// Create a deliberate segv as the stack trace is more useful that way.
if (!*p)
abort();
default:
BADERRACTION.error ("error", ABORT, NULL);
}
......
......@@ -22,18 +22,18 @@
#include "errcode.h"
#include "tprintf.h"
inT16 global_loc_code = LOC_INIT;//location code
/*inT16 global_loc_code = LOC_INIT;//location code
inT16 global_subloc_code = SUBLOC_NORM;
//pass2 subloc code
inT16 global_subsubloc_code = SUBSUBLOC_OTHER;
//location code
inT16 global_abort_code = NO_ABORT_CODE;
//Prog abort code
*/
void signal_exit( //
int signal_code //Signal which
) {
int exit_status;
/*int exit_status;
if ((global_loc_code == LOC_PASS2) || (global_loc_code == LOC_FUZZY_SPACE))
global_loc_code += global_subloc_code + global_subsubloc_code;
......@@ -49,7 +49,8 @@ void signal_exit( //
exit_status, global_loc_code, signal_code);
}
exit(exit_status);
exit(exit_status);*/
exit(signal_code);
}
......@@ -95,19 +96,20 @@ void signal_termination_handler( //The real signal
//}; //end extern "C"
void set_global_loc_code(int loc_code) {
global_loc_code = loc_code;
// global_loc_code = loc_code;
}
void set_global_subloc_code(int loc_code) {
global_subloc_code = loc_code;
// global_subloc_code = loc_code;
}
void set_global_subsubloc_code(int loc_code) {
global_subsubloc_code = loc_code;
// global_subsubloc_code = loc_code;
}
......@@ -171,4 +171,10 @@ typedef unsigned char BOOL8;
#define NULL 0L
#endif
// Return true if x is within tolerance of y, i.e. |x - y| <= tolerance.
// The difference is always taken as larger-minus-smaller so that it cannot
// go negative: with an unsigned T, a plain x - y wraps around to a huge
// value whenever x < y, which would make close values compare as unequal.
// A negative tolerance never matches, since the difference is never
// negative.
template<class T> bool NearlyEqual(T x, T y, T tolerance) {
  T diff = (x > y) ? x - y : y - x;
  return diff <= tolerance;
}
#endif
......@@ -40,7 +40,10 @@ EXTERN DLLSYM STRING imagebasename;
EXTERN BOOL_VAR (m_print_variables, FALSE,
"Print initial values of all variables");
EXTERN STRING_VAR (m_data_sub_dir, "tessdata/", "Directory for data files");
EXTERN INT_VAR (memgrab_size, 0, "Preallocation size for batch use");
/*
EXTERN INT_VAR (memgrab_size, 0, "Preallocation size for batch use");*/
const ERRCODE NO_PATH =
"Warning:explicit path for executable will not be used for configs";
static const ERRCODE USAGE = "Usage";
......@@ -51,7 +54,7 @@ static const ERRCODE USAGE = "Usage";
* Main for mithras demo program. Read the arguments and set up globals.
**********************************************************************/
void main_setup( /*main demo program */
void main_setup( /*main demo program */
const char *argv0, //program name
const char *basename, //name of image
int argc, /*argument count */
......@@ -65,21 +68,29 @@ void main_setup( /*main demo program */
imagebasename = basename; /*name of image */
if(!getenv("TESSDATA_PREFIX")) {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
datadir = _XSTR(TESSDATA_PREFIX);
#undef _XSTR
#undef _STR
#else
if (getpath (argv0, datadir) < 0)
#ifdef __UNIX__
CANTOPENFILE.error ("main", ABORT, "%s to get path", argv[0]);
#else
NO_PATH.error ("main", DBG, NULL);
#endif
#endif
// TESSDATA_PREFIX Environment variable overrules everything.
// Compiled in -DTESSDATA_PREFIX is next.
// NULL goes to current directory.
// An actual value of argv0 is used if getpath is successful.
if (!getenv("TESSDATA_PREFIX")) {
#ifdef TESSDATA_PREFIX
#define _STR(a) #a
#define _XSTR(a) _STR(a)
datadir = _XSTR(TESSDATA_PREFIX);
#undef _XSTR
#undef _STR
#else
if (argv0 != NULL) {
if (getpath(argv0, datadir) < 0)
#ifdef __UNIX__
CANTOPENFILE.error("main", ABORT, "%s to get path", argv0);
#else
NO_PATH.error("main", DBG, NULL);
#endif
} else {
datadir = "./";
}
#endif
} else {
datadir = getenv("TESSDATA_PREFIX");
}
......@@ -114,18 +125,4 @@ void main_setup( /*main demo program */
datadir += m_data_sub_dir; /*data directory */
#ifdef __UNIX__
if (memgrab_size > 0) {
void *membuf; //test virtual mem
//test memory
membuf = malloc (memgrab_size);
if (membuf == NULL) {
raise(SIGTTOU); //hangup for jobber
sleep (10);
}
else
free(membuf);
}
#endif
}
......@@ -114,7 +114,8 @@ DLLSYM char *alloc_string( //allocate string
}
return &string[1]; //string for user
#else
return static_cast<char*>(malloc(count));
// Round up the amount allocated to a multiple of 4
return static_cast<char*>(malloc((count + 3) & ~3));
#endif
}
......
......@@ -21,11 +21,11 @@
#define NDMINX_H
#ifndef MAX
#define MAX(a,b) ( (a>b) ? a : b )
#define MAX(x,y) (((x) >= (y))?(x):(y))
#endif
#ifndef MIN
#define MIN(a,b) ( (a<b) ? a : b )
#define MIN(x,y) (((x) <= (y))?(x):(y))
#endif
#endif
......@@ -17,8 +17,8 @@
//
///////////////////////////////////////////////////////////////////////
#ifndef THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#ifndef TESSERACT_CCUTIL_UNICHAR_H__
#define TESSERACT_CCUTIL_UNICHAR_H__
#include <memory.h>
......@@ -29,6 +29,11 @@
// A UNICHAR_ID is the unique id of a unichar.
typedef int UNICHAR_ID;
// A variable to indicate an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
......@@ -76,4 +81,4 @@ class UNICHAR {
char chars[UNICHAR_LEN];
};
#endif // THIRD_PARTY_TESSERACT_CCUTIL_UNICHAR_H__
#endif // TESSERACT_CCUTIL_UNICHAR_H__
......@@ -17,8 +17,8 @@
//
///////////////////////////////////////////////////////////////////////
#ifndef THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#define THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#ifndef TESSERACT_CCUTIL_UNICHARMAP_H__
#define TESSERACT_CCUTIL_UNICHARMAP_H__
#include "unichar.h"
......@@ -79,4 +79,4 @@ class UNICHARMAP {
UNICHARMAP_NODE* nodes;
};
#endif // THIRD_PARTY_TESSERACT_CCUTIL_UNICHARMAP_H__
#endif // TESSERACT_CCUTIL_UNICHARMAP_H__
......@@ -584,7 +584,7 @@ DLLSYM BOOL8 read_variables_file(const char *file // name to read
if (!foundit) {
anyerr = TRUE; // had an error
tprintf("read_variables_file:variable not found: %s",
tprintf("read_variables_file:variable not found: %s\n",
line);
}
}
......
/**********************************************************************
* File: varable.h (Formerly variable.h)
* Description: Class definitions of the *_VAR classes for tunable constants.
* Author: Ray Smith
* Created: Fri Feb 22 11:26:25 GMT 1991
* Author: Ray Smith
* Created: Fri Feb 22 11:26:25 GMT 1991
*
* (C) Copyright 1991, Hewlett-Packard Ltd.
** Licensed under the Apache License, Version 2.0 (the "License");
......@@ -342,66 +342,78 @@ class DLLSYM double_VARIABLE
/* SECURE_NAMES is defined in senames.h when necessary */
#ifdef SECURE_NAMES
#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,"","")
#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,"","")
#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,"","")
#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,"","")
#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,"","")
#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,"","")
#define double_VAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,"","")
#define double_VAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,"","")
#else
#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,#name,comment)
#define INT_VAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,#name,comment)
#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,#name,comment)
#define BOOL_VAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,#name,comment)
#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,#name,comment)
#define STRING_VAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,#name,comment)
#define double_VAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,#name,comment)
#define double_VAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,#name,comment)
#endif
#define INT_VAR_H(name,val,comment) /*declare one*/\
INT_VARIABLE name
#define INT_VAR_H(name,val,comment) /*declare one*/\
INT_VARIABLE name
#define BOOL_VAR_H(name,val,comment) /*declare one*/\
BOOL_VARIABLE name
#define BOOL_VAR_H(name,val,comment) /*declare one*/\
BOOL_VARIABLE name
#define STRING_VAR_H(name,val,comment) /*declare one*/\
STRING_VARIABLE name
#define STRING_VAR_H(name,val,comment) /*declare one*/\
STRING_VARIABLE name
#define double_VAR_H(name,val,comment) /*declare one*/\
double_VARIABLE name
#define double_VAR_H(name,val,comment) /*declare one*/\
double_VARIABLE name
#define INT_EVAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,#name,comment)
#define INT_MEMBER(name, val, comment) /*make INT_VARIABLE*/\
name(val, #name, comment)
#define INT_EVAR_H(name,val,comment) /*declare one*/\
INT_VARIABLE name
#define BOOL_MEMBER(name, val, comment) /*make BOOL_VARIABLE*/\
name(val, #name, comment)
#define BOOL_EVAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,#name,comment)
#define STRING_MEMBER(name, val, comment) /*make STRING_VARIABLE*/\
name(val, #name, comment)
#define BOOL_EVAR_H(name,val,comment) /*declare one*/\
BOOL_VARIABLE name
#define double_MEMBER(name, val, comment) /*make double_VARIABLE*/\
name(val, #name, comment)
#define STRING_EVAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,#name,comment)
#define INT_EVAR(name,val,comment) /*make INT_VARIABLE*/\
INT_VARIABLE name(val,#name,comment)
#define STRING_EVAR_H(name,val,comment) /*declare one*/\
STRING_VARIABLE name
#define INT_EVAR_H(name,val,comment) /*declare one*/\
INT_VARIABLE name
#define double_EVAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,#name,comment)
#define BOOL_EVAR(name,val,comment) /*make BOOL_VARIABLE*/\
BOOL_VARIABLE name(val,#name,comment)
#define double_EVAR_H(name,val,comment) /*declare one*/\
double_VARIABLE name
#define BOOL_EVAR_H(name,val,comment) /*declare one*/\
BOOL_VARIABLE name
#define STRING_EVAR(name,val,comment) /*make STRING_VARIABLE*/\
STRING_VARIABLE name(val,#name,comment)
#define STRING_EVAR_H(name,val,comment) /*declare one*/\
STRING_VARIABLE name
#define double_EVAR(name,val,comment) /*make double_VARIABLE*/\
double_VARIABLE name(val,#name,comment)
#define double_EVAR_H(name,val,comment) /*declare one*/\
double_VARIABLE name
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册