提交 1943de9a 编写于 作者: T theraysmith

Fixed the extern C mismatches properly.

git-svn-id: https://tesseract-ocr.googlecode.com/svn/trunk@82 d0cd1f9f-072b-0410-8dd7-cf729c803f20
上级 f4baca27
此差异已折叠。
...@@ -46,66 +46,8 @@ ...@@ -46,66 +46,8 @@
typedef void (*TESS_TESTER) (TBLOB *, BOOL8, char *, INT32, LIST); typedef void (*TESS_TESTER) (TBLOB *, BOOL8, char *, INT32, LIST);
typedef LIST (*TESS_MATCHER) (TBLOB *, TBLOB *, TBLOB *, void *, TEXTROW *); typedef LIST (*TESS_MATCHER) (TBLOB *, TBLOB *, TBLOB *, void *, TEXTROW *);
extern "C" extern TEXTROW normalized_row;
{ extern int display_ratings;
/*
int start_recog( //Real main in C
int argc,
char *argv[]);
void program_editup2( //afterforking part
int argc,
char** argv);
int end_recog( //Real main in C
int argc,
char *argv[]);
void set_interactive_pass();
void set_pass1();
void set_pass2();
//ARRAY cc_recog(TWERD*,TESS_CHOICE*,TESS_CHOICE*,TESS_TESTER,
// TESS_TESTER);*/
//void wo_learn_blob(TBLOB*,TEXTROW*,char*,INT32);
//LIST AdaptiveClassifier(TBLOB*,TBLOB*,TEXTROW*);
//void LearnBlob(TBLOB*,TEXTROW*,char*,INT32);
//TWERD *newword();
//TBLOB *newblob();
//TESSLINE *newoutline();
//EDGEPT *newedgept();
//void oldedgept(EDGEPT*);
//void destroy_nodes(void*,void (*)(void*));
//TESS_LIST *append_choice(TESS_LIST*,char*,double,double,char);
//void fix_quotes (char*);
//void record_certainty(double,int);
//int AcceptableResult(A_CHOICE*,A_CHOICE*);
//int AdaptableWord(TWERD*,const char*,const char*);
//void delete_word(TWERD*);
//void free_blob(TBLOB*);
//void add_document_word(A_CHOICE*);
//void AdaptToWord(TWERD*,TEXTROW*,const char*,const char*,const char*);
//void SaveBadWord(const char*,double);
//void free_choice(TESS_CHOICE*);
//TWERD *newword();
//TBLOB *newblob();
//void free_blob( //free a blob
// TBLOB *blob); //blob to free
//int dict_word( const char* );
//extern int tess_cn_matching;
//extern int tess_bn_matching;
//extern int last_word_on_line;
extern TEXTROW normalized_row;
//extern TESS_MATCHER blob_matchers[];
//extern FILE *rawfile;
//extern FILE *textfile;
//extern int character_count;
//extern int word_count;
//extern int enable_assoc;
//extern int chop_enable;
//extern int permute_only_top;
extern int display_ratings;
};
#if 0 #if 0
#define strsave(s) \ #define strsave(s) \
......
...@@ -23,8 +23,8 @@ ...@@ -23,8 +23,8 @@
#include "tessopt.h" #include "tessopt.h"
#include "notdll.h" //must be last include #include "notdll.h" //must be last include
int optind; int tessoptind;
char *optarg; char *tessoptarg;
/********************************************************************** /**********************************************************************
* tessopt * tessopt
...@@ -37,22 +37,22 @@ INT32 argc, //arg count ...@@ -37,22 +37,22 @@ INT32 argc, //arg count
char *argv[], //args char *argv[], //args
const char *arglist //string of arg chars const char *arglist //string of arg chars
) { ) {
char *arg; //arg char const char *arg; //arg char
if (optind == 0) if (tessoptind == 0)
optind = 1; tessoptind = 1;
if (optind < argc && argv[optind][0] == '-') { if (tessoptind < argc && argv[tessoptind][0] == '-') {
arg = strchr (arglist, argv[optind][1]); arg = strchr (arglist, argv[tessoptind][1]);
if (arg == NULL || *arg == ':') if (arg == NULL || *arg == ':')
return '?'; //dud option return '?'; //dud option
optind++; tessoptind++;
optarg = argv[optind]; tessoptarg = argv[tessoptind];
if (arg[1] == ':') { if (arg[1] == ':') {
if (argv[optind - 1][2] != '\0') if (argv[tessoptind - 1][2] != '\0')
//immediately after //immediately after
optarg = argv[optind - 1] + 2; tessoptarg = argv[tessoptind - 1] + 2;
else else
optind++; tessoptind++;
} }
return *arg; return *arg;
} }
......
...@@ -20,8 +20,8 @@ ...@@ -20,8 +20,8 @@
#include "host.h" #include "host.h"
#include "notdll.h" //must be last include #include "notdll.h" //must be last include
extern int optind; extern int tessoptind;
extern char *optarg; extern char *tessoptarg;
int tessopt ( //parse args int tessopt ( //parse args
INT32 argc, //arg count INT32 argc, //arg count
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
*/ */
#ifdef __cplusplus #ifdef __cplusplus
#define EXTERN extern "C" #define EXTERN extern
#else #else
#define EXTERN extern #define EXTERN extern
#endif #endif
......
...@@ -39,14 +39,12 @@ ...@@ -39,14 +39,12 @@
extern TBLOB *pageblobs; /*first blob on page */ extern TBLOB *pageblobs; /*first blob on page */
extern TEXTBLOCK *pageblocks; /*first block on page */ extern TEXTBLOCK *pageblocks; /*first block on page */
/*class definitions */ /*class definitions */
extern char classes[CLASSIZE][CLASSLENGTH]; /* extern char classes[CLASSIZE][CLASSLENGTH]; */
extern int resolution; /*scanner res in dpi */ extern int resolution; /*scanner res in dpi */
extern int acts[MAXPROC]; /*action flags */ extern int acts[MAXPROC]; /*action flags */
extern int debugs[MAXPROC]; /*debug flags */ extern int debugs[MAXPROC]; /*debug flags */
extern int plots[MAXPROC]; /*plot flags */ extern int plots[MAXPROC]; /*plot flags */
extern int corners[4]; /*corners of scan window */ extern int corners[4]; /*corners of scan window */
extern int optind; /*option index */
extern char *optarg; /*option argument */
/*image file name */ /*image file name */
extern char imagefile[FILENAMESIZE]; extern char imagefile[FILENAMESIZE];
/* main directory */ /* main directory */
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "unichar.h"
#define MAXNAMESIZE 80 #define MAXNAMESIZE 80
#define MAX_NUM_SAMPLES 10000 #define MAX_NUM_SAMPLES 10000
...@@ -219,21 +220,34 @@ int main ( ...@@ -219,21 +220,34 @@ int main (
ParseArguments (argc, argv); ParseArguments (argc, argv);
while ((PageName = GetNextFilename()) != NULL) while ((PageName = GetNextFilename()) != NULL)
{ {
printf ("\nReading %s ...", PageName); printf ("Reading %s ...\n", PageName);
TrainingPage = Efopen (PageName, "r"); TrainingPage = Efopen (PageName, "r");
ReadTrainingSamples (TrainingPage, &CharList); ReadTrainingSamples (TrainingPage, &CharList);
fclose (TrainingPage); fclose (TrainingPage);
//WriteTrainingSamples (Directory, CharList); //WriteTrainingSamples (Directory, CharList);
} }
printf("Clustering ...\n");
pCharList = CharList; pCharList = CharList;
iterate(pCharList) iterate(pCharList)
{ {
//Cluster //Cluster
CharSample = (LABELEDLIST) first_node (pCharList); CharSample = (LABELEDLIST) first_node (pCharList);
printf ("\nClustering %s ...", CharSample->Label); //printf ("\nClustering %s ...", CharSample->Label);
Clusterer = SetUpForClustering(CharSample); Clusterer = SetUpForClustering(CharSample);
ProtoList = ClusterSamples(Clusterer, &Config); float SavedMinSamples = Config.MinSamples;
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); while (Config.MinSamples > 0.001) {
ProtoList = ClusterSamples(Clusterer, &Config);
if (NumberOfProtos(ProtoList, 1, 0) > 0)
break;
else {
Config.MinSamples *= 0.95;
printf("0 significant protos for %s."
" Retrying clustering with MinSamples = %f%%\n",
CharSample->Label, Config.MinSamples);
}
}
Config.MinSamples = SavedMinSamples;
AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
} }
FreeTrainingSamples (CharList); FreeTrainingSamples (CharList);
WriteNormProtos (Directory, NormProtoList, Clusterer); WriteNormProtos (Directory, NormProtoList, Clusterer);
...@@ -262,7 +276,7 @@ void ParseArguments( ...@@ -262,7 +276,7 @@ void ParseArguments(
** ShowSignificantProtos flag controlling proto display ** ShowSignificantProtos flag controlling proto display
** ShowInsignificantProtos flag controlling proto display ** ShowInsignificantProtos flag controlling proto display
** Config current clustering parameters ** Config current clustering parameters
** optarg, optind defined by tessopt sys call ** tessoptarg, tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv ** Argc, Argv global copies of argc and argv
** Operation: ** Operation:
** This routine parses the command line arguments that were ** This routine parses the command line arguments that were
...@@ -287,7 +301,6 @@ void ParseArguments( ...@@ -287,7 +301,6 @@ void ParseArguments(
int Option; int Option;
int ParametersRead; int ParametersRead;
BOOL8 Error; BOOL8 Error;
extern char *optarg;
Error = FALSE; Error = FALSE;
Argc = argc; Argc = argc;
...@@ -297,48 +310,48 @@ void ParseArguments( ...@@ -297,48 +310,48 @@ void ParseArguments(
switch ( Option ) switch ( Option )
{ {
case 'n': case 'n':
sscanf(optarg,"%d", &ParametersRead); sscanf(tessoptarg,"%d", &ParametersRead);
ShowInsignificantProtos = ParametersRead; ShowInsignificantProtos = ParametersRead;
break; break;
case 'p': case 'p':
sscanf(optarg,"%d", &ParametersRead); sscanf(tessoptarg,"%d", &ParametersRead);
ShowSignificantProtos = ParametersRead; ShowSignificantProtos = ParametersRead;
break; break;
case 'd': case 'd':
ShowAllSamples = FALSE; ShowAllSamples = FALSE;
break; break;
case 'C': case 'C':
ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) ); ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Confidence > 1 ) Config.Confidence = 1; else if ( Config.Confidence > 1 ) Config.Confidence = 1;
else if ( Config.Confidence < 0 ) Config.Confidence = 0; else if ( Config.Confidence < 0 ) Config.Confidence = 0;
break; break;
case 'I': case 'I':
ParametersRead = sscanf( optarg, "%f", &(Config.Independence) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Independence > 1 ) Config.Independence = 1; else if ( Config.Independence > 1 ) Config.Independence = 1;
else if ( Config.Independence < 0 ) Config.Independence = 0; else if ( Config.Independence < 0 ) Config.Independence = 0;
break; break;
case 'M': case 'M':
ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1; else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0; else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
break; break;
case 'B': case 'B':
ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1; else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0; else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
break; break;
case 'R': case 'R':
ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy ); ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01; else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0; else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
break; break;
case 'S': case 'S':
switch ( optarg[0] ) switch ( tessoptarg[0] )
{ {
case 's': Config.ProtoStyle = spherical; break; case 's': Config.ProtoStyle = spherical; break;
case 'e': Config.ProtoStyle = elliptical; break; case 'e': Config.ProtoStyle = elliptical; break;
...@@ -348,10 +361,10 @@ void ParseArguments( ...@@ -348,10 +361,10 @@ void ParseArguments(
} }
break; break;
case 'D': case 'D':
Directory = optarg; Directory = tessoptarg;
break; break;
case 'N': case 'N':
if (sscanf (optarg, "%d", &MaxNumSamples) != 1 || if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
MaxNumSamples <= 0) MaxNumSamples <= 0)
Error = TRUE; Error = TRUE;
break; break;
...@@ -375,7 +388,7 @@ char *GetNextFilename () ...@@ -375,7 +388,7 @@ char *GetNextFilename ()
/* /*
** Parameters: none ** Parameters: none
** Globals: ** Globals:
** optind defined by tessopt sys call ** tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv ** Argc, Argv global copies of argc and argv
** Operation: ** Operation:
** This routine returns the next command line argument. If ** This routine returns the next command line argument. If
...@@ -388,8 +401,8 @@ char *GetNextFilename () ...@@ -388,8 +401,8 @@ char *GetNextFilename ()
*/ */
{ {
if (optind < Argc) if (tessoptind < Argc)
return (Argv [optind++]); return (Argv [tessoptind++]);
else else
return (NULL); return (NULL);
...@@ -417,32 +430,32 @@ void ReadTrainingSamples ( ...@@ -417,32 +430,32 @@ void ReadTrainingSamples (
*/ */
{ {
char CharName[MAXNAMESIZE]; char unichar[UNICHAR_LEN + 1];
LABELEDLIST CharSample; LABELEDLIST CharSample;
FEATURE_SET FeatureSamples; FEATURE_SET FeatureSamples;
CHAR_DESC CharDesc; CHAR_DESC CharDesc;
int Type, i; int Type, i;
while (fscanf (File, "%s %s", FontName, CharName) == 2) { while (fscanf (File, "%s %s", FontName, unichar) == 2) {
CharSample = FindList (*TrainingSamples, CharName); CharSample = FindList (*TrainingSamples, unichar);
if (CharSample == NULL) { if (CharSample == NULL) {
CharSample = NewLabeledList (CharName); CharSample = NewLabeledList (unichar);
*TrainingSamples = push (*TrainingSamples, CharSample); *TrainingSamples = push (*TrainingSamples, CharSample);
} }
CharDesc = ReadCharDescription (File); CharDesc = ReadCharDescription (File);
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE); Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
FeatureSamples = FeaturesOfType(CharDesc, Type); FeatureSamples = FeaturesOfType(CharDesc, Type);
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) { for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature]; FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim) for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD); f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
} }
CharSample->List = push (CharSample->List, FeatureSamples); CharSample->List = push (CharSample->List, FeatureSamples);
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++) for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
if (Type != i) if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i)); FreeFeatureSet (FeaturesOfType (CharDesc, i));
free (CharDesc); free (CharDesc);
} }
} // ReadTrainingSamples } // ReadTrainingSamples
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
...@@ -606,7 +619,6 @@ void WriteNormProtos ( ...@@ -606,7 +619,6 @@ void WriteNormProtos (
char Filename[MAXNAMESIZE]; char Filename[MAXNAMESIZE];
LABELEDLIST LabeledProto; LABELEDLIST LabeledProto;
int N; int N;
char Label;
strcpy (Filename, ""); strcpy (Filename, "");
if (Directory != NULL) if (Directory != NULL)
...@@ -623,9 +635,17 @@ void WriteNormProtos ( ...@@ -623,9 +635,17 @@ void WriteNormProtos (
{ {
LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); LabeledProto = (LABELEDLIST) first_node (LabeledProtoList);
N = NumberOfProtos(LabeledProto->List, N = NumberOfProtos(LabeledProto->List,
ShowSignificantProtos, ShowInsignificantProtos); ShowSignificantProtos, ShowInsignificantProtos);
Label = NameToChar(LabeledProto->Label); if (N < 1) {
fprintf(File, "\n%c %d\n", Label, N); printf ("\nError! Not enough protos for %s: %d protos"
" (%d significant protos"
", %d insignificant protos)\n",
LabeledProto->Label, N,
NumberOfProtos(LabeledProto->List, 1, 0),
NumberOfProtos(LabeledProto->List, 0, 1));
exit(1);
}
fprintf(File, "\n%s %d\n", LabeledProto->Label, N);
WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, WriteProtos(File, Clusterer->SampleSize, LabeledProto->List,
ShowSignificantProtos, ShowInsignificantProtos); ShowSignificantProtos, ShowInsignificantProtos);
} }
......
...@@ -44,6 +44,9 @@ ...@@ -44,6 +44,9 @@
#include "intproto.h" #include "intproto.h"
#include "variables.h" #include "variables.h"
#include "freelist.h" #include "freelist.h"
#include "efio.h"
#include "danerror.h"
#include "globals.h"
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
...@@ -73,7 +76,6 @@ typedef MERGE_CLASS_NODE* MERGE_CLASS; ...@@ -73,7 +76,6 @@ typedef MERGE_CLASS_NODE* MERGE_CLASS;
#define round(x,frag)(floor(x/frag+.5)*frag) #define round(x,frag)(floor(x/frag+.5)*frag)
/**---------------------------------------------------------------------------- /**----------------------------------------------------------------------------
Public Function Prototypes Public Function Prototypes
----------------------------------------------------------------------------**/ ----------------------------------------------------------------------------**/
...@@ -164,21 +166,7 @@ void Normalize ( ...@@ -164,21 +166,7 @@ void Normalize (
void SetUpForFloat2Int( void SetUpForFloat2Int(
LIST LabeledClassList); LIST LabeledClassList);
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) { void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);
FILE* fp = Efopen(filename, "wb");
/* then write out each class */
for (int i = 0; i < NumClassesIn (Templates); i++) {
int MaxLength = 0;
INT_CLASS Class = ClassForIndex (Templates, i);
for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
if (LengthForConfigId (Class, ConfigId) > MaxLength)
MaxLength = LengthForConfigId (Class, ConfigId);
}
fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
}
fclose(fp);
}
//--------------Global Data Definitions and Declarations-------------- //--------------Global Data Definitions and Declarations--------------
static char FontName[MAXNAMESIZE]; static char FontName[MAXNAMESIZE];
...@@ -200,6 +188,9 @@ static CLUSTERCONFIG Config = ...@@ -200,6 +188,9 @@ static CLUSTERCONFIG Config =
static FLOAT32 RoundingAccuracy = 0.0; static FLOAT32 RoundingAccuracy = 0.0;
// The unicharset used during mftraining
static UNICHARSET unicharset_mftraining;
/*---------------------------------------------------------------------------- /*----------------------------------------------------------------------------
Public Code Public Code
-----------------------------------------------------------------------------*/ -----------------------------------------------------------------------------*/
...@@ -260,12 +251,17 @@ int main ( ...@@ -260,12 +251,17 @@ int main (
LIST pCharList, pProtoList; LIST pCharList, pProtoList;
char Filename[MAXNAMESIZE]; char Filename[MAXNAMESIZE];
// Clean the unichar set
unicharset_mftraining.clear();
// Space character needed to represent NIL classification
unicharset_mftraining.unichar_insert(" ");
ParseArguments (argc, argv); ParseArguments (argc, argv);
InitFastTrainerVars (); InitFastTrainerVars ();
InitSubfeatureVars (); InitSubfeatureVars ();
while ((PageName = GetNextFilename()) != NULL) while ((PageName = GetNextFilename()) != NULL)
{ {
printf ("\nReading %s ...", PageName); printf ("Reading %s ...\n", PageName);
TrainingPage = Efopen (PageName, "r"); TrainingPage = Efopen (PageName, "r");
CharList = ReadTrainingSamples (TrainingPage); CharList = ReadTrainingSamples (TrainingPage);
fclose (TrainingPage); fclose (TrainingPage);
...@@ -275,7 +271,7 @@ int main ( ...@@ -275,7 +271,7 @@ int main (
{ {
//Cluster //Cluster
CharSample = (LABELEDLIST) first_node (pCharList); CharSample = (LABELEDLIST) first_node (pCharList);
printf ("\nClustering %s ...", CharSample->Label); // printf ("\nClustering %s ...", CharSample->Label);
Clusterer = SetUpForClustering(CharSample); Clusterer = SetUpForClustering(CharSample);
ProtoList = ClusterSamples(Clusterer, &Config); ProtoList = ClusterSamples(Clusterer, &Config);
//WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample); //WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
...@@ -320,14 +316,13 @@ int main ( ...@@ -320,14 +316,13 @@ int main (
FreeProtoList (&ProtoList); FreeProtoList (&ProtoList);
} }
FreeTrainingSamples (CharList); FreeTrainingSamples (CharList);
printf ("\n");
} }
//WriteMergedTrainingSamples(Directory,ClassList); //WriteMergedTrainingSamples(Directory,ClassList);
WriteMicrofeat(Directory, ClassList); WriteMicrofeat(Directory, ClassList);
InitIntProtoVars (); InitIntProtoVars ();
InitPrototypes (); InitPrototypes ();
SetUpForFloat2Int(ClassList); SetUpForFloat2Int(ClassList);
IntTemplates = CreateIntTemplates(TrainingData); IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining);
strcpy (Filename, ""); strcpy (Filename, "");
if (Directory != NULL) if (Directory != NULL)
{ {
...@@ -340,11 +335,18 @@ int main ( ...@@ -340,11 +335,18 @@ int main (
#else #else
OutFile = Efopen (Filename, "wb"); OutFile = Efopen (Filename, "wb");
#endif #endif
WriteIntTemplates(OutFile, IntTemplates); WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining);
fclose (OutFile); fclose (OutFile);
// Now create pffmtable. strcpy (Filename, "");
WritePFFMTable(IntTemplates, "pffmtable"); if (Directory != NULL)
printf ("\nDone!\n"); /**/ {
strcat (Filename, Directory);
strcat (Filename, "/");
}
strcat (Filename, "pffmtable");
// Now create pffmtable.
WritePFFMTable(IntTemplates, Filename);
printf ("Done!\n"); /**/
FreeLabeledClassList (ClassList); FreeLabeledClassList (ClassList);
return 0; return 0;
} /* main */ } /* main */
...@@ -367,7 +369,7 @@ char **argv) ...@@ -367,7 +369,7 @@ char **argv)
** ShowSignificantProtos flag controlling proto display ** ShowSignificantProtos flag controlling proto display
** ShowInsignificantProtos flag controlling proto display ** ShowInsignificantProtos flag controlling proto display
** Config current clustering parameters ** Config current clustering parameters
** optarg, optind defined by tessopt sys call ** tessoptarg, tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv ** Argc, Argv global copies of argc and argv
** Operation: ** Operation:
** This routine parses the command line arguments that were ** This routine parses the command line arguments that were
...@@ -392,7 +394,6 @@ char **argv) ...@@ -392,7 +394,6 @@ char **argv)
int Option; int Option;
int ParametersRead; int ParametersRead;
BOOL8 Error; BOOL8 Error;
extern char *optarg;
Error = FALSE; Error = FALSE;
Argc = argc; Argc = argc;
...@@ -411,37 +412,37 @@ char **argv) ...@@ -411,37 +412,37 @@ char **argv)
ShowAllSamples = FALSE; ShowAllSamples = FALSE;
break; break;
case 'C': case 'C':
ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) ); ParametersRead = sscanf( tessoptarg, "%lf", &(Config.Confidence) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Confidence > 1 ) Config.Confidence = 1; else if ( Config.Confidence > 1 ) Config.Confidence = 1;
else if ( Config.Confidence < 0 ) Config.Confidence = 0; else if ( Config.Confidence < 0 ) Config.Confidence = 0;
break; break;
case 'I': case 'I':
ParametersRead = sscanf( optarg, "%f", &(Config.Independence) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.Independence) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.Independence > 1 ) Config.Independence = 1; else if ( Config.Independence > 1 ) Config.Independence = 1;
else if ( Config.Independence < 0 ) Config.Independence = 0; else if ( Config.Independence < 0 ) Config.Independence = 0;
break; break;
case 'M': case 'M':
ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.MinSamples) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MinSamples > 1 ) Config.MinSamples = 1; else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
else if ( Config.MinSamples < 0 ) Config.MinSamples = 0; else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
break; break;
case 'B': case 'B':
ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) ); ParametersRead = sscanf( tessoptarg, "%f", &(Config.MaxIllegal) );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1; else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0; else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
break; break;
case 'R': case 'R':
ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy ); ParametersRead = sscanf( tessoptarg, "%f", &RoundingAccuracy );
if ( ParametersRead != 1 ) Error = TRUE; if ( ParametersRead != 1 ) Error = TRUE;
else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01; else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0; else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
break; break;
case 'S': case 'S':
switch ( optarg[0] ) switch ( tessoptarg[0] )
{ {
case 's': Config.ProtoStyle = spherical; break; case 's': Config.ProtoStyle = spherical; break;
case 'e': Config.ProtoStyle = elliptical; break; case 'e': Config.ProtoStyle = elliptical; break;
...@@ -451,10 +452,10 @@ char **argv) ...@@ -451,10 +452,10 @@ char **argv)
} }
break; break;
case 'D': case 'D':
Directory = optarg; Directory = tessoptarg;
break; break;
case 'N': case 'N':
if (sscanf (optarg, "%d", &MaxNumSamples) != 1 || if (sscanf (tessoptarg, "%d", &MaxNumSamples) != 1 ||
MaxNumSamples <= 0) MaxNumSamples <= 0)
Error = TRUE; Error = TRUE;
break; break;
...@@ -478,7 +479,7 @@ char *GetNextFilename () ...@@ -478,7 +479,7 @@ char *GetNextFilename ()
/* /*
** Parameters: none ** Parameters: none
** Globals: ** Globals:
** optind defined by tessopt sys call ** tessoptind defined by tessopt sys call
** Argc, Argv global copies of argc and argv ** Argc, Argv global copies of argc and argv
** Operation: ** Operation:
** This routine returns the next command line argument. If ** This routine returns the next command line argument. If
...@@ -491,8 +492,8 @@ char *GetNextFilename () ...@@ -491,8 +492,8 @@ char *GetNextFilename ()
*/ */
{ {
if (optind < Argc) if (tessoptind < Argc)
return (Argv [optind++]); return (Argv [tessoptind++]);
else else
return (NULL); return (NULL);
...@@ -519,33 +520,41 @@ LIST ReadTrainingSamples ( ...@@ -519,33 +520,41 @@ LIST ReadTrainingSamples (
*/ */
{ {
char CharName[MAXNAMESIZE]; char unichar[UNICHAR_LEN + 1];
LABELEDLIST CharSample; LABELEDLIST CharSample;
FEATURE_SET FeatureSamples; FEATURE_SET FeatureSamples;
LIST TrainingSamples = NIL; LIST TrainingSamples = NIL;
CHAR_DESC CharDesc; CHAR_DESC CharDesc;
int Type, i; int Type, i;
while (fscanf (File, "%s %s", FontName, CharName) == 2) { while (fscanf (File, "%s %s", FontName, unichar) == 2) {
CharSample = FindList (TrainingSamples, CharName); if (!unicharset_mftraining.contains_unichar(unichar)) {
unicharset_mftraining.unichar_insert(unichar);
if (unicharset_mftraining.size() > MAX_NUM_CLASSES) {
cprintf("Error: Size of unicharset of mftraining is "
"greater than MAX_NUM_CLASSES\n");
exit(1);
}
}
CharSample = FindList (TrainingSamples, unichar);
if (CharSample == NULL) { if (CharSample == NULL) {
CharSample = NewLabeledList (CharName); CharSample = NewLabeledList (unichar);
TrainingSamples = push (TrainingSamples, CharSample); TrainingSamples = push (TrainingSamples, CharSample);
} }
CharDesc = ReadCharDescription (File); CharDesc = ReadCharDescription (File);
Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE); Type = ShortNameToFeatureType(PROGRAM_FEATURE_TYPE);
FeatureSamples = FeaturesOfType(CharDesc, Type); FeatureSamples = FeaturesOfType(CharDesc, Type);
for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) { for (int feature = 0; feature < FeatureSamples->NumFeatures; ++feature) {
FEATURE f = FeatureSamples->Features[feature]; FEATURE f = FeatureSamples->Features[feature];
for (int dim =0; dim < f->Type->NumParams; ++dim) for (int dim =0; dim < f->Type->NumParams; ++dim)
f->Params[dim] += UniformRandomNumber(-MINSD, MINSD); f->Params[dim] += UniformRandomNumber(-MINSD, MINSD);
} }
CharSample->List = push (CharSample->List, FeatureSamples); CharSample->List = push (CharSample->List, FeatureSamples);
for (i = 0; i < NumFeatureSetsIn (CharDesc); i++) for (i = 0; i < NumFeatureSetsIn (CharDesc); i++)
if (Type != i) if (Type != i)
FreeFeatureSet (FeaturesOfType (CharDesc, i)); FreeFeatureSet (FeaturesOfType (CharDesc, i));
free (CharDesc); free (CharDesc);
} }
return (TrainingSamples); return (TrainingSamples);
} /* ReadTrainingSamples */ } /* ReadTrainingSamples */
...@@ -843,7 +852,7 @@ void WriteProtos( ...@@ -843,7 +852,7 @@ void WriteProtos(
int i; int i;
PROTO Proto; PROTO Proto;
fprintf(File, "%c\n", NameToChar(MergeClass->Label)); fprintf(File, "%s\n", MergeClass->Label);
fprintf(File, "%d\n", NumProtosIn(MergeClass->Class)); fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));
for(i=0; i < NumProtosIn(MergeClass->Class); i++) for(i=0; i < NumProtosIn(MergeClass->Class); i++)
{ {
...@@ -900,7 +909,7 @@ void FreeTrainingSamples ( ...@@ -900,7 +909,7 @@ void FreeTrainingSamples (
LIST FeatureList; LIST FeatureList;
printf ("\nFreeTrainingSamples..."); // printf ("FreeTrainingSamples...\n");
iterate (CharList) /* iterate thru all of the fonts */ iterate (CharList) /* iterate thru all of the fonts */
{ {
CharSample = (LABELEDLIST) first_node (CharList); CharSample = (LABELEDLIST) first_node (CharList);
...@@ -1161,12 +1170,13 @@ void SetUpForFloat2Int( ...@@ -1161,12 +1170,13 @@ void SetUpForFloat2Int(
BIT_VECTOR NewConfig; BIT_VECTOR NewConfig;
BIT_VECTOR OldConfig; BIT_VECTOR OldConfig;
printf("Float2Int ..."); // printf("Float2Int ...\n");
iterate(LabeledClassList) iterate(LabeledClassList)
{ {
MergeClass = (MERGE_CLASS) first_node (LabeledClassList); MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
Class = &TrainingData[NameToChar(MergeClass->Label)]; Class = &TrainingData[unicharset_mftraining.unichar_to_id(
MergeClass->Label)];
NumProtos = NumProtosIn(MergeClass->Class); NumProtos = NumProtosIn(MergeClass->Class);
NumConfigs = NumConfigsIn(MergeClass->Class); NumConfigs = NumConfigsIn(MergeClass->Class);
...@@ -1204,3 +1214,20 @@ void SetUpForFloat2Int( ...@@ -1204,3 +1214,20 @@ void SetUpForFloat2Int(
} }
} }
} // SetUpForFloat2Int } // SetUpForFloat2Int
/*--------------------------------------------------------------------------*/
// Writes the pffmtable file to `filename`.
//
// For every class index in `Templates`, one text line is emitted:
//     <unichar string> <max length>
// where the unichar string is looked up in unicharset_mftraining from the
// class id at that index, and <max length> is the largest value that
// LengthForConfigId reports over all of the class's configs (0 when the
// class has no configs).
//
// The file is opened through Efopen in "wb" mode and closed before returning.
// NOTE(review): no NULL check is made on the stream here; presumably Efopen
// handles open failures itself -- confirm against efio.
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
  FILE* table_file = Efopen(filename, "wb");

  int class_index = 0;
  while (class_index < NumClassesIn (Templates)) {
    INT_CLASS int_class = ClassForIndex (Templates, class_index);

    // Track the longest config length seen for this class.
    int longest = 0;
    int config_id = 0;
    while (config_id < NumIntConfigsIn (int_class)) {
      if (LengthForConfigId (int_class, config_id) > longest)
        longest = LengthForConfigId (int_class, config_id);
      ++config_id;
    }

    fprintf(table_file, "%s %d\n",
            unicharset_mftraining.id_to_unichar(
                ClassIdForIndex(Templates, class_index)),
            longest);
    ++class_index;
  }

  fclose(table_file);
} // WritePFFMTable
...@@ -52,8 +52,8 @@ int main(int argc, char** argv) { ...@@ -52,8 +52,8 @@ int main(int argc, char** argv) {
while ((option = tessopt(argc, argv, "D" )) != EOF) { while ((option = tessopt(argc, argv, "D" )) != EOF) {
switch (option) { switch (option) {
case 'D': case 'D':
output_directory = optarg; output_directory = tessoptarg;
++optind; ++tessoptind;
break; break;
} }
} }
...@@ -64,12 +64,12 @@ int main(int argc, char** argv) { ...@@ -64,12 +64,12 @@ int main(int argc, char** argv) {
unicharset_file_name += kUnicharsetFileName; unicharset_file_name += kUnicharsetFileName;
// Load box files // Load box files
for (; optind < argc; ++optind) { for (; tessoptind < argc; ++tessoptind) {
printf("Extracting unicharset from %s\n", argv[optind]); printf("Extracting unicharset from %s\n", argv[tessoptind]);
FILE* box_file = fopen(argv[optind], "r"); FILE* box_file = fopen(argv[tessoptind], "r");
if (box_file == NULL) { if (box_file == NULL) {
printf("Cannot open box file %s\n", argv[optind]); printf("Cannot open box file %s\n", argv[tessoptind]);
return -1; return -1;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册