From 4f94b49a318c1130e1c9fe82eca2ad4a894604f0 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 7 Dec 2005 13:12:54 +0000 Subject: [PATCH] Improve word parser. - allow ~ in filenames - -8.2.1 now is '-' and '8.2.1' instead of '-8.2' '.' '1' - '.text' now is not a file --- contrib/tsearch2/wordparser/parser.c | 48 +++++++++++++++++++++++++++- contrib/tsearch2/wordparser/parser.h | 4 +++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/contrib/tsearch2/wordparser/parser.c b/contrib/tsearch2/wordparser/parser.c index deccdb284a..37f020ef68 100644 --- a/contrib/tsearch2/wordparser/parser.c +++ b/contrib/tsearch2/wordparser/parser.c @@ -243,6 +243,15 @@ SpecialHyphen(TParser * prs) prs->state->poschar -= prs->state->lencharlexeme; } +static void +SpecialVerVersion(TParser * prs) +{ + prs->state->posbyte -= prs->state->lenbytelexeme; + prs->state->poschar -= prs->state->lencharlexeme; + prs->state->lenbytelexeme = 0; + prs->state->lencharlexeme = 0; +} + static int p_isstophost(TParser * prs) { @@ -326,8 +335,9 @@ static TParserStateActionItem actionTPS_Base[] = { {p_iseqC, '-', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, {p_iseqC, '+', A_PUSH, TPS_InSignedIntFirst, 0, NULL}, {p_iseqC, '&', A_PUSH, TPS_InHTMLEntityFirst, 0, NULL}, + {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, {p_iseqC, '/', A_PUSH, TPS_InFileFirst, 0, NULL}, - {p_iseqC, '.', A_PUSH, TPS_InPathFirst, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InPathFirstFirst, 0, NULL}, {NULL, 0, A_NEXT, TPS_InSpace, 0, NULL} }; @@ -429,11 +439,25 @@ static TParserStateActionItem actionTPS_InDecimalFirst[] = { static TParserStateActionItem actionTPS_InDecimal[] = { {p_isEOF, 0, A_BINGO, TPS_Base, DECIMAL, NULL}, {p_isdigit, 0, A_NEXT, TPS_InDecimal, 0, NULL}, + {p_iseqC, '.', A_PUSH, TPS_InVerVersion, 0, NULL}, {p_iseqC, 'e', A_PUSH, TPS_InMantissaFirst, 0, NULL}, {p_iseqC, 'E', A_PUSH, TPS_InMantissaFirst, 0, NULL}, {NULL, 0, A_BINGO, TPS_Base, DECIMAL, NULL} }; +static TParserStateActionItem 
actionTPS_InVerVersion[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_RERUN, TPS_InSVerVersion, 0, SpecialVerVersion}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InSVerVersion[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_isdigit, 0, A_BINGO, TPS_InUnsignedInt, SPACE, NULL}, + {NULL, 0, A_NEXT, TPS_Null, 0, NULL} +}; + + static TParserStateActionItem actionTPS_InVersionFirst[] = { {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, {p_isdigit, 0, A_CLEAR, TPS_InVersion, 0, NULL}, @@ -537,6 +561,7 @@ static TParserStateActionItem actionTPS_InTag[] = { {p_iseqC, '&', A_NEXT, TPS_Null, 0, NULL}, {p_iseqC, '?', A_NEXT, TPS_Null, 0, NULL}, {p_iseqC, '%', A_NEXT, TPS_Null, 0, NULL}, + {p_iseqC, '~', A_NEXT, TPS_Null, 0, NULL}, {p_isspace, 0, A_NEXT, TPS_Null, 0, SpecialTags}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; @@ -676,6 +701,16 @@ static TParserStateActionItem actionTPS_InFileFirst[] = { {p_iseqC, '.', A_NEXT, TPS_InPathFirst, 0, NULL}, {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, {p_iseqC, '?', A_PUSH, TPS_InURIFirst, 0, NULL}, + {p_iseqC, '~', A_PUSH, TPS_InFileTwiddle, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + +static TParserStateActionItem actionTPS_InFileTwiddle[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_islatin, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_isdigit, 0, A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '_', A_NEXT, TPS_InFile, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, {NULL, 0, A_POP, TPS_Null, 0, NULL} }; @@ -689,6 +724,13 @@ static TParserStateActionItem actionTPS_InPathFirst[] = { {NULL, 0, A_POP, TPS_Null, 0, NULL} }; +static TParserStateActionItem actionTPS_InPathFirstFirst[] = { + {p_isEOF, 0, A_POP, TPS_Null, 0, NULL}, + {p_iseqC, '.', A_NEXT, TPS_InPathSecond, 0, NULL}, + {p_iseqC, '/', A_NEXT, TPS_InFileFirst, 0, NULL}, + {NULL, 0, A_POP, TPS_Null, 0, NULL} +}; + static TParserStateActionItem actionTPS_InPathSecond[] = { {p_isEOF, 0, A_BINGO|A_CLEAR, 
TPS_Base, FILEPATH, NULL}, {p_iseqC, '/', A_NEXT|A_PUSH, TPS_InFileFirst, 0, NULL}, @@ -920,6 +962,8 @@ static const TParserStateAction Actions[] = { {TPS_InUDecimal, actionTPS_InUDecimal}, {TPS_InDecimalFirst, actionTPS_InDecimalFirst}, {TPS_InDecimal, actionTPS_InDecimal}, + {TPS_InVerVersion, actionTPS_InVerVersion}, + {TPS_InSVerVersion, actionTPS_InSVerVersion}, {TPS_InVersionFirst, actionTPS_InVersionFirst}, {TPS_InVersion, actionTPS_InVersion}, {TPS_InMantissaFirst, actionTPS_InMantissaFirst}, @@ -953,7 +997,9 @@ static const TParserStateAction Actions[] = { {TPS_InHost, actionTPS_InHost}, {TPS_InEmail, actionTPS_InEmail}, {TPS_InFileFirst, actionTPS_InFileFirst}, + {TPS_InFileTwiddle, actionTPS_InFileTwiddle}, {TPS_InPathFirst, actionTPS_InPathFirst}, + {TPS_InPathFirstFirst, actionTPS_InPathFirstFirst}, {TPS_InPathSecond, actionTPS_InPathSecond}, {TPS_InFile, actionTPS_InFile}, {TPS_InFileNext, actionTPS_InFileNext}, diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h index 9cdd141efd..3f7962feea 100644 --- a/contrib/tsearch2/wordparser/parser.h +++ b/contrib/tsearch2/wordparser/parser.h @@ -19,6 +19,8 @@ typedef enum TPS_InUDecimal, TPS_InDecimalFirst, TPS_InDecimal, + TPS_InVerVersion, + TPS_InSVerVersion, TPS_InVersionFirst, TPS_InVersion, TPS_InMantissaFirst, @@ -52,7 +54,9 @@ typedef enum TPS_InHost, TPS_InEmail, TPS_InFileFirst, + TPS_InFileTwiddle, TPS_InPathFirst, + TPS_InPathFirstFirst, TPS_InPathSecond, TPS_InFile, TPS_InFileNext, -- GitLab