diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index 66a361ee2ffe7d61c16ac609a9d9be4d249b8497..085842c92b70d74ea969488e0cc50d1466f2e53e 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -1330,14 +1330,16 @@ compact(struct nfa * nfa, for (s = nfa->states; s != NULL; s = s->next) { nstates++; - narcs += 1 + s->nouts + 1; - /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */ + narcs += s->nouts + 1; /* need one extra for endmarker */ } + cnfa->stflags = (char *) MALLOC(nstates * sizeof(char)); cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *)); cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc)); - if (cnfa->states == NULL || cnfa->arcs == NULL) + if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL) { + if (cnfa->stflags != NULL) + FREE(cnfa->stflags); if (cnfa->states != NULL) FREE(cnfa->states); if (cnfa->arcs != NULL) @@ -1359,9 +1361,8 @@ compact(struct nfa * nfa, for (s = nfa->states; s != NULL; s = s->next) { assert((size_t) s->no < nstates); + cnfa->stflags[s->no] = 0; cnfa->states[s->no] = ca; - ca->co = 0; /* clear and skip flags "arc" */ - ca++; first = ca; for (a = s->outs; a != NULL; a = a->outchain) switch (a->type) @@ -1392,8 +1393,8 @@ compact(struct nfa * nfa, /* mark no-progress states */ for (a = nfa->pre->outs; a != NULL; a = a->outchain) - cnfa->states[a->to->no]->co = 1; - cnfa->states[nfa->pre->no]->co = 1; + cnfa->stflags[a->to->no] = CNFA_NOPROGRESS; + cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS; } /* @@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa) { assert(cnfa->nstates != 0); /* not empty already */ cnfa->nstates = 0; + FREE(cnfa->stflags); FREE(cnfa->states); FREE(cnfa->arcs); } @@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa, fprintf(f, ", haslacons"); fprintf(f, "\n"); for (st = 0; st < cnfa->nstates; st++) - dumpcstate(st, cnfa->states[st], cnfa, f); + dumpcstate(st, cnfa, f); fflush(f); } #endif @@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa, */ static void dumpcstate(int st, - struct carc * ca, struct cnfa * cnfa, FILE *f) { - int i; + struct carc * ca; int pos; - fprintf(f, "%d%s", st, (ca[0].co) ? ":" : "."); + fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : "."); pos = 1; - for (i = 1; ca[i].co != COLORLESS; i++) + for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++) { - if (ca[i].co < cnfa->ncolors) - fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to); + if (ca->co < cnfa->ncolors) + fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to); else - fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors, - ca[i].to); + fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to); if (pos == 5) { fprintf(f, "\n"); @@ -1653,7 +1653,7 @@ dumpcstate(int st, else pos++; } - if (i == 1 || pos != 1) + if (ca == cnfa->states[st] || pos != 1) fprintf(f, "\n"); fflush(f); } diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 57055f04abb807466744daf74d8506b9f0e594b8..ceb6f0f8737e9e34fca9d118669fcc69e796c5f2 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *); static int dumprarcs(struct arc *, struct state *, FILE *, int); static void dumparc(struct arc *, struct state *, FILE *); static void dumpcnfa(struct cnfa *, FILE *); -static void dumpcstate(int, struct carc *, struct cnfa *, FILE *); +static void dumpcstate(int, struct cnfa *, FILE *); #endif /* === regc_cvec.c === */ static struct cvec *newcvec(int, int); diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c index da7a0bf402ff41bace723924fa8a8f0635eb15ec..7a7ba5b89cf28fc5a448a2b3826dfe5b9298db63 100644 --- a/src/backend/regex/rege_dfa.c +++ b/src/backend/regex/rege_dfa.c @@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */ gotstate = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(css->states, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++) + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) if (ca->co == co) { BSET(d->work, ca->to); gotstate = 1; if (ca->to == cnfa->post) ispost = 1; - if (!cnfa->states[ca->to]->co) + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) noprogress = 0; FDEBUG(("%d -> %d\n", i, ca->to)); } @@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */ dolacons = 0; for (i = 0; i < d->nstates; i++) if (ISBSET(d->work, i)) - for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; - ca++) + for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++) { - if (ca->co <= cnfa->ncolors) + if (ca->co < cnfa->ncolors) continue; /* NOTE CONTINUE */ sawlacons = 1; if (ISBSET(d->work, ca->to)) @@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */ dolacons = 1; if (ca->to == cnfa->post) ispost = 1; - if (!cnfa->states[ca->to]->co) + if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS)) noprogress = 0; FDEBUG(("%d :> %d\n", i, ca->to)); } diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index e8415799ec6da88945c5b93832e39f98b176fb4f..b8788506d417e83b3a293efae391ef63f8468e5b 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -279,15 +279,14 @@ struct state; struct arc { - int type; -#define ARCFREE '\0' + int type; /* 0 if free, else an NFA arc type code */ color co; struct state *from; /* where it's from (and contained within) */ struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ + struct arc *outchain; /* link in *from's outs chain or free chain */ #define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ + struct arc *inchain; /* link in *to's ins chain */ + struct arc *colorchain; /* link in color's arc chain */ struct arc *colorchainRev; /* back-link in color's arc chain */ }; @@ -339,24 +338,38 @@ struct nfa /* * definitions for compacted NFA + * + * The main space savings in a compacted NFA is from making the arcs as small + * as possible. We store only the transition color and next-state number for + * each arc. The list of out arcs for each state is an array beginning at + * cnfa.states[statenumber], and terminated by a dummy carc struct with + * co == COLORLESS. + * + * The non-dummy carc structs are of two types: plain arcs and LACON arcs. + * Plain arcs just store the transition color number as "co". LACON arcs + * store the lookahead constraint number plus cnfa.ncolors as "co". LACON + * arcs can be distinguished from plain by testing for co >= cnfa.ncolors. */ struct carc { color co; /* COLORLESS is list terminator */ - int to; /* state number */ + int to; /* next-state number */ }; struct cnfa { int nstates; /* number of states */ - int ncolors; /* number of colors */ + int ncolors; /* number of colors (max color in use + 1) */ int flags; -#define HASLACONS 01 /* uses lookahead constraints */ +#define HASLACONS 01 /* uses lookahead constraints */ int pre; /* setup state number */ int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + char *stflags; /* vector of per-state flags bytes */ +#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */ struct carc **states; /* vector of pointers to outarc lists */ + /* states[n] are pointers into a single malloc'd array of arcs */ struct carc *arcs; /* the area for the lists */ };