提交 c6aae304 编写于 作者: T Tom Lane

Simplify and document regex library's compact-NFA representation.

The previous coding abused the first element of a cNFA state's arcs list
to hold a per-state flag bit, which was confusing, undocumented, and not
even particularly efficient.  Get rid of that in favor of a separate
"stflags" vector.  Since there's only one bit in use, I chose to allocate a
char per state; we could possibly replace this with a bitmap at some point,
but that would make accesses a little slower.  It's already about 8X
smaller than before, so let's not get overly tense.

Also document the representation better than it was before, which is to say
not at all.

This patch is a byproduct of investigations towards extracting a "fixed
prefix" string from the compact-NFA representation of regex patterns.
Might need to back-patch it if we decide to back-patch that fix, but for
now it's just code cleanup so I'll just put it in HEAD.
上级 a184e4db
......@@ -1330,14 +1330,16 @@ compact(struct nfa * nfa,
for (s = nfa->states; s != NULL; s = s->next)
{
nstates++;
narcs += 1 + s->nouts + 1;
/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
narcs += s->nouts + 1; /* need one extra for endmarker */
}
cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
if (cnfa->states == NULL || cnfa->arcs == NULL)
if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
{
if (cnfa->stflags != NULL)
FREE(cnfa->stflags);
if (cnfa->states != NULL)
FREE(cnfa->states);
if (cnfa->arcs != NULL)
......@@ -1359,9 +1361,8 @@ compact(struct nfa * nfa,
for (s = nfa->states; s != NULL; s = s->next)
{
assert((size_t) s->no < nstates);
cnfa->stflags[s->no] = 0;
cnfa->states[s->no] = ca;
ca->co = 0; /* clear and skip flags "arc" */
ca++;
first = ca;
for (a = s->outs; a != NULL; a = a->outchain)
switch (a->type)
......@@ -1392,8 +1393,8 @@ compact(struct nfa * nfa,
/* mark no-progress states */
for (a = nfa->pre->outs; a != NULL; a = a->outchain)
cnfa->states[a->to->no]->co = 1;
cnfa->states[nfa->pre->no]->co = 1;
cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
}
/*
......@@ -1433,6 +1434,7 @@ freecnfa(struct cnfa * cnfa)
{
assert(cnfa->nstates != 0); /* not empty already */
cnfa->nstates = 0;
FREE(cnfa->stflags);
FREE(cnfa->states);
FREE(cnfa->arcs);
}
......@@ -1617,7 +1619,7 @@ dumpcnfa(struct cnfa * cnfa,
fprintf(f, ", haslacons");
fprintf(f, "\n");
for (st = 0; st < cnfa->nstates; st++)
dumpcstate(st, cnfa->states[st], cnfa, f);
dumpcstate(st, cnfa, f);
fflush(f);
}
#endif
......@@ -1629,22 +1631,20 @@ dumpcnfa(struct cnfa * cnfa,
*/
static void
dumpcstate(int st,
struct carc * ca,
struct cnfa * cnfa,
FILE *f)
{
int i;
struct carc * ca;
int pos;
fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
pos = 1;
for (i = 1; ca[i].co != COLORLESS; i++)
for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
{
if (ca[i].co < cnfa->ncolors)
fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
if (ca->co < cnfa->ncolors)
fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
else
fprintf(f, "\t:%ld:->%d", (long) ca[i].co - cnfa->ncolors,
ca[i].to);
fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
if (pos == 5)
{
fprintf(f, "\n");
......@@ -1653,7 +1653,7 @@ dumpcstate(int st,
else
pos++;
}
if (i == 1 || pos != 1)
if (ca == cnfa->states[st] || pos != 1)
fprintf(f, "\n");
fflush(f);
}
......
......@@ -162,7 +162,7 @@ static void dumparcs(struct state *, FILE *);
static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
static void dumpcstate(int, struct cnfa *, FILE *);
#endif
/* === regc_cvec.c === */
static struct cvec *newcvec(int, int);
......
......@@ -457,14 +457,14 @@ miss(struct vars * v, /* used only for debug flags */
gotstate = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(css->states, i))
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS; ca++)
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
if (ca->co == co)
{
BSET(d->work, ca->to);
gotstate = 1;
if (ca->to == cnfa->post)
ispost = 1;
if (!cnfa->states[ca->to]->co)
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d -> %d\n", i, ca->to));
}
......@@ -475,10 +475,9 @@ miss(struct vars * v, /* used only for debug flags */
dolacons = 0;
for (i = 0; i < d->nstates; i++)
if (ISBSET(d->work, i))
for (ca = cnfa->states[i] + 1; ca->co != COLORLESS;
ca++)
for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
{
if (ca->co <= cnfa->ncolors)
if (ca->co < cnfa->ncolors)
continue; /* NOTE CONTINUE */
sawlacons = 1;
if (ISBSET(d->work, ca->to))
......@@ -489,7 +488,7 @@ miss(struct vars * v, /* used only for debug flags */
dolacons = 1;
if (ca->to == cnfa->post)
ispost = 1;
if (!cnfa->states[ca->to]->co)
if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
noprogress = 0;
FDEBUG(("%d :> %d\n", i, ca->to));
}
......
......@@ -279,15 +279,14 @@ struct state;
struct arc
{
int type;
#define ARCFREE '\0'
int type; /* 0 if free, else an NFA arc type code */
color co;
struct state *from; /* where it's from (and contained within) */
struct state *to; /* where it's to */
struct arc *outchain; /* *from's outs chain or free chain */
struct arc *outchain; /* link in *from's outs chain or free chain */
#define freechain outchain
struct arc *inchain; /* *to's ins chain */
struct arc *colorchain; /* color's arc chain */
struct arc *inchain; /* link in *to's ins chain */
struct arc *colorchain; /* link in color's arc chain */
struct arc *colorchainRev; /* back-link in color's arc chain */
};
......@@ -339,24 +338,38 @@ struct nfa
/*
* definitions for compacted NFA
*
* The main space savings in a compacted NFA is from making the arcs as small
* as possible. We store only the transition color and next-state number for
* each arc. The list of out arcs for each state is an array beginning at
* cnfa.states[statenumber], and terminated by a dummy carc struct with
* co == COLORLESS.
*
* The non-dummy carc structs are of two types: plain arcs and LACON arcs.
* Plain arcs just store the transition color number as "co". LACON arcs
* store the lookahead constraint number plus cnfa.ncolors as "co". LACON
* arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
*/
struct carc
{
color co; /* COLORLESS is list terminator */
int to; /* state number */
int to; /* next-state number */
};
struct cnfa
{
int nstates; /* number of states */
int ncolors; /* number of colors */
int ncolors; /* number of colors (max color in use + 1) */
int flags;
#define HASLACONS 01 /* uses lookahead constraints */
#define HASLACONS 01 /* uses lookahead constraints */
int pre; /* setup state number */
int post; /* teardown state number */
color bos[2]; /* colors, if any, assigned to BOS and BOL */
color eos[2]; /* colors, if any, assigned to EOS and EOL */
char *stflags; /* vector of per-state flags bytes */
#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */
struct carc **states; /* vector of pointers to outarc lists */
/* states[n] are pointers into a single malloc'd array of arcs */
struct carc *arcs; /* the area for the lists */
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册