Commit 3cbfe485, authored by Tom Lane

Remove useless "retry memory" logic within regex engine.

Apparently some primordial version of Spencer's engine needed cdissect()
and child functions to be able to continue matching from a previous
position when re-called.  That is dead code, though, since trivial
inspection shows that cdissect can never be entered without zapmem
having been called first, which resets the relevant retry counter.  I have
also verified experimentally that no case in the Tcl regression tests
reaches cdissect with a nonzero retry value.  Accordingly, remove that
logic.  This doesn't really save any noticeable number of cycles in itself,
but it is one step towards making dissect() and cdissect() equivalent,
which will allow removing hundreds of lines of near-duplicated code.

Since struct subre's "retry" field is no longer particularly related to
any kind of retry, rename it to "id".  As of this commit it's only used
for identifying a subre node in debug printouts, so you might think we
should get rid of the field entirely; but I have a plan for another use.
Parent commit: 1fbacbf9
......@@ -1623,7 +1623,7 @@ subre(struct vars * v,
ret->op = op;
ret->flags = flags;
ret->retry = 0;
ret->id = 0; /* will be assigned later */
ret->subno = 0;
ret->min = ret->max = 1;
ret->left = NULL;
......@@ -1693,7 +1693,7 @@ optst(struct vars * v,
}
/*
* numst - number tree nodes (assigning retry indexes)
* numst - number tree nodes (assigning "id" indexes)
*/
static int /* next number */
numst(struct subre * t,
......@@ -1704,7 +1704,7 @@ numst(struct subre * t,
assert(t != NULL);
i = start;
t->retry = (short) i++;
t->id = (short) i++;
if (t->left != NULL)
i = numst(t->left, i);
if (t->right != NULL)
......@@ -1999,11 +1999,11 @@ stid(struct subre * t,
char *buf,
size_t bufsize)
{
/* big enough for hex int or decimal t->retry? */
if (bufsize < sizeof(void *) * 2 + 3 || bufsize < sizeof(t->retry) * 3 + 1)
/* big enough for hex int or decimal t->id? */
if (bufsize < sizeof(void *) * 2 + 3 || bufsize < sizeof(t->id) * 3 + 1)
return "unable";
if (t->retry != 0)
sprintf(buf, "%d", t->retry);
if (t->id != 0)
sprintf(buf, "%d", t->id);
else
sprintf(buf, "%p", t);
return buf;
......
......@@ -112,7 +112,6 @@ struct vars
chr *search_start; /* search start of string */
chr *stop; /* just past end of string */
int err; /* error code if any (0 none) */
regoff_t *mem; /* memory vector for backtracking */
struct smalldfa dfa1;
struct smalldfa dfa2;
};
......@@ -134,8 +133,8 @@ struct vars
static int find(struct vars *, struct cnfa *, struct colormap *);
static int cfind(struct vars *, struct cnfa *, struct colormap *);
static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
static void zapsubs(regmatch_t *, size_t);
static void zapmem(struct vars *, struct subre *);
static void zapallsubs(regmatch_t *, size_t);
static void zaptreesubs(struct vars *, struct subre *);
static void subset(struct vars *, struct subre *, chr *, chr *);
static int dissect(struct vars *, struct subre *, chr *, chr *);
static int condissect(struct vars *, struct subre *, chr *, chr *);
......@@ -186,9 +185,6 @@ pg_regexec(regex_t *re,
#define LOCALMAT 20
regmatch_t mat[LOCALMAT];
#define LOCALMEM 40
regoff_t mem[LOCALMEM];
/* sanity checks */
if (re == NULL || string == NULL || re->re_magic != REMAGIC)
return REG_INVARG;
......@@ -229,24 +225,6 @@ pg_regexec(regex_t *re,
v->search_start = (chr *) string + search_start;
v->stop = (chr *) string + len;
v->err = 0;
if (backref)
{
/* need retry memory */
assert(v->g->ntree >= 0);
n = (size_t) v->g->ntree;
if (n <= LOCALMEM)
v->mem = mem;
else
v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t));
if (v->mem == NULL)
{
if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
return REG_ESPACE;
}
}
else
v->mem = NULL;
/* do it */
assert(v->g->tree != NULL);
......@@ -258,7 +236,7 @@ pg_regexec(regex_t *re,
/* copy (portion of) match vector over if necessary */
if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
{
zapsubs(pmatch, nmatch);
zapallsubs(pmatch, nmatch);
n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
}
......@@ -266,8 +244,6 @@ pg_regexec(regex_t *re,
/* clean up */
if (v->pmatch != pmatch && v->pmatch != mat)
FREE(v->pmatch);
if (v->mem != NULL && v->mem != mem)
FREE(v->mem);
return st;
}
......@@ -354,7 +330,7 @@ find(struct vars * v,
return REG_OKAY;
/* submatches */
zapsubs(v->pmatch, v->nmatch);
zapallsubs(v->pmatch, v->nmatch);
return dissect(v, v->g->tree, begin, end);
}
......@@ -451,8 +427,7 @@ cfindloop(struct vars * v,
if (end == NULL)
break; /* NOTE BREAK OUT */
MDEBUG(("tentative end %ld\n", LOFF(end)));
zapsubs(v->pmatch, v->nmatch);
zapmem(v, v->g->tree);
zapallsubs(v->pmatch, v->nmatch);
er = cdissect(v, v->g->tree, begin, end);
if (er == REG_OKAY)
{
......@@ -490,11 +465,11 @@ cfindloop(struct vars * v,
}
/*
* zapsubs - initialize the subexpression matches to "no match"
* zapallsubs - initialize all subexpression matches to "no match"
*/
static void
zapsubs(regmatch_t *p,
size_t n)
zapallsubs(regmatch_t *p,
size_t n)
{
size_t i;
......@@ -506,17 +481,12 @@ zapsubs(regmatch_t *p,
}
/*
* zapmem - initialize the retry memory of a subtree to zeros
* zaptreesubs - initialize subexpressions within subtree to "no match"
*/
static void
zapmem(struct vars * v,
struct subre * t)
zaptreesubs(struct vars * v,
struct subre * t)
{
if (t == NULL)
return;
assert(v->mem != NULL);
v->mem[t->retry] = 0;
if (t->op == '(')
{
assert(t->subno > 0);
......@@ -525,9 +495,9 @@ zapmem(struct vars * v,
}
if (t->left != NULL)
zapmem(v, t->left);
zaptreesubs(v, t->left);
if (t->right != NULL)
zapmem(v, t->right);
zaptreesubs(v, t->right);
}
/*
......@@ -767,7 +737,7 @@ iterdissect(struct vars * v,
FREE(endpts);
return v->err;
}
MDEBUG(("iter %d\n", t->retry));
MDEBUG(("iter %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
......@@ -796,7 +766,7 @@ iterdissect(struct vars * v,
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->retry, k, LOFF(endpts[k])));
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
......@@ -831,7 +801,7 @@ iterdissect(struct vars * v,
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->retry, nverified + 1, k));
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
......@@ -852,7 +822,7 @@ iterdissect(struct vars * v,
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->retry));
MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts);
return REG_OKAY;
......@@ -885,7 +855,7 @@ backtrack:
}
/* all possibilities exhausted - shouldn't happen in uncomplicated mode */
MDEBUG(("%d failed\n", t->retry));
MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts);
return REG_ASSERT;
......@@ -953,7 +923,7 @@ reviterdissect(struct vars * v,
FREE(endpts);
return v->err;
}
MDEBUG(("reviter %d\n", t->retry));
MDEBUG(("reviter %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
......@@ -989,7 +959,7 @@ reviterdissect(struct vars * v,
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->retry, k, LOFF(endpts[k])));
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
......@@ -1019,7 +989,7 @@ reviterdissect(struct vars * v,
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->retry, nverified + 1, k));
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
......@@ -1040,7 +1010,7 @@ reviterdissect(struct vars * v,
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->retry));
MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts);
return REG_OKAY;
......@@ -1066,7 +1036,7 @@ backtrack:
}
/* all possibilities exhausted - shouldn't happen in uncomplicated mode */
MDEBUG(("%d failed\n", t->retry));
MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts);
return REG_ASSERT;
......@@ -1074,8 +1044,6 @@ backtrack:
/*
* cdissect - determine subexpression matches (with complications)
* The retry memory stores the offset of the trial midpoint from begin,
* plus 1 so that 0 uniquely means "clean slate".
*/
static int /* regexec return code */
cdissect(struct vars * v,
......@@ -1119,8 +1087,6 @@ cdissect(struct vars * v,
/*
* ccondissect - concatenation subexpression matches (with complications)
* The retry memory stores the offset of the trial midpoint from begin,
* plus 1 so that 0 uniquely means "clean slate".
*/
static int /* regexec return code */
ccondissect(struct vars * v,
......@@ -1149,26 +1115,17 @@ ccondissect(struct vars * v,
freedfa(d);
return v->err;
}
MDEBUG(("cconcat %d\n", t->retry));
MDEBUG(("cconcat %d\n", t->id));
/* pick a tentative midpoint */
if (v->mem[t->retry] == 0)
{
mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL)
{
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
}
else
mid = longest(v, d, begin, end, (int *) NULL);
if (mid == NULL)
{
mid = begin + (v->mem[t->retry] - 1);
MDEBUG(("working midpoint %ld\n", LOFF(mid)));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
for (;;)
......@@ -1201,7 +1158,7 @@ ccondissect(struct vars * v,
if (mid == begin)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->retry));
MDEBUG(("%d no midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
......@@ -1210,15 +1167,14 @@ ccondissect(struct vars * v,
if (mid == NULL)
{
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->retry));
MDEBUG(("%d failed midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
zapmem(v, t->left);
zapmem(v, t->right);
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
/* can't get here */
......@@ -1227,8 +1183,6 @@ ccondissect(struct vars * v,
/*
* crevdissect - shortest-first concatenation subexpression matches
* The retry memory stores the offset of the trial midpoint from begin,
* plus 1 so that 0 uniquely means "clean slate".
*/
static int /* regexec return code */
crevdissect(struct vars * v,
......@@ -1256,26 +1210,17 @@ crevdissect(struct vars * v,
freedfa(d);
return v->err;
}
MDEBUG(("crev %d\n", t->retry));
MDEBUG(("crev %d\n", t->id));
/* pick a tentative midpoint */
if (v->mem[t->retry] == 0)
{
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
if (mid == NULL)
{
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
}
else
mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
if (mid == NULL)
{
mid = begin + (v->mem[t->retry] - 1);
MDEBUG(("working midpoint %ld\n", LOFF(mid)));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
/* iterate until satisfaction or failure */
for (;;)
......@@ -1308,7 +1253,7 @@ crevdissect(struct vars * v,
if (mid == end)
{
/* all possibilities exhausted */
MDEBUG(("%d no midpoint\n", t->retry));
MDEBUG(("%d no midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
......@@ -1317,15 +1262,14 @@ crevdissect(struct vars * v,
if (mid == NULL)
{
/* failed to find a new one */
MDEBUG(("%d failed midpoint\n", t->retry));
MDEBUG(("%d failed midpoint\n", t->id));
freedfa(d);
freedfa(d2);
return REG_NOMATCH;
}
MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
v->mem[t->retry] = (mid - begin) + 1;
zapmem(v, t->left);
zapmem(v, t->right);
MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
zaptreesubs(v, t->left);
zaptreesubs(v, t->right);
}
/* can't get here */
......@@ -1355,7 +1299,7 @@ cbrdissect(struct vars * v,
assert(n >= 0);
assert((size_t) n < v->nmatch);
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
/* get the backreferenced string */
if (v->pmatch[n].rm_so == -1)
......@@ -1363,11 +1307,6 @@ cbrdissect(struct vars * v,
brstring = v->start + v->pmatch[n].rm_so;
brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
/* no room to maneuver -- retries are pointless */
if (v->mem[t->retry])
return REG_NOMATCH;
v->mem[t->retry] = 1;
/* special cases for zero-length strings */
if (brlen == 0)
{
......@@ -1430,40 +1369,29 @@ caltdissect(struct vars * v,
struct dfa *d;
int er;
#define UNTRIED 0 /* not yet tried at all */
#define TRYING 1 /* top matched, trying submatches */
#define TRIED 2 /* top didn't match or submatches exhausted */
if (t == NULL)
return REG_NOMATCH;
assert(t->op == '|');
if (v->mem[t->retry] == TRIED)
return caltdissect(v, t->right, begin, end);
MDEBUG(("calt n%d\n", t->retry));
assert(t->op == '|');
assert(t->left != NULL);
if (v->mem[t->retry] == UNTRIED)
MDEBUG(("calt n%d\n", t->id));
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
if (ISERR())
return v->err;
if (longest(v, d, begin, end, (int *) NULL) != end)
{
d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
if (ISERR())
return v->err;
if (longest(v, d, begin, end, (int *) NULL) != end)
{
freedfa(d);
v->mem[t->retry] = TRIED;
return caltdissect(v, t->right, begin, end);
}
freedfa(d);
MDEBUG(("calt matched\n"));
v->mem[t->retry] = TRYING;
return caltdissect(v, t->right, begin, end);
}
freedfa(d);
MDEBUG(("calt matched\n"));
er = cdissect(v, t->left, begin, end);
if (er != REG_NOMATCH)
return er;
v->mem[t->retry] = TRIED;
return caltdissect(v, t->right, begin, end);
}
......@@ -1531,7 +1459,7 @@ citerdissect(struct vars * v,
FREE(endpts);
return v->err;
}
MDEBUG(("citer %d\n", t->retry));
MDEBUG(("citer %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
......@@ -1560,7 +1488,7 @@ citerdissect(struct vars * v,
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->retry, k, LOFF(endpts[k])));
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
......@@ -1595,11 +1523,11 @@ citerdissect(struct vars * v,
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->retry, nverified + 1, k));
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
zapmem(v, t->left);
zaptreesubs(v, t->left);
er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
{
......@@ -1617,7 +1545,7 @@ citerdissect(struct vars * v,
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->retry));
MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts);
return REG_OKAY;
......@@ -1650,7 +1578,7 @@ backtrack:
}
/* all possibilities exhausted */
MDEBUG(("%d failed\n", t->retry));
MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts);
return REG_NOMATCH;
......@@ -1718,7 +1646,7 @@ creviterdissect(struct vars * v,
FREE(endpts);
return v->err;
}
MDEBUG(("creviter %d\n", t->retry));
MDEBUG(("creviter %d\n", t->id));
/*
* Our strategy is to first find a set of sub-match endpoints that are
......@@ -1754,7 +1682,7 @@ creviterdissect(struct vars * v,
goto backtrack;
}
MDEBUG(("%d: working endpoint %d: %ld\n",
t->retry, k, LOFF(endpts[k])));
t->id, k, LOFF(endpts[k])));
/* k'th sub-match can no longer be considered verified */
if (nverified >= k)
......@@ -1784,11 +1712,11 @@ creviterdissect(struct vars * v,
if (k < min_matches)
goto backtrack;
MDEBUG(("%d: verifying %d..%d\n", t->retry, nverified + 1, k));
MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
for (i = nverified + 1; i <= k; i++)
{
zapmem(v, t->left);
zaptreesubs(v, t->left);
er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
if (er == REG_OKAY)
{
......@@ -1806,7 +1734,7 @@ creviterdissect(struct vars * v,
if (i > k)
{
/* satisfaction */
MDEBUG(("%d successful\n", t->retry));
MDEBUG(("%d successful\n", t->id));
freedfa(d);
FREE(endpts);
return REG_OKAY;
......@@ -1832,7 +1760,7 @@ backtrack:
}
/* all possibilities exhausted */
MDEBUG(("%d failed\n", t->retry));
MDEBUG(("%d failed\n", t->id));
freedfa(d);
FREE(endpts);
return REG_NOMATCH;
......
......@@ -409,7 +409,7 @@ struct subre
#define PREF(f) ((f)&LOCAL)
#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short retry; /* index into retry memory */
short id; /* ID of subre (1..ntree) */
int subno; /* subexpression number (for 'b' and '(') */
short min; /* min repetitions for iteration or backref */
short max; /* max repetitions for iteration or backref */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册