diff --git a/src/backend/regex/regc_color.c b/src/backend/regex/regc_color.c index 268e072c599b03c7baff2b8a9192e03bdda42643..87eb1e4958a28db84a0470cb98847dd9960adf04 100644 --- a/src/backend/regex/regc_color.c +++ b/src/backend/regex/regc_color.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.7 2007/11/15 21:14:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_color.c,v 1.8 2008/01/03 20:47:55 tgl Exp $ * * * Note that there are some incestuous relationships between this code and @@ -569,12 +569,9 @@ okcolors(struct nfa * nfa, while ((a = cd->arcs) != NULL) { assert(a->co == co); - /* uncolorchain(cm, a); */ - cd->arcs = a->colorchain; + uncolorchain(cm, a); a->co = sco; - /* colorchain(cm, a); */ - a->colorchain = scd->arcs; - scd->arcs = a; + colorchain(cm, a); } freecolor(cm, co); } @@ -604,7 +601,10 @@ colorchain(struct colormap * cm, { struct colordesc *cd = &cm->cd[a->co]; + if (cd->arcs != NULL) + cd->arcs->colorchainRev = a; a->colorchain = cd->arcs; + a->colorchainRev = NULL; cd->arcs = a; } @@ -616,19 +616,22 @@ uncolorchain(struct colormap * cm, struct arc * a) { struct colordesc *cd = &cm->cd[a->co]; - struct arc *aa; + struct arc *aa = a->colorchainRev; - aa = cd->arcs; - if (aa == a) /* easy case */ + if (aa == NULL) + { + assert(cd->arcs == a); cd->arcs = a->colorchain; + } else { - for (; aa != NULL && aa->colorchain != a; aa = aa->colorchain) - continue; - assert(aa != NULL); + assert(aa->colorchain == a); aa->colorchain = a->colorchain; } + if (a->colorchain != NULL) + a->colorchain->colorchainRev = aa; a->colorchain = NULL; /* paranoia */ + a->colorchainRev = NULL; } /* diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index e5c83d8714708a3e42d7e9dcd195aa3ef5f75620..fc86ca322a902a07ef01b3c2937f1936acfbb922 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.6 2007/10/22 01:02:22 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_lex.c,v 1.7 2008/01/03 20:47:55 tgl Exp $ * */ @@ -846,7 +846,7 @@ lexescape(struct vars * v) if (ISERR()) FAILW(REG_EESCAPE); /* ugly heuristic (first test is "exactly 1 digit?") */ - if (v->now - save == 0 || (int) c <= v->nsubexp) + if (v->now - save == 0 || ((int) c > 0 && (int) c <= v->nsubexp)) { NOTE(REG_UBACKREF); RETV(BACKREF, (chr) c); diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index fa68d021bc2ad5077bccfc1550600415ff747899..ea382df7f91152671fb642b8e80dd8ce40f1ceb1 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -28,7 +28,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.4 2005/10/15 02:49:24 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/regex/regc_nfa.c,v 1.5 2008/01/03 20:47:55 tgl Exp $ * * * One or two things that technically ought to be in here @@ -60,11 +60,12 @@ newnfa(struct vars * v, nfa->nstates = 0; nfa->cm = cm; nfa->v = v; + nfa->size = 0; nfa->bos[0] = nfa->bos[1] = COLORLESS; nfa->eos[0] = nfa->eos[1] = COLORLESS; + nfa->parent = parent; /* Precedes newfstate so parent is valid. */ nfa->post = newfstate(nfa, '@'); /* number 0 */ nfa->pre = newfstate(nfa, '>'); /* number 1 */ - nfa->parent = parent; nfa->init = newstate(nfa); /* may become invalid later */ nfa->final = newstate(nfa); @@ -88,6 +89,57 @@ newnfa(struct vars * v, return nfa; } +/* + * TooManyStates - checks if the max states exceeds the compile-time value + */ +static int +TooManyStates(struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + size_t sz = nfa->size; + + while (parent != NULL) + { + sz = parent->size; + parent = parent->parent; + } + if (sz > REG_MAX_STATES) + return 1; + return 0; +} + +/* + * IncrementSize - increases the tracked size of the NFA and its parents. + */ +static void +IncrementSize(struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + + nfa->size++; + while (parent != NULL) + { + parent->size++; + parent = parent->parent; + } +} + +/* + * DecrementSize - decreases the tracked size of the NFA and its parents. + */ +static void +DecrementSize(struct nfa *nfa) +{ + struct nfa *parent = nfa->parent; + + nfa->size--; + while (parent != NULL) + { + parent->size--; + parent = parent->parent; + } +} + /* * freenfa - free an entire NFA */ @@ -122,6 +174,11 @@ newstate(struct nfa * nfa) { struct state *s; + if (TooManyStates(nfa)) + { + NERR(REG_ETOOBIG); + return NULL; + } if (nfa->free != NULL) { s = nfa->free; @@ -158,6 +215,8 @@ newstate(struct nfa * nfa) } s->prev = nfa->slast; nfa->slast = s; + /* track the current size and the parent size */ + IncrementSize(nfa); return s; } @@ -220,6 +279,7 @@ freestate(struct nfa * nfa, s->prev = NULL; s->next = nfa->free; /* don't delete it, put it on the free list */ nfa->free = s; + DecrementSize(nfa); } /* @@ -633,6 +693,8 @@ duptraverse(struct nfa * nfa, for (a = s->outs; a != NULL && !NISERR(); a = a->outchain) { duptraverse(nfa, a->to, (struct state *) NULL); + if (NISERR()) + break; assert(a->to->tmp != NULL); cparc(nfa, a, s->tmp, a->to->tmp); } @@ -793,6 +855,25 @@ pull(struct nfa * nfa, return 1; } + /* + * DGP 2007-11-15: Cloning a state with a circular constraint on its list + * of outs can lead to trouble [Tcl Bug 1810038], so get rid of them first. + */ + for (a = from->outs; a != NULL; a = nexta) + { + nexta = a->outchain; + switch (a->type) + { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (from == a->to) + freearc(nfa, a); + break; + } + } + /* first, clone from state if necessary to avoid other outarcs */ if (from->nouts > 1) { @@ -917,6 +998,29 @@ push(struct nfa * nfa, return 1; } + /* + * DGP 2007-11-15: Here we duplicate the same protections as appear + * in pull() above to avoid troubles with cloning a state with a + * circular constraint on its list of ins. It is not clear whether + * this is necessary, or is protecting against a "can't happen". + * Any test case that actually leads to a freearc() call here would + * be a welcome addition to the test suite. + */ + for (a = to->ins; a != NULL; a = nexta) + { + nexta = a->inchain; + switch (a->type) + { + case '^': + case '$': + case BEHIND: + case AHEAD: + if (a->from == to) + freearc(nfa, a); + break; + } + } + /* first, clone to state if necessary to avoid other inarcs */ if (to->nins > 1) { @@ -1039,7 +1143,8 @@ fixempties(struct nfa * nfa, do { progress = 0; - for (s = nfa->states; s != NULL && !NISERR(); s = nexts) + for (s = nfa->states; s != NULL && !NISERR() && + s->no != FREESTATE; s = nexts) { nexts = s->next; for (a = s->outs; a != NULL && !NISERR(); a = nexta) diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h index 74bf86958fc300b8b2027b1023971fa86f96bfe1..b8cc1f3da73f46a2368c30fee40bd82b63d40c9c 100644 --- a/src/include/regex/regerrs.h +++ b/src/include/regex/regerrs.h @@ -1,5 +1,5 @@ /* - * $PostgreSQL: pgsql/src/include/regex/regerrs.h,v 1.4 2007/02/01 19:10:29 momjian Exp $ + * $PostgreSQL: pgsql/src/include/regex/regerrs.h,v 1.5 2008/01/03 20:47:55 tgl Exp $ */ { @@ -73,3 +73,7 @@ { REG_BADOPT, "REG_BADOPT", "invalid embedded option" }, + +{ + REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" +}, diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index 9cda1c23ebcf66b149378b293c27ae330bef2ab9..52259887ead461e82db8366a2e2026f42a1f042d 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -29,7 +29,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/include/regex/regex.h,v 1.28 2005/10/15 02:49:46 momjian Exp $ + * $PostgreSQL: pgsql/src/include/regex/regex.h,v 1.29 2008/01/03 20:47:55 tgl Exp $ */ /* @@ -151,6 +151,7 @@ typedef struct #define REG_INVARG 16 /* invalid argument to regex function */ #define REG_MIXED 17 /* character widths of regex and string differ */ #define REG_BADOPT 18 /* invalid embedded option */ +#define REG_ETOOBIG 19 /* nfa has too many states */ /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 18712b4090d6a55a12ece941cf24c98c42e13392..327808338a004a7d05261a2ad0a7a0b68a3b2d28 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -27,7 +27,7 @@ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.5 2005/10/15 02:49:46 momjian Exp $ + * $PostgreSQL: pgsql/src/include/regex/regguts.h,v 1.6 2008/01/03 20:47:55 tgl Exp $ */ @@ -272,6 +272,7 @@ struct arc #define freechain outchain struct arc *inchain; /* *to's ins chain */ struct arc *colorchain; /* color's arc chain */ + struct arc *colorchainRev; /* back-link in color's arc chain */ }; struct arcbatch @@ -311,6 +312,9 @@ struct nfa struct colormap *cm; /* the color map */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + size_t size; /* Current NFA size; differs from nstates as + * it also counts the number of states created + * by children of this state. */ struct vars *v; /* simplifies compile error reporting */ struct nfa *parent; /* parent NFA, if any */ }; @@ -343,7 +347,12 @@ struct cnfa #define ZAPCNFA(cnfa) ((cnfa).nstates = 0) #define NULLCNFA(cnfa) ((cnfa).nstates == 0) - +/* + * Used to limit the maximum NFA size to something sane. [Tcl Bug 1810264] + */ +#ifndef REG_MAX_STATES +#define REG_MAX_STATES 100000 +#endif /* * subexpression tree