regexec.c 27.1 KB
Newer Older
1 2
/*
 * re_*exec and friends - match REs
3
 *
B
Bruce Momjian 已提交
4 5
 * Copyright (c) 1998, 1999 Henry Spencer.	All rights reserved.
 *
6 7 8
 * Development of this software was funded, in part, by Cray Research Inc.,
 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
 * Corporation, none of whom are responsible for the results.  The author
B
Bruce Momjian 已提交
9 10
 * thanks all of them.
 *
11 12 13 14
 * Redistribution and use in source and binary forms -- with or without
 * modification -- are permitted for any purpose, provided that
 * redistributions in source form retain this entire copyright notice and
 * indicate the origin and nature of any modifications.
B
Bruce Momjian 已提交
15
 *
16 17
 * I'd appreciate being given credit for this package in the documentation
 * of software which uses it, but that is not a requirement.
B
Bruce Momjian 已提交
18
 *
19 20 21 22 23 24 25 26 27 28
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
 *
30
 * $PostgreSQL: pgsql/src/backend/regex/regexec.c,v 1.28 2010/02/01 02:45:29 tgl Exp $
31 32 33
 *
 */

34 35 36 37 38
#include "regex/regguts.h"



/* lazy-DFA representation */
B
Bruce Momjian 已提交
39 40
struct arcp
{								/* "pointer" to an outarc */
41
	struct sset *ss;
B
Bruce Momjian 已提交
42
	color		co;
43 44
};

B
Bruce Momjian 已提交
45 46 47 48 49 50
struct sset
{								/* state set */
	unsigned   *states;			/* pointer to bitvector */
	unsigned	hash;			/* hash of bitvector */
#define  HASH(bv, nw)	 (((nw) == 1) ? *(bv) : hash(bv, nw))
#define  HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
51
		memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
B
Bruce Momjian 已提交
52 53 54 55 56 57 58 59 60
	int			flags;
#define  STARTER	 01			/* the initial state set */
#define  POSTSTATE	 02			/* includes the goal state */
#define  LOCKED		 04			/* locked in cache */
#define  NOPROGRESS  010		/* zero-progress state set */
	struct arcp ins;			/* chain of inarcs pointing here */
	chr		   *lastseen;		/* last entered on arrival here */
	struct sset **outs;			/* outarc vector indexed by color */
	struct arcp *inchain;		/* chain-pointer vector for outarcs */
61 62
};

B
Bruce Momjian 已提交
63 64 65 66 67 68 69 70 71 72 73 74
struct dfa
{
	int			nssets;			/* size of cache */
	int			nssused;		/* how many entries occupied yet */
	int			nstates;		/* number of states */
	int			ncolors;		/* length of outarc and inchain vectors */
	int			wordsper;		/* length of state-set bitvectors */
	struct sset *ssets;			/* state-set cache */
	unsigned   *statesarea;		/* bitvector storage */
	unsigned   *work;			/* pointer to work area within statesarea */
	struct sset **outsarea;		/* outarc-vector storage */
	struct arcp *incarea;		/* inchain storage */
75 76
	struct cnfa *cnfa;
	struct colormap *cm;
B
Bruce Momjian 已提交
77
	chr		   *lastpost;		/* location of last cache-flushed success */
B
Bruce Momjian 已提交
78
	chr		   *lastnopr;		/* location of last cache-flushed NOPROGRESS */
B
Bruce Momjian 已提交
79 80 81
	struct sset *search;		/* replacement-search-pointer memory */
	int			cptsmalloced;	/* were the areas individually malloced? */
	char	   *mallocarea;		/* self, or master malloced area, or NULL */
82 83
};

B
Bruce Momjian 已提交
84
#define WORK	1				/* number of work bitvectors needed */
85 86

/* setup for non-malloc allocation for small cases */
B
Bruce Momjian 已提交
87 88 89 90 91 92 93 94 95
#define FEWSTATES	20			/* must be less than UBITS */
#define FEWCOLORS	15
struct smalldfa
{
	struct dfa	dfa;
	struct sset ssets[FEWSTATES * 2];
	unsigned	statesarea[FEWSTATES * 2 + WORK];
	struct sset *outsarea[FEWSTATES * 2 * FEWCOLORS];
	struct arcp incarea[FEWSTATES * 2 * FEWCOLORS];
96
};
B
Bruce Momjian 已提交
97 98

#define DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
99 100 101 102



/* internal variables, bundled for easy passing around */
B
Bruce Momjian 已提交
103 104 105
struct vars
{
	regex_t    *re;
106
	struct guts *g;
B
Bruce Momjian 已提交
107 108
	int			eflags;			/* copies of arguments */
	size_t		nmatch;
109 110
	regmatch_t *pmatch;
	rm_detail_t *details;
B
Bruce Momjian 已提交
111
	chr		   *start;			/* start of string */
112
	chr		   *search_start;	/* search start of string */
B
Bruce Momjian 已提交
113 114 115
	chr		   *stop;			/* just past end of string */
	int			err;			/* error code if any (0 none) */
	regoff_t   *mem;			/* memory vector for backtracking */
116 117 118 119
	struct smalldfa dfa1;
	struct smalldfa dfa2;
};

B
Bruce Momjian 已提交
120 121 122 123
#define VISERR(vv)	((vv)->err != 0)	/* have we seen an error yet? */
#define ISERR() VISERR(v)
#define VERR(vv,e)	(((vv)->err) ? (vv)->err : ((vv)->err = (e)))
#define ERR(e)	VERR(v, e)		/* record an error */
B
Bruce Momjian 已提交
124
#define NOERR() {if (ISERR()) return v->err;}	/* if error seen, return it */
B
Bruce Momjian 已提交
125 126
#define OFF(p)	((p) - v->start)
#define LOFF(p) ((long)OFF(p))
127 128


129

130
/*
131 132 133
 * forward declarations
 */
/* === regexec.c === */
B
Bruce Momjian 已提交
134 135 136 137 138 139 140 141 142 143
static int	find(struct vars *, struct cnfa *, struct colormap *);
static int	cfind(struct vars *, struct cnfa *, struct colormap *);
static int	cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
static void zapsubs(regmatch_t *, size_t);
static void zapmem(struct vars *, struct subre *);
static void subset(struct vars *, struct subre *, chr *, chr *);
static int	dissect(struct vars *, struct subre *, chr *, chr *);
static int	condissect(struct vars *, struct subre *, chr *, chr *);
static int	altdissect(struct vars *, struct subre *, chr *, chr *);
static int	cdissect(struct vars *, struct subre *, chr *, chr *);
144
static int	ccaptdissect(struct vars *, struct subre *, chr *, chr *);
B
Bruce Momjian 已提交
145 146 147 148 149
static int	ccondissect(struct vars *, struct subre *, chr *, chr *);
static int	crevdissect(struct vars *, struct subre *, chr *, chr *);
static int	cbrdissect(struct vars *, struct subre *, chr *, chr *);
static int	caltdissect(struct vars *, struct subre *, chr *, chr *);

150
/* === rege_dfa.c === */
B
Bruce Momjian 已提交
151 152 153 154 155 156 157 158 159 160 161
static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
static chr *lastcold(struct vars *, struct dfa *);
static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
static void freedfa(struct dfa *);
static unsigned hash(unsigned *, int);
static struct sset *initialize(struct vars *, struct dfa *, chr *);
static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
static int	lacon(struct vars *, struct cnfa *, chr *, pcolor);
static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
162 163 164 165 166 167 168


/*
 * pg_regexec - match regular expression
 */
int
pg_regexec(regex_t *re,
169
		   const chr *string,
170
		   size_t len,
171
		   size_t search_start,
172
		   rm_detail_t *details,
173 174 175 176 177 178
		   size_t nmatch,
		   regmatch_t pmatch[],
		   int flags)
{
	struct vars var;
	register struct vars *v = &var;
B
Bruce Momjian 已提交
179 180 181 182 183 184 185 186 187
	int			st;
	size_t		n;
	int			backref;

#define  LOCALMAT	 20
	regmatch_t	mat[LOCALMAT];

#define  LOCALMEM	 40
	regoff_t	mem[LOCALMEM];
188 189 190 191 192 193 194 195 196

	/* sanity checks */
	if (re == NULL || string == NULL || re->re_magic != REMAGIC)
		return REG_INVARG;
	if (re->re_csize != sizeof(chr))
		return REG_MIXED;

	/* setup */
	v->re = re;
B
Bruce Momjian 已提交
197 198
	v->g = (struct guts *) re->re_guts;
	if ((v->g->cflags & REG_EXPECT) && details == NULL)
199
		return REG_INVARG;
B
Bruce Momjian 已提交
200
	if (v->g->info & REG_UIMPOSSIBLE)
201
		return REG_NOMATCH;
B
Bruce Momjian 已提交
202
	backref = (v->g->info & REG_UBACKREF) ? 1 : 0;
203
	v->eflags = flags;
B
Bruce Momjian 已提交
204 205
	if (v->g->cflags & REG_NOSUB)
		nmatch = 0;				/* override client */
206
	v->nmatch = nmatch;
B
Bruce Momjian 已提交
207 208
	if (backref)
	{
209 210 211 212
		/* need work area */
		if (v->g->nsub + 1 <= LOCALMAT)
			v->pmatch = mat;
		else
B
Bruce Momjian 已提交
213 214
			v->pmatch = (regmatch_t *) MALLOC((v->g->nsub + 1) *
											  sizeof(regmatch_t));
215 216 217
		if (v->pmatch == NULL)
			return REG_ESPACE;
		v->nmatch = v->g->nsub + 1;
B
Bruce Momjian 已提交
218 219
	}
	else
220 221
		v->pmatch = pmatch;
	v->details = details;
B
Bruce Momjian 已提交
222
	v->start = (chr *) string;
223
	v->search_start = (chr *) string + search_start;
B
Bruce Momjian 已提交
224
	v->stop = (chr *) string + len;
225
	v->err = 0;
B
Bruce Momjian 已提交
226 227
	if (backref)
	{
228 229
		/* need retry memory */
		assert(v->g->ntree >= 0);
B
Bruce Momjian 已提交
230
		n = (size_t) v->g->ntree;
231 232 233
		if (n <= LOCALMEM)
			v->mem = mem;
		else
B
Bruce Momjian 已提交
234 235 236
			v->mem = (regoff_t *) MALLOC(n * sizeof(regoff_t));
		if (v->mem == NULL)
		{
237 238 239 240
			if (v->pmatch != pmatch && v->pmatch != mat)
				FREE(v->pmatch);
			return REG_ESPACE;
		}
B
Bruce Momjian 已提交
241 242
	}
	else
243 244 245 246 247 248 249 250 251 252
		v->mem = NULL;

	/* do it */
	assert(v->g->tree != NULL);
	if (backref)
		st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
	else
		st = find(v, &v->g->tree->cnfa, &v->g->cmap);

	/* copy (portion of) match vector over if necessary */
B
Bruce Momjian 已提交
253 254
	if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0)
	{
255 256
		zapsubs(pmatch, nmatch);
		n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
B
Bruce Momjian 已提交
257
		memcpy(VS(pmatch), VS(v->pmatch), n * sizeof(regmatch_t));
258 259 260 261 262 263 264 265 266 267 268 269 270 271
	}

	/* clean up */
	if (v->pmatch != pmatch && v->pmatch != mat)
		FREE(v->pmatch);
	if (v->mem != NULL && v->mem != mem)
		FREE(v->mem);
	return st;
}

/*
 * find - find a match for the main NFA (no-complications case)
 */
static int
B
Bruce Momjian 已提交
272 273 274
find(struct vars * v,
	 struct cnfa * cnfa,
	 struct colormap * cm)
275 276 277
{
	struct dfa *s;
	struct dfa *d;
B
Bruce Momjian 已提交
278 279 280
	chr		   *begin;
	chr		   *end = NULL;
	chr		   *cold;
B
Bruce Momjian 已提交
281
	chr		   *open;			/* open and close of range of possible starts */
B
Bruce Momjian 已提交
282 283 284
	chr		   *close;
	int			hitend;
	int			shorter = (v->g->tree->flags & SHORTER) ? 1 : 0;
285 286 287 288 289 290 291

	/* first, a shot with the search RE */
	s = newdfa(v, &v->g->search, cm, &v->dfa1);
	assert(!(ISERR() && s != NULL));
	NOERR();
	MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
	cold = NULL;
292 293
	close = shortest(v, s, v->search_start, v->search_start, v->stop,
					 &cold, (int *) NULL);
294 295
	freedfa(s);
	NOERR();
B
Bruce Momjian 已提交
296 297
	if (v->g->cflags & REG_EXPECT)
	{
298 299 300 301 302
		assert(v->details != NULL);
		if (cold != NULL)
			v->details->rm_extend.rm_so = OFF(cold);
		else
			v->details->rm_extend.rm_so = OFF(v->stop);
B
Bruce Momjian 已提交
303
		v->details->rm_extend.rm_eo = OFF(v->stop);		/* unknown */
304
	}
B
Bruce Momjian 已提交
305
	if (close == NULL)			/* not found */
306
		return REG_NOMATCH;
B
Bruce Momjian 已提交
307
	if (v->nmatch == 0)			/* found, don't need exact location */
308 309 310 311 312 313 314 315 316 317
		return REG_OKAY;

	/* find starting point and match */
	assert(cold != NULL);
	open = cold;
	cold = NULL;
	MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
	d = newdfa(v, cnfa, cm, &v->dfa1);
	assert(!(ISERR() && d != NULL));
	NOERR();
B
Bruce Momjian 已提交
318 319
	for (begin = open; begin <= close; begin++)
	{
320 321 322
		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
		if (shorter)
			end = shortest(v, d, begin, begin, v->stop,
B
Bruce Momjian 已提交
323
						   (chr **) NULL, &hitend);
324 325 326 327 328 329
		else
			end = longest(v, d, begin, v->stop, &hitend);
		NOERR();
		if (hitend && cold == NULL)
			cold = begin;
		if (end != NULL)
B
Bruce Momjian 已提交
330
			break;				/* NOTE BREAK OUT */
331 332 333 334 335 336 337 338
	}
	assert(end != NULL);		/* search RE succeeded so loop should */
	freedfa(d);

	/* and pin down details */
	assert(v->nmatch > 0);
	v->pmatch[0].rm_so = OFF(begin);
	v->pmatch[0].rm_eo = OFF(end);
B
Bruce Momjian 已提交
339 340
	if (v->g->cflags & REG_EXPECT)
	{
341 342 343 344
		if (cold != NULL)
			v->details->rm_extend.rm_so = OFF(cold);
		else
			v->details->rm_extend.rm_so = OFF(v->stop);
B
Bruce Momjian 已提交
345
		v->details->rm_extend.rm_eo = OFF(v->stop);		/* unknown */
346
	}
B
Bruce Momjian 已提交
347
	if (v->nmatch == 1)			/* no need for submatches */
348 349 350 351 352 353 354 355 356 357 358
		return REG_OKAY;

	/* submatches */
	zapsubs(v->pmatch, v->nmatch);
	return dissect(v, v->g->tree, begin, end);
}

/*
 * cfind - find a match for the main NFA (with complications)
 */
static int
B
Bruce Momjian 已提交
359 360 361
cfind(struct vars * v,
	  struct cnfa * cnfa,
	  struct colormap * cm)
362 363 364
{
	struct dfa *s;
	struct dfa *d;
B
Bruce Momjian 已提交
365 366
	chr		   *cold;
	int			ret;
367 368 369 370

	s = newdfa(v, &v->g->search, cm, &v->dfa1);
	NOERR();
	d = newdfa(v, cnfa, cm, &v->dfa2);
B
Bruce Momjian 已提交
371 372
	if (ISERR())
	{
373 374 375 376 377 378 379 380 381 382
		assert(d == NULL);
		freedfa(s);
		return v->err;
	}

	ret = cfindloop(v, cnfa, cm, d, s, &cold);

	freedfa(d);
	freedfa(s);
	NOERR();
B
Bruce Momjian 已提交
383 384
	if (v->g->cflags & REG_EXPECT)
	{
385 386 387 388 389
		assert(v->details != NULL);
		if (cold != NULL)
			v->details->rm_extend.rm_so = OFF(cold);
		else
			v->details->rm_extend.rm_so = OFF(v->stop);
B
Bruce Momjian 已提交
390
		v->details->rm_extend.rm_eo = OFF(v->stop);		/* unknown */
391 392 393 394 395 396 397 398
	}
	return ret;
}

/*
 * cfindloop - the heart of cfind
 */
static int
B
Bruce Momjian 已提交
399 400 401 402 403
cfindloop(struct vars * v,
		  struct cnfa * cnfa,
		  struct colormap * cm,
		  struct dfa * d,
		  struct dfa * s,
404
		  chr **coldp)			/* where to put coldstart pointer */
405
{
B
Bruce Momjian 已提交
406 407 408
	chr		   *begin;
	chr		   *end;
	chr		   *cold;
B
Bruce Momjian 已提交
409
	chr		   *open;			/* open and close of range of possible starts */
B
Bruce Momjian 已提交
410 411 412 413 414 415
	chr		   *close;
	chr		   *estart;
	chr		   *estop;
	int			er;
	int			shorter = v->g->tree->flags & SHORTER;
	int			hitend;
416 417 418

	assert(d != NULL && s != NULL);
	cold = NULL;
419
	close = v->search_start;
B
Bruce Momjian 已提交
420 421
	do
	{
422
		MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
B
Bruce Momjian 已提交
423
		close = shortest(v, s, close, close, v->stop, &cold, (int *) NULL);
424 425 426 427 428 429
		if (close == NULL)
			break;				/* NOTE BREAK */
		assert(cold != NULL);
		open = cold;
		cold = NULL;
		MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
B
Bruce Momjian 已提交
430 431
		for (begin = open; begin <= close; begin++)
		{
432 433 434
			MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
			estart = begin;
			estop = v->stop;
B
Bruce Momjian 已提交
435 436
			for (;;)
			{
437 438
				if (shorter)
					end = shortest(v, d, begin, estart,
B
Bruce Momjian 已提交
439
								   estop, (chr **) NULL, &hitend);
440 441
				else
					end = longest(v, d, begin, estop,
B
Bruce Momjian 已提交
442
								  &hitend);
443 444 445 446 447 448 449 450
				if (hitend && cold == NULL)
					cold = begin;
				if (end == NULL)
					break;		/* NOTE BREAK OUT */
				MDEBUG(("tentative end %ld\n", LOFF(end)));
				zapsubs(v->pmatch, v->nmatch);
				zapmem(v, v->g->tree);
				er = cdissect(v, v->g->tree, begin, end);
B
Bruce Momjian 已提交
451 452 453 454
				if (er == REG_OKAY)
				{
					if (v->nmatch > 0)
					{
455 456 457 458 459 460
						v->pmatch[0].rm_so = OFF(begin);
						v->pmatch[0].rm_eo = OFF(end);
					}
					*coldp = cold;
					return REG_OKAY;
				}
B
Bruce Momjian 已提交
461 462
				if (er != REG_NOMATCH)
				{
463
					ERR(er);
464
					*coldp = cold;
465 466
					return er;
				}
B
Bruce Momjian 已提交
467 468
				if ((shorter) ? end == estop : end == begin)
				{
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
					/* no point in trying again */
					*coldp = cold;
					return REG_NOMATCH;
				}
				/* go around and try again */
				if (shorter)
					estart = end + 1;
				else
					estop = end - 1;
			}
		}
	} while (close < v->stop);

	*coldp = cold;
	return REG_NOMATCH;
}

/*
 * zapsubs - initialize the subexpression matches to "no match"
 */
static void
zapsubs(regmatch_t *p,
		size_t n)
{
B
Bruce Momjian 已提交
493
	size_t		i;
494

B
Bruce Momjian 已提交
495 496
	for (i = n - 1; i > 0; i--)
	{
497 498 499 500 501 502 503 504 505
		p[i].rm_so = -1;
		p[i].rm_eo = -1;
	}
}

/*
 * zapmem - initialize the retry memory of a subtree to zeros
 */
static void
B
Bruce Momjian 已提交
506 507
zapmem(struct vars * v,
	   struct subre * t)
508 509 510 511 512 513
{
	if (t == NULL)
		return;

	assert(v->mem != NULL);
	v->mem[t->retry] = 0;
B
Bruce Momjian 已提交
514 515
	if (t->op == '(')
	{
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
		assert(t->subno > 0);
		v->pmatch[t->subno].rm_so = -1;
		v->pmatch[t->subno].rm_eo = -1;
	}

	if (t->left != NULL)
		zapmem(v, t->left);
	if (t->right != NULL)
		zapmem(v, t->right);
}

/*
 * subset - set any subexpression relevant to a successful subre
 */
static void
B
Bruce Momjian 已提交
531 532
subset(struct vars * v,
	   struct subre * sub,
533 534
	   chr *begin,
	   chr *end)
535
{
B
Bruce Momjian 已提交
536
	int			n = sub->subno;
537 538

	assert(n > 0);
B
Bruce Momjian 已提交
539
	if ((size_t) n >= v->nmatch)
540 541 542 543 544 545 546 547 548 549
		return;

	MDEBUG(("setting %d\n", n));
	v->pmatch[n].rm_so = OFF(begin);
	v->pmatch[n].rm_eo = OFF(end);
}

/*
 * dissect - determine subexpression matches (uncomplicated case)
 */
B
Bruce Momjian 已提交
550 551 552
static int						/* regexec return code */
dissect(struct vars * v,
		struct subre * t,
553 554
		chr *begin,				/* beginning of relevant substring */
		chr *end)				/* end of same */
555 556 557 558
{
	assert(t != NULL);
	MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));

B
Bruce Momjian 已提交
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
	switch (t->op)
	{
		case '=':				/* terminal node */
			assert(t->left == NULL && t->right == NULL);
			return REG_OKAY;	/* no action, parent did the work */
		case '|':				/* alternation */
			assert(t->left != NULL);
			return altdissect(v, t, begin, end);
		case 'b':				/* back ref -- shouldn't be calling us! */
			return REG_ASSERT;
		case '.':				/* concatenation */
			assert(t->left != NULL && t->right != NULL);
			return condissect(v, t, begin, end);
		case '(':				/* capturing */
			assert(t->left != NULL && t->right == NULL);
			assert(t->subno > 0);
			subset(v, t, begin, end);
			return dissect(v, t->left, begin, end);
		default:
			return REG_ASSERT;
579 580 581 582 583
	}
}

/*
 * condissect - determine concatenation subexpression matches (uncomplicated)
584
 */
B
Bruce Momjian 已提交
585 586 587
static int						/* regexec return code */
condissect(struct vars * v,
		   struct subre * t,
588 589
		   chr *begin,			/* beginning of relevant substring */
		   chr *end)			/* end of same */
590
{
591 592
	struct dfa *d;
	struct dfa *d2;
B
Bruce Momjian 已提交
593 594 595 596
	chr		   *mid;
	int			i;
	int			shorter = (t->left->flags & SHORTER) ? 1 : 0;
	chr		   *stop = (shorter) ? end : begin;
597 598 599 600 601 602 603 604

	assert(t->op == '.');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(t->right != NULL && t->right->cnfa.nstates > 0);

	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
	NOERR();
	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
B
Bruce Momjian 已提交
605 606
	if (ISERR())
	{
607 608 609 610 611 612 613
		assert(d2 == NULL);
		freedfa(d);
		return v->err;
	}

	/* pick a tentative midpoint */
	if (shorter)
B
Bruce Momjian 已提交
614 615
		mid = shortest(v, d, begin, begin, end, (chr **) NULL,
					   (int *) NULL);
616
	else
B
Bruce Momjian 已提交
617 618 619
		mid = longest(v, d, begin, end, (int *) NULL);
	if (mid == NULL)
	{
620 621 622 623 624 625 626
		freedfa(d);
		freedfa(d2);
		return REG_ASSERT;
	}
	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));

	/* iterate until satisfaction or failure */
B
Bruce Momjian 已提交
627 628
	while (longest(v, d2, mid, end, (int *) NULL) != end)
	{
629
		/* that midpoint didn't work, find a new one */
B
Bruce Momjian 已提交
630 631
		if (mid == stop)
		{
632 633 634 635 636 637 638
			/* all possibilities exhausted! */
			MDEBUG(("no midpoint!\n"));
			freedfa(d);
			freedfa(d2);
			return REG_ASSERT;
		}
		if (shorter)
B
Bruce Momjian 已提交
639 640
			mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL,
						   (int *) NULL);
641
		else
B
Bruce Momjian 已提交
642 643 644
			mid = longest(v, d, begin, mid - 1, (int *) NULL);
		if (mid == NULL)
		{
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
			/* failed to find a new one! */
			MDEBUG(("failed midpoint!\n"));
			freedfa(d);
			freedfa(d2);
			return REG_ASSERT;
		}
		MDEBUG(("new midpoint %ld\n", LOFF(mid)));
	}

	/* satisfaction */
	MDEBUG(("successful\n"));
	freedfa(d);
	freedfa(d2);
	i = dissect(v, t->left, begin, mid);
	if (i != REG_OKAY)
		return i;
	return dissect(v, t->right, mid, end);
}

/*
 * altdissect - determine alternative subexpression matches (uncomplicated)
 */
B
Bruce Momjian 已提交
667 668 669
static int						/* regexec return code */
altdissect(struct vars * v,
		   struct subre * t,
670 671
		   chr *begin,			/* beginning of relevant substring */
		   chr *end)			/* end of same */
672 673
{
	struct dfa *d;
B
Bruce Momjian 已提交
674
	int			i;
675 676 677 678

	assert(t != NULL);
	assert(t->op == '|');

B
Bruce Momjian 已提交
679 680
	for (i = 0; t != NULL; t = t->right, i++)
	{
681 682 683 684 685
		MDEBUG(("trying %dth\n", i));
		assert(t->left != NULL && t->left->cnfa.nstates > 0);
		d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
		if (ISERR())
			return v->err;
B
Bruce Momjian 已提交
686 687
		if (longest(v, d, begin, end, (int *) NULL) == end)
		{
688 689 690 691 692 693
			MDEBUG(("success\n"));
			freedfa(d);
			return dissect(v, t->left, begin, end);
		}
		freedfa(d);
	}
B
Bruce Momjian 已提交
694
	return REG_ASSERT;			/* none of them matched?!? */
695 696 697 698
}

/*
 * cdissect - determine subexpression matches (with complications)
B
Bruce Momjian 已提交
699
 * The retry memory stores the offset of the trial midpoint from begin,
700 701
 * plus 1 so that 0 uniquely means "clean slate".
 */
B
Bruce Momjian 已提交
702 703 704
static int						/* regexec return code */
cdissect(struct vars * v,
		 struct subre * t,
705 706
		 chr *begin,			/* beginning of relevant substring */
		 chr *end)				/* end of same */
707 708 709 710
{
	assert(t != NULL);
	MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));

B
Bruce Momjian 已提交
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
	switch (t->op)
	{
		case '=':				/* terminal node */
			assert(t->left == NULL && t->right == NULL);
			return REG_OKAY;	/* no action, parent did the work */
		case '|':				/* alternation */
			assert(t->left != NULL);
			return caltdissect(v, t, begin, end);
		case 'b':				/* back ref -- shouldn't be calling us! */
			assert(t->left == NULL && t->right == NULL);
			return cbrdissect(v, t, begin, end);
		case '.':				/* concatenation */
			assert(t->left != NULL && t->right != NULL);
			return ccondissect(v, t, begin, end);
		case '(':				/* capturing */
			assert(t->left != NULL && t->right == NULL);
727
			return ccaptdissect(v, t, begin, end);
B
Bruce Momjian 已提交
728 729
		default:
			return REG_ASSERT;
730 731 732
	}
}

733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
/*
 * ccaptdissect - capture subexpression matches (with complications)
 */
static int						/* regexec return code */
ccaptdissect(struct vars * v,
			 struct subre * t,
			 chr *begin,		/* beginning of relevant substring */
			 chr *end)			/* end of same */
{
	int			er;

	assert(t->subno > 0);

	er = cdissect(v, t->left, begin, end);
	if (er == REG_OKAY)
		subset(v, t, begin, end);
	return er;
}

752 753
/*
 * ccondissect - concatenation subexpression matches (with complications)
B
Bruce Momjian 已提交
754
 * The retry memory stores the offset of the trial midpoint from begin,
755 756
 * plus 1 so that 0 uniquely means "clean slate".
 */
B
Bruce Momjian 已提交
757 758 759
static int						/* regexec return code */
ccondissect(struct vars * v,
			struct subre * t,
760 761
			chr *begin,			/* beginning of relevant substring */
			chr *end)			/* end of same */
762 763 764
{
	struct dfa *d;
	struct dfa *d2;
B
Bruce Momjian 已提交
765 766
	chr		   *mid;
	int			er;
767 768 769 770 771

	assert(t->op == '.');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(t->right != NULL && t->right->cnfa.nstates > 0);

B
Bruce Momjian 已提交
772
	if (t->left->flags & SHORTER)		/* reverse scan */
773 774 775 776 777 778
		return crevdissect(v, t, begin, end);

	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
	if (ISERR())
		return v->err;
	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
B
Bruce Momjian 已提交
779 780
	if (ISERR())
	{
781 782 783 784 785 786
		freedfa(d);
		return v->err;
	}
	MDEBUG(("cconcat %d\n", t->retry));

	/* pick a tentative midpoint */
B
Bruce Momjian 已提交
787 788 789 790 791
	if (v->mem[t->retry] == 0)
	{
		mid = longest(v, d, begin, end, (int *) NULL);
		if (mid == NULL)
		{
792 793 794 795 796 797
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
		v->mem[t->retry] = (mid - begin) + 1;
B
Bruce Momjian 已提交
798 799 800
	}
	else
	{
801 802 803 804 805
		mid = begin + (v->mem[t->retry] - 1);
		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
	}

	/* iterate until satisfaction or failure */
B
Bruce Momjian 已提交
806 807
	for (;;)
	{
808
		/* try this midpoint on for size */
809
		if (longest(v, d2, mid, end, (int *) NULL) == end)
B
Bruce Momjian 已提交
810
		{
811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829
			er = cdissect(v, t->left, begin, mid);
			if (er == REG_OKAY)
			{
				er = cdissect(v, t->right, mid, end);
				if (er == REG_OKAY)
				{
					/* satisfaction */
					MDEBUG(("successful\n"));
					freedfa(d);
					freedfa(d2);
					return REG_OKAY;
				}
			}
			if (er != REG_OKAY && er != REG_NOMATCH)
			{
				freedfa(d);
				freedfa(d2);
				return er;
			}
830 831 832
		}

		/* that midpoint didn't work, find a new one */
B
Bruce Momjian 已提交
833 834
		if (mid == begin)
		{
835 836 837 838 839 840
			/* all possibilities exhausted */
			MDEBUG(("%d no midpoint\n", t->retry));
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
B
Bruce Momjian 已提交
841 842 843
		mid = longest(v, d, begin, mid - 1, (int *) NULL);
		if (mid == NULL)
		{
844 845 846 847 848 849 850 851 852 853 854 855
			/* failed to find a new one */
			MDEBUG(("%d failed midpoint\n", t->retry));
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
		v->mem[t->retry] = (mid - begin) + 1;
		zapmem(v, t->left);
		zapmem(v, t->right);
	}

856 857
	/* can't get here */
	return REG_ASSERT;
858
}
859 860 861

/*
 * crevdissect - determine backref shortest-first subexpression matches
B
Bruce Momjian 已提交
862
 * The retry memory stores the offset of the trial midpoint from begin,
863 864
 * plus 1 so that 0 uniquely means "clean slate".
 */
B
Bruce Momjian 已提交
865 866 867
static int						/* regexec return code */
crevdissect(struct vars * v,
			struct subre * t,
868 869
			chr *begin,			/* beginning of relevant substring */
			chr *end)			/* end of same */
870 871 872
{
	struct dfa *d;
	struct dfa *d2;
B
Bruce Momjian 已提交
873 874
	chr		   *mid;
	int			er;
875 876 877 878

	assert(t->op == '.');
	assert(t->left != NULL && t->left->cnfa.nstates > 0);
	assert(t->right != NULL && t->right->cnfa.nstates > 0);
B
Bruce Momjian 已提交
879
	assert(t->left->flags & SHORTER);
880 881 882 883 884 885

	/* concatenation -- need to split the substring between parts */
	d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
	if (ISERR())
		return v->err;
	d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
B
Bruce Momjian 已提交
886 887
	if (ISERR())
	{
888 889 890 891 892 893
		freedfa(d);
		return v->err;
	}
	MDEBUG(("crev %d\n", t->retry));

	/* pick a tentative midpoint */
B
Bruce Momjian 已提交
894 895 896 897 898
	if (v->mem[t->retry] == 0)
	{
		mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
		if (mid == NULL)
		{
899 900 901 902 903 904
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
		MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
		v->mem[t->retry] = (mid - begin) + 1;
B
Bruce Momjian 已提交
905 906 907
	}
	else
	{
908 909 910 911 912
		mid = begin + (v->mem[t->retry] - 1);
		MDEBUG(("working midpoint %ld\n", LOFF(mid)));
	}

	/* iterate until satisfaction or failure */
B
Bruce Momjian 已提交
913 914
	for (;;)
	{
915
		/* try this midpoint on for size */
916
		if (longest(v, d2, mid, end, (int *) NULL) == end)
B
Bruce Momjian 已提交
917
		{
918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936
			er = cdissect(v, t->left, begin, mid);
			if (er == REG_OKAY)
			{
				er = cdissect(v, t->right, mid, end);
				if (er == REG_OKAY)
				{
					/* satisfaction */
					MDEBUG(("successful\n"));
					freedfa(d);
					freedfa(d2);
					return REG_OKAY;
				}
			}
			if (er != REG_OKAY && er != REG_NOMATCH)
			{
				freedfa(d);
				freedfa(d2);
				return er;
			}
937 938 939
		}

		/* that midpoint didn't work, find a new one */
B
Bruce Momjian 已提交
940 941
		if (mid == end)
		{
942 943 944 945 946 947
			/* all possibilities exhausted */
			MDEBUG(("%d no midpoint\n", t->retry));
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
B
Bruce Momjian 已提交
948 949 950
		mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL);
		if (mid == NULL)
		{
951 952 953 954 955 956 957 958 959 960 961 962
			/* failed to find a new one */
			MDEBUG(("%d failed midpoint\n", t->retry));
			freedfa(d);
			freedfa(d2);
			return REG_NOMATCH;
		}
		MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
		v->mem[t->retry] = (mid - begin) + 1;
		zapmem(v, t->left);
		zapmem(v, t->right);
	}

963 964
	/* can't get here */
	return REG_ASSERT;
965 966 967 968 969
}

/*
 * cbrdissect - determine backref subexpression matches
 */
B
Bruce Momjian 已提交
970 971 972
static int						/* regexec return code */
cbrdissect(struct vars * v,
		   struct subre * t,
973 974
		   chr *begin,			/* beginning of relevant substring */
		   chr *end)			/* end of same */
975
{
B
Bruce Momjian 已提交
976 977 978 979 980 981 982 983
	int			i;
	int			n = t->subno;
	size_t		len;
	chr		   *paren;
	chr		   *p;
	chr		   *stop;
	int			min = t->min;
	int			max = t->max;
984 985 986 987

	assert(t != NULL);
	assert(t->op == 'b');
	assert(n >= 0);
B
Bruce Momjian 已提交
988
	assert((size_t) n < v->nmatch);
989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002

	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));

	if (v->pmatch[n].rm_so == -1)
		return REG_NOMATCH;
	paren = v->start + v->pmatch[n].rm_so;
	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;

	/* no room to maneuver -- retries are pointless */
	if (v->mem[t->retry])
		return REG_NOMATCH;
	v->mem[t->retry] = 1;

	/* special-case zero-length string */
B
Bruce Momjian 已提交
1003 1004
	if (len == 0)
	{
1005 1006 1007 1008 1009 1010 1011
		if (begin == end)
			return REG_OKAY;
		return REG_NOMATCH;
	}

	/* and too-short string */
	assert(end >= begin);
B
Bruce Momjian 已提交
1012
	if ((size_t) (end - begin) < len)
1013 1014 1015 1016 1017
		return REG_NOMATCH;
	stop = end - len;

	/* count occurrences */
	i = 0;
B
Bruce Momjian 已提交
1018 1019 1020 1021
	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
	{
		if ((*v->g->compare) (paren, p, len) != 0)
			break;
1022 1023 1024 1025 1026
		i++;
	}
	MDEBUG(("cbackref found %d\n", i));

	/* and sort it out */
B
Bruce Momjian 已提交
1027
	if (p != end)				/* didn't consume all of it */
1028 1029 1030
		return REG_NOMATCH;
	if (min <= i && (i <= max || max == INFINITY))
		return REG_OKAY;
B
Bruce Momjian 已提交
1031
	return REG_NOMATCH;			/* out of range */
1032 1033 1034 1035 1036
}

/*
 * caltdissect - determine alternative subexpression matches (w. complications)
 */
B
Bruce Momjian 已提交
1037 1038 1039
static int						/* regexec return code */
caltdissect(struct vars * v,
			struct subre * t,
1040 1041
			chr *begin,			/* beginning of relevant substring */
			chr *end)			/* end of same */
1042 1043
{
	struct dfa *d;
B
Bruce Momjian 已提交
1044 1045 1046 1047
	int			er;

#define  UNTRIED 0				/* not yet tried at all */
#define  TRYING  1				/* top matched, trying submatches */
B
Bruce Momjian 已提交
1048
#define  TRIED	 2				/* top didn't match or submatches exhausted */
1049 1050 1051 1052 1053 1054 1055 1056 1057 1058

	if (t == NULL)
		return REG_NOMATCH;
	assert(t->op == '|');
	if (v->mem[t->retry] == TRIED)
		return caltdissect(v, t->right, begin, end);

	MDEBUG(("calt n%d\n", t->retry));
	assert(t->left != NULL);

B
Bruce Momjian 已提交
1059 1060
	if (v->mem[t->retry] == UNTRIED)
	{
1061 1062 1063
		d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
		if (ISERR())
			return v->err;
B
Bruce Momjian 已提交
1064 1065
		if (longest(v, d, begin, end, (int *) NULL) != end)
		{
1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
			freedfa(d);
			v->mem[t->retry] = TRIED;
			return caltdissect(v, t->right, begin, end);
		}
		freedfa(d);
		MDEBUG(("calt matched\n"));
		v->mem[t->retry] = TRYING;
	}

	er = cdissect(v, t->left, begin, end);
	if (er != REG_NOMATCH)
		return er;

	v->mem[t->retry] = TRIED;
	return caltdissect(v, t->right, begin, end);
}



#include "rege_dfa.c"