grep.c 24.0 KB
Newer Older
1 2
#include "cache.h"
#include "grep.h"
3
#include "userdiff.h"
4
#include "xdiff-interface.h"
5

6 7 8 9
/*
 * Queue a header-field pattern at the end of opt's header list.
 *
 * The new entry is tagged GREP_PATTERN_HEAD with origin "header" and
 * line number 0.  The pattern string is referenced, not copied.
 */
void append_header_grep_pattern(struct grep_opt *opt, enum grep_header_field field, const char *pat)
{
	struct grep_pat *entry = xcalloc(1, sizeof(*entry));

	entry->pattern = pat;
	entry->patternlen = strlen(pat);
	entry->origin = "header";
	entry->no = 0;
	entry->token = GREP_PATTERN_HEAD;
	entry->field = field;
	entry->next = NULL;

	/* Link at the tail, then move the tail to the new entry's link. */
	*opt->header_tail = entry;
	opt->header_tail = &entry->next;
}

20 21
/*
 * Convenience wrapper around append_grep_pat() for NUL-terminated
 * patterns: the length is taken with strlen().
 */
void append_grep_pattern(struct grep_opt *opt, const char *pat,
			 const char *origin, int no, enum grep_pat_token t)
{
	size_t len = strlen(pat);

	append_grep_pat(opt, pat, len, origin, no, t);
}

void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen,
		     const char *origin, int no, enum grep_pat_token t)
28 29 30
{
	struct grep_pat *p = xcalloc(1, sizeof(*p));
	p->pattern = pat;
31
	p->patternlen = patlen;
32 33 34 35 36 37 38 39
	p->origin = origin;
	p->no = no;
	p->token = t;
	*opt->pattern_tail = p;
	opt->pattern_tail = &p->next;
	p->next = NULL;
}

F
Fredrik Kuivinen 已提交
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
/*
 * Return a newly allocated copy of opt with its own pattern and header
 * lists.
 *
 * All scalar settings are copied with a struct assignment.  Both lists
 * are then rebuilt so that the copy never shares list nodes or tail
 * pointers with the source: after "*ret = *opt" the copied header_tail
 * would otherwise still point into the source, and appending a header
 * pattern to the copy would modify the (const) original.
 *
 * Pattern strings themselves are shared, not duplicated.
 */
struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
{
	struct grep_pat *pat;
	struct grep_opt *ret = xmalloc(sizeof(struct grep_opt));
	*ret = *opt;

	ret->pattern_list = NULL;
	ret->pattern_tail = &ret->pattern_list;
	ret->header_list = NULL;
	ret->header_tail = &ret->header_list;

	/* Give the copy its own header patterns. */
	for (pat = opt->header_list; pat != NULL; pat = pat->next)
		append_header_grep_pattern(ret, pat->field, pat->pattern);

	for (pat = opt->pattern_list; pat != NULL; pat = pat->next) {
		if (pat->token == GREP_PATTERN_HEAD)
			append_header_grep_pattern(ret, pat->field,
						   pat->pattern);
		else
			append_grep_pat(ret, pat->pattern, pat->patternlen,
					pat->origin, pat->no, pat->token);
	}

	return ret;
}

62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
/*
 * Report a pattern that failed to compile and die.
 *
 * The message is prefixed with the pattern's origin (and line number
 * when one is recorded).  The prefix is built with snprintf() so that a
 * long origin string is truncated instead of overflowing the buffer.
 */
static NORETURN void compile_regexp_failed(const struct grep_pat *p,
		const char *error)
{
	char where[1024];

	if (p->no)
		snprintf(where, sizeof(where), "In '%s' at %d, ",
			 p->origin, p->no);
	else if (p->origin)
		snprintf(where, sizeof(where), "%s, ", p->origin);
	else
		where[0] = 0;

	die("%s'%s': %s", where, p->pattern, error);
}

77 78
/*
 * Prepare one pattern for matching.
 *
 * The per-pattern word_regexp, ignore_case and fixed flags are copied
 * from opt.  Fixed-string patterns need no compilation; everything else
 * goes through regcomp() with opt->regflags, and a compile error is
 * fatal.
 */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
	char msg[1024];
	int rc;

	p->word_regexp = opt->word_regexp;
	p->ignore_case = opt->ignore_case;
	p->fixed = opt->fixed;

	if (p->fixed)
		return;

	rc = regcomp(&p->regexp, p->pattern, opt->regflags);
	if (!rc)
		return;

	regerror(rc, &p->regexp, msg, sizeof(msg));
	regfree(&p->regexp);
	compile_regexp_failed(p, msg);
}

J
Junio C Hamano 已提交
97
static struct grep_expr *compile_pattern_or(struct grep_pat **);
98 99 100 101 102 103
static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
{
	struct grep_pat *p;
	struct grep_expr *x;

	p = *list;
104 105
	if (!p)
		return NULL;
106 107
	switch (p->token) {
	case GREP_PATTERN: /* atom */
108 109
	case GREP_PATTERN_HEAD:
	case GREP_PATTERN_BODY:
110 111 112 113 114 115 116
		x = xcalloc(1, sizeof (struct grep_expr));
		x->node = GREP_NODE_ATOM;
		x->u.atom = p;
		*list = p->next;
		return x;
	case GREP_OPEN_PAREN:
		*list = p->next;
J
Junio C Hamano 已提交
117
		x = compile_pattern_or(list);
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
		if (!*list || (*list)->token != GREP_CLOSE_PAREN)
			die("unmatched parenthesis");
		*list = (*list)->next;
		return x;
	default:
		return NULL;
	}
}

static struct grep_expr *compile_pattern_not(struct grep_pat **list)
{
	struct grep_pat *p;
	struct grep_expr *x;

	p = *list;
133 134
	if (!p)
		return NULL;
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
	switch (p->token) {
	case GREP_NOT:
		if (!p->next)
			die("--not not followed by pattern expression");
		*list = p->next;
		x = xcalloc(1, sizeof (struct grep_expr));
		x->node = GREP_NODE_NOT;
		x->u.unary = compile_pattern_not(list);
		if (!x->u.unary)
			die("--not followed by non pattern expression");
		return x;
	default:
		return compile_pattern_atom(list);
	}
}

/*
 * Parse "not-expr [--and and-expr]".
 *
 * Returns the parsed expression, or NULL when nothing could be parsed
 * and no --and follows.  Dies when --and lacks an operand on either
 * side; a missing left operand used to produce an AND node with a NULL
 * child, which was only detected later at match time.
 */
static struct grep_expr *compile_pattern_and(struct grep_pat **list)
{
	struct grep_pat *p;
	struct grep_expr *x, *y, *z;

	x = compile_pattern_not(list);
	p = *list;
	if (p && p->token == GREP_AND) {
		if (!x)
			die("--and not preceded by pattern expression");
		if (!p->next)
			die("--and not followed by pattern expression");
		*list = p->next;
		y = compile_pattern_and(list);
		if (!y)
			die("--and not followed by pattern expression");
		z = xcalloc(1, sizeof (struct grep_expr));
		z->node = GREP_NODE_AND;
		z->u.binary.left = x;
		z->u.binary.right = y;
		return z;
	}
	return x;
}

/*
 * Parse a sequence of and-expressions; adjacent expressions are
 * implicitly OR-ed together.  Parsing stops at the end of the list or
 * at a closing parenthesis, which is left for the caller to consume.
 *
 * Dies when a token that cannot start an expression follows.
 */
static struct grep_expr *compile_pattern_or(struct grep_pat **list)
{
	struct grep_expr *lhs, *rhs, *node;
	struct grep_pat *rest;

	lhs = compile_pattern_and(list);
	rest = *list;
	if (!lhs || !rest || rest->token == GREP_CLOSE_PAREN)
		return lhs;

	rhs = compile_pattern_or(list);
	if (!rhs)
		die("not a pattern expression %s", rest->pattern);

	node = xcalloc(1, sizeof(struct grep_expr));
	node->node = GREP_NODE_OR;
	node->u.binary.left = lhs;
	node->u.binary.right = rhs;
	return node;
}

/*
 * Entry point of the expression parser: a full expression is an
 * or-expression.
 */
static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
{
	struct grep_expr *top = compile_pattern_or(list);

	return top;
}

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
/* Allocate an expression node that always evaluates to true. */
static struct grep_expr *grep_true_expr(void)
{
	struct grep_expr *node = xcalloc(1, sizeof(struct grep_expr));

	node->node = GREP_NODE_TRUE;
	return node;
}

/* Allocate an OR node joining the two given sub-expressions. */
static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
{
	struct grep_expr *node = xcalloc(1, sizeof(struct grep_expr));

	node->u.binary.right = right;
	node->u.binary.left = left;
	node->node = GREP_NODE_OR;
	return node;
}

215
static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
216 217
{
	struct grep_pat *p;
218
	struct grep_expr *header_expr;
219 220
	struct grep_expr *(header_group[GREP_HEADER_FIELD_MAX]);
	enum grep_header_field fld;
221

222 223 224 225 226 227 228 229 230
	if (!opt->header_list)
		return NULL;
	p = opt->header_list;
	for (p = opt->header_list; p; p = p->next) {
		if (p->token != GREP_PATTERN_HEAD)
			die("bug: a non-header pattern in grep header list.");
		if (p->field < 0 || GREP_HEADER_FIELD_MAX <= p->field)
			die("bug: unknown header field %d", p->field);
		compile_regexp(p, opt);
231
	}
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258

	for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++)
		header_group[fld] = NULL;

	for (p = opt->header_list; p; p = p->next) {
		struct grep_expr *h;
		struct grep_pat *pp = p;

		h = compile_pattern_atom(&pp);
		if (!h || pp != p->next)
			die("bug: malformed header expr");
		if (!header_group[p->field]) {
			header_group[p->field] = h;
			continue;
		}
		header_group[p->field] = grep_or_expr(h, header_group[p->field]);
	}

	header_expr = NULL;

	for (fld = 0; fld < GREP_HEADER_FIELD_MAX; fld++) {
		if (!header_group[fld])
			continue;
		if (!header_expr)
			header_expr = grep_true_expr();
		header_expr = grep_or_expr(header_group[fld], header_expr);
	}
259 260 261 262 263 264 265
	return header_expr;
}

void compile_grep_patterns(struct grep_opt *opt)
{
	struct grep_pat *p;
	struct grep_expr *header_expr = prep_header_patterns(opt);
J
Junio C Hamano 已提交
266

267
	for (p = opt->pattern_list; p; p = p->next) {
268 269 270 271
		switch (p->token) {
		case GREP_PATTERN: /* atom */
		case GREP_PATTERN_HEAD:
		case GREP_PATTERN_BODY:
272
			compile_regexp(p, opt);
273 274
			break;
		default:
275
			opt->extended = 1;
276 277
			break;
		}
278 279
	}

280 281 282
	if (opt->all_match || header_expr)
		opt->extended = 1;
	else if (!opt->extended)
283 284 285
		return;

	p = opt->pattern_list;
286 287
	if (p)
		opt->pattern_expression = compile_pattern_expr(&p);
288 289
	if (p)
		die("incomplete pattern expression: %s", p->pattern);
290 291 292 293

	if (!header_expr)
		return;

294
	if (!opt->pattern_expression)
295
		opt->pattern_expression = header_expr;
296 297 298
	else
		opt->pattern_expression = grep_or_expr(opt->pattern_expression,
						       header_expr);
299
	opt->all_match = 1;
300 301
}

302 303 304
static void free_pattern_expr(struct grep_expr *x)
{
	switch (x->node) {
305
	case GREP_NODE_TRUE:
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
	case GREP_NODE_ATOM:
		break;
	case GREP_NODE_NOT:
		free_pattern_expr(x->u.unary);
		break;
	case GREP_NODE_AND:
	case GREP_NODE_OR:
		free_pattern_expr(x->u.binary.left);
		free_pattern_expr(x->u.binary.right);
		break;
	}
	free(x);
}

/*
 * Release the pattern list of opt and, in extended mode, its expression
 * tree.
 *
 * Extended mode can be on without an expression having been built (for
 * example all_match with an empty pattern list), so the tree is only
 * freed when it exists.
 */
void free_grep_patterns(struct grep_opt *opt)
{
	struct grep_pat *p, *n;

	for (p = opt->pattern_list; p; p = n) {
		n = p->next;
		switch (p->token) {
		case GREP_PATTERN: /* atom */
		case GREP_PATTERN_HEAD:
		case GREP_PATTERN_BODY:
			regfree(&p->regexp);
			break;
		default:
			break;
		}
		free(p);
	}

	if (!opt->extended || !opt->pattern_expression)
		return;
	free_pattern_expr(opt->pattern_expression);
}

343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Advance cp to the next '\n' or until *left bytes have been consumed,
 * whichever comes first.  *left is reduced by the number of bytes
 * skipped; the returned pointer is where the scan stopped.
 */
static char *end_of_line(char *cp, unsigned long *left)
{
	unsigned long remaining;

	for (remaining = *left; remaining && *cp != '\n'; remaining--)
		cp++;
	*left = remaining;
	return cp;
}

/*
 * Return 1 when ch can be part of a word (alphanumeric or '_'), 0
 * otherwise.
 *
 * The value is converted to unsigned char before classification: with
 * the standard <ctype.h> functions, passing a negative char is
 * undefined.
 */
static int word_char(char ch)
{
	return isalnum((unsigned char)ch) || ch == '_';
}

359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
/*
 * Emit size bytes of data through opt->output.  When coloring is
 * enabled and a non-empty color sequence is given, the data is wrapped
 * between that sequence and GIT_COLOR_RESET.
 */
static void output_color(struct grep_opt *opt, const void *data, size_t size,
			 const char *color)
{
	int colored = opt->color && color && color[0];

	if (colored)
		opt->output(opt, color, strlen(color));
	opt->output(opt, data, size);
	if (colored)
		opt->output(opt, GIT_COLOR_RESET, strlen(GIT_COLOR_RESET));
}

/*
 * Emit the field separator: the given sign in the separator color, or a
 * single NUL byte when null_following_name is set.
 */
static void output_sep(struct grep_opt *opt, char sign)
{
	if (!opt->null_following_name) {
		output_color(opt, &sign, 1, opt->color_sep);
		return;
	}
	opt->output(opt, "\0", 1);
}

378 379
static void show_name(struct grep_opt *opt, const char *name)
{
380
	output_color(opt, name, strlen(name), opt->color_filename);
F
Fredrik Kuivinen 已提交
381
	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
382 383
}

384 385
static int fixmatch(struct grep_pat *p, char *line, char *eol,
		    regmatch_t *match)
386
{
387
	char *hit;
388

389
	if (p->ignore_case) {
390 391
		char *s = line;
		do {
392
			hit = strcasestr(s, p->pattern);
393 394 395 396 397
			if (hit)
				break;
			s += strlen(s) + 1;
		} while (s < eol);
	} else
398
		hit = memmem(line, eol - line, p->pattern, p->patternlen);
399

400 401 402 403 404 405
	if (!hit) {
		match->rm_so = match->rm_eo = -1;
		return REG_NOMATCH;
	}
	else {
		match->rm_so = hit - line;
406
		match->rm_eo = match->rm_so + p->patternlen;
407 408 409 410
		return 0;
	}
}

411 412 413 414 415 416 417 418 419 420 421
/*
 * Run regexec() on the line, reporting the overall match in *match.
 *
 * Where the platform provides REG_STARTEND, the search range is passed
 * explicitly as [0, eol - line) through match, so the line does not
 * have to be NUL-terminated at eol.
 */
static int regmatch(const regex_t *preg, char *line, char *eol,
		    regmatch_t *match, int eflags)
{
	int flags = eflags;

#ifdef REG_STARTEND
	match->rm_so = 0;
	match->rm_eo = eol - line;
	flags |= REG_STARTEND;
#endif
	return regexec(preg, line, 1, match, flags);
}

422 423 424 425 426 427 428 429 430 431 432 433 434
/*
 * Match one pattern against [line, eol) with the fixed-string or the
 * regexp matcher, depending on p->fixed.  Returns nonzero on a hit
 * (the reverse of the matchers' own 0-on-success convention).
 */
static int patmatch(struct grep_pat *p, char *line, char *eol,
		    regmatch_t *match, int eflags)
{
	int status;

	status = p->fixed
		? fixmatch(p, line, eol, match)
		: regmatch(&p->regexp, line, eol, match, eflags);
	return !status;
}

435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
/*
 * Cut a header line right after its last '>'.
 *
 * Scans backwards from *eol_p (never looking at bol[0]).  When a '>' is
 * found, the byte following it is overwritten with NUL, *eol_p is moved
 * to that position, and the overwritten byte is returned so the caller
 * can restore it.  Returns 0 and changes nothing when no '>' is found.
 */
static int strip_timestamp(char *bol, char **eol_p)
{
	char *cursor;

	for (cursor = *eol_p - 1; bol < cursor; cursor--) {
		int saved;

		if (*cursor != '>')
			continue;
		cursor++;
		*eol_p = cursor;
		saved = *cursor;
		*cursor = '\0';
		return saved;
	}
	return 0;
}

/*
 * Line prefixes of the commit header fields that header patterns can
 * match, indexed by the pattern's field value.  "len" is the length of
 * the prefix, including its trailing space.
 */
static struct {
	const char *field;
	size_t len;
} header_field[] = {
	{ "author ", 7 },
	{ "committer ", 10 },
};

459
static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
460 461
			     enum grep_context ctx,
			     regmatch_t *pmatch, int eflags)
462 463
{
	int hit = 0;
464
	int saved_ch = 0;
R
René Scharfe 已提交
465
	const char *start = bol;
466

467 468 469 470
	if ((p->token != GREP_PATTERN) &&
	    ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
		return 0;

471 472 473 474 475 476 477 478 479 480 481 482
	if (p->token == GREP_PATTERN_HEAD) {
		const char *field;
		size_t len;
		assert(p->field < ARRAY_SIZE(header_field));
		field = header_field[p->field].field;
		len = header_field[p->field].len;
		if (strncmp(bol, field, len))
			return 0;
		bol += len;
		saved_ch = strip_timestamp(bol, &eol);
	}

483
 again:
484
	hit = patmatch(p, bol, eol, pmatch, eflags);
485

486
	if (hit && p->word_regexp) {
487
		if ((pmatch[0].rm_so < 0) ||
488
		    (eol - bol) < pmatch[0].rm_so ||
489 490 491 492 493 494 495 496 497 498
		    (pmatch[0].rm_eo < 0) ||
		    (eol - bol) < pmatch[0].rm_eo)
			die("regexp returned nonsense");

		/* Match beginning must be either beginning of the
		 * line, or at word boundary (i.e. the last char must
		 * not be a word char).  Similarly, match end must be
		 * either end of the line, or at word boundary
		 * (i.e. the next char must not be a word char).
		 */
499
		if ( ((pmatch[0].rm_so == 0) ||
500 501 502 503 504 505 506
		      !word_char(bol[pmatch[0].rm_so-1])) &&
		     ((pmatch[0].rm_eo == (eol-bol)) ||
		      !word_char(bol[pmatch[0].rm_eo])) )
			;
		else
			hit = 0;

507 508 509 510
		/* Words consist of at least one character. */
		if (pmatch->rm_so == pmatch->rm_eo)
			hit = 0;

511 512 513 514
		if (!hit && pmatch[0].rm_so + bol + 1 < eol) {
			/* There could be more than one match on the
			 * line, and the first match might not be
			 * strict word match.  But later ones could be!
515 516
			 * Forward to the next possible start, i.e. the
			 * next position following a non-word char.
517 518
			 */
			bol = pmatch[0].rm_so + bol + 1;
519 520
			while (word_char(bol[-1]) && bol < eol)
				bol++;
521
			eflags |= REG_NOTBOL;
522 523
			if (bol < eol)
				goto again;
524 525
		}
	}
526 527
	if (p->token == GREP_PATTERN_HEAD && saved_ch)
		*eol = saved_ch;
R
René Scharfe 已提交
528 529 530 531
	if (hit) {
		pmatch[0].rm_so += bol - start;
		pmatch[0].rm_eo += bol - start;
	}
532 533 534
	return hit;
}

535 536
static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
			   enum grep_context ctx, int collect_hits)
537
{
J
Junio C Hamano 已提交
538
	int h = 0;
539
	regmatch_t match;
J
Junio C Hamano 已提交
540

541 542
	if (!x)
		die("Not a valid grep expression");
543
	switch (x->node) {
544 545 546
	case GREP_NODE_TRUE:
		h = 1;
		break;
547
	case GREP_NODE_ATOM:
548
		h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
549 550
		break;
	case GREP_NODE_NOT:
551
		h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
J
Junio C Hamano 已提交
552
		break;
553
	case GREP_NODE_AND:
554
		if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
555
			return 0;
556
		h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
J
Junio C Hamano 已提交
557
		break;
558
	case GREP_NODE_OR:
J
Junio C Hamano 已提交
559
		if (!collect_hits)
560
			return (match_expr_eval(x->u.binary.left,
J
Junio C Hamano 已提交
561
						bol, eol, ctx, 0) ||
562
				match_expr_eval(x->u.binary.right,
J
Junio C Hamano 已提交
563
						bol, eol, ctx, 0));
564
		h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
J
Junio C Hamano 已提交
565
		x->u.binary.left->hit |= h;
566
		h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
J
Junio C Hamano 已提交
567 568
		break;
	default:
569
		die("Unexpected node type (internal error) %d", x->node);
570
	}
J
Junio C Hamano 已提交
571 572 573
	if (collect_hits)
		x->hit |= h;
	return h;
574 575
}

576
static int match_expr(struct grep_opt *opt, char *bol, char *eol,
J
Junio C Hamano 已提交
577
		      enum grep_context ctx, int collect_hits)
578 579
{
	struct grep_expr *x = opt->pattern_expression;
580
	return match_expr_eval(x, bol, eol, ctx, collect_hits);
581 582
}

583
static int match_line(struct grep_opt *opt, char *bol, char *eol,
J
Junio C Hamano 已提交
584
		      enum grep_context ctx, int collect_hits)
585 586
{
	struct grep_pat *p;
587 588
	regmatch_t match;

589
	if (opt->extended)
J
Junio C Hamano 已提交
590 591 592
		return match_expr(opt, bol, eol, ctx, collect_hits);

	/* we do not call with collect_hits without being extended */
593
	for (p = opt->pattern_list; p; p = p->next) {
594
		if (match_one_pattern(p, bol, eol, ctx, &match, 0))
595 596 597 598 599
			return 1;
	}
	return 0;
}

R
René Scharfe 已提交
600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
/*
 * Match p against [bol, eol) and fold the result into *pmatch, which
 * tracks the best match seen so far: the earliest start, and among
 * equal starts the longest.  A *pmatch with a negative offset means no
 * match has been recorded yet.
 *
 * Returns 1 when p matched (whether or not it replaced *pmatch), 0
 * otherwise.
 */
static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
			      enum grep_context ctx,
			      regmatch_t *pmatch, int eflags)
{
	regmatch_t cand;
	int have_best;

	if (!match_one_pattern(p, bol, eol, ctx, &cand, eflags))
		return 0;
	if (cand.rm_so < 0 || cand.rm_eo < 0)
		return 0;

	have_best = pmatch->rm_so >= 0 && pmatch->rm_eo >= 0;
	if (have_best &&
	    (cand.rm_so > pmatch->rm_so ||
	     (cand.rm_so == pmatch->rm_so && cand.rm_eo < pmatch->rm_eo)))
		return 1; /* the recorded match is still the better one */

	pmatch->rm_so = cand.rm_so;
	pmatch->rm_eo = cand.rm_eo;
	return 1;
}

/*
 * Find the best next match of any pattern in [bol, eol).
 *
 * *pmatch is reset to "no match" (-1/-1) first and then updated by each
 * pattern token in the list; operator tokens are skipped.  Returns
 * nonzero when at least one pattern matched.
 */
static int next_match(struct grep_opt *opt, char *bol, char *eol,
		      enum grep_context ctx, regmatch_t *pmatch, int eflags)
{
	struct grep_pat *pat;
	int found = 0;

	pmatch->rm_so = pmatch->rm_eo = -1;
	if (bol >= eol)
		return 0;

	for (pat = opt->pattern_list; pat; pat = pat->next) {
		if (pat->token != GREP_PATTERN &&
		    pat->token != GREP_PATTERN_HEAD &&
		    pat->token != GREP_PATTERN_BODY)
			continue;
		found |= match_next_pattern(pat, bol, eol, ctx,
					    pmatch, eflags);
	}
	return found;
}

static void show_line(struct grep_opt *opt, char *bol, char *eol,
		      const char *name, unsigned lno, char sign)
{
	int rest = eol - bol;
649
	char *line_color = NULL;
R
René Scharfe 已提交
650

R
René Scharfe 已提交
651
	if (opt->pre_context || opt->post_context) {
652
		if (opt->last_shown == 0) {
653 654 655
			if (opt->show_hunk_mark) {
				output_color(opt, "--", 2, opt->color_sep);
				opt->output(opt, "\n", 1);
656
			}
657 658 659 660
		} else if (lno > opt->last_shown + 1) {
			output_color(opt, "--", 2, opt->color_sep);
			opt->output(opt, "\n", 1);
		}
661 662 663
	}
	opt->last_shown = lno;

F
Fredrik Kuivinen 已提交
664
	if (opt->pathname) {
665 666
		output_color(opt, name, strlen(name), opt->color_filename);
		output_sep(opt, sign);
F
Fredrik Kuivinen 已提交
667 668 669 670
	}
	if (opt->linenum) {
		char buf[32];
		snprintf(buf, sizeof(buf), "%d", lno);
671 672
		output_color(opt, buf, strlen(buf), opt->color_lineno);
		output_sep(opt, sign);
F
Fredrik Kuivinen 已提交
673
	}
R
René Scharfe 已提交
674 675 676 677 678 679
	if (opt->color) {
		regmatch_t match;
		enum grep_context ctx = GREP_CONTEXT_BODY;
		int ch = *eol;
		int eflags = 0;

680 681 682 683 684 685
		if (sign == ':')
			line_color = opt->color_selected;
		else if (sign == '-')
			line_color = opt->color_context;
		else if (sign == '=')
			line_color = opt->color_function;
R
René Scharfe 已提交
686 687
		*eol = '\0';
		while (next_match(opt, bol, eol, ctx, &match, eflags)) {
688 689
			if (match.rm_so == match.rm_eo)
				break;
F
Fredrik Kuivinen 已提交
690

691
			output_color(opt, bol, match.rm_so, line_color);
692 693 694
			output_color(opt, bol + match.rm_so,
				     match.rm_eo - match.rm_so,
				     opt->color_match);
R
René Scharfe 已提交
695 696 697 698 699 700
			bol += match.rm_eo;
			rest -= match.rm_eo;
			eflags = REG_NOTBOL;
		}
		*eol = ch;
	}
701
	output_color(opt, bol, rest, line_color);
F
Fredrik Kuivinen 已提交
702
	opt->output(opt, "\n", 1);
R
René Scharfe 已提交
703 704
}

705
static int match_funcname(struct grep_opt *opt, char *bol, char *eol)
706
{
707 708 709 710 711 712 713
	xdemitconf_t *xecfg = opt->priv;
	if (xecfg && xecfg->find_func) {
		char buf[1];
		return xecfg->find_func(bol, eol - bol, buf, 1,
					xecfg->find_func_priv) >= 0;
	}

714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733
	if (bol == eol)
		return 0;
	if (isalpha(*bol) || *bol == '_' || *bol == '$')
		return 1;
	return 0;
}

/*
 * Walk backwards from the line starting at bol (line number lno)
 * towards the start of buf and show, with sign '=', the nearest earlier
 * line that looks like a function header.  The walk stops without
 * output once it reaches a line that has already been shown.
 */
static void show_funcname_line(struct grep_opt *opt, const char *name,
			       char *buf, char *bol, unsigned lno)
{
	char *line_start = bol;
	unsigned cur = lno;

	while (line_start > buf) {
		/* the byte before a line start ends the previous line */
		char *line_end = line_start - 1;

		for (line_start = line_end;
		     line_start > buf && line_start[-1] != '\n';
		     line_start--)
			;
		cur--;

		if (cur <= opt->last_shown)
			return;

		if (match_funcname(opt, line_start, line_end)) {
			show_line(opt, line_start, line_end, name, cur, '=');
			return;
		}
	}
}

741 742 743
/*
 * Show the context lines that precede the hit at line lno (which starts
 * at bol).
 *
 * The window starts pre_context lines before the hit, but never at or
 * before a line that was already shown.  While rewinding, the nearest
 * function header inside the window is remembered when opt->funcname is
 * set; if none is found there, the search continues further back.  The
 * window is then printed forwards, with '=' on the function header line
 * and '-' on the others.
 */
static void show_pre_context(struct grep_opt *opt, const char *name, char *buf,
			     char *bol, unsigned lno)
{
	unsigned first = 1, at = lno, func_at = 0;
	int want_func = opt->funcname;
	char *pos = bol;

	if (opt->pre_context < lno)
		first = lno - opt->pre_context;
	if (first <= opt->last_shown)
		first = opt->last_shown + 1;

	/* Rewind to the first line of the window. */
	while (pos > buf && at > first) {
		char *line_end = pos - 1;

		for (pos = line_end; pos > buf && pos[-1] != '\n'; pos--)
			;
		at--;
		if (want_func && match_funcname(opt, pos, line_end)) {
			func_at = at;
			want_func = 0;
		}
	}

	/* No function header inside the window: look further back. */
	if (want_func)
		show_funcname_line(opt, name, buf, pos, at);

	/* Print the window, front to back. */
	for (; at < lno; at++) {
		char *line_end = pos;
		char sign = (at == func_at) ? '=' : '-';

		while (*line_end != '\n')
			line_end++;
		show_line(opt, pos, line_end, name, at, sign);
		pos = line_end + 1;
	}
}

781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
/*
 * Decide whether the look_ahead() shortcut may be used: only for
 * non-inverted, non-extended searches whose pattern list consists
 * solely of plain GREP_PATTERN entries.
 */
static int should_lookahead(struct grep_opt *opt)
{
	struct grep_pat *pat;

	/* too complex, or inverted: punt */
	if (opt->extended || opt->invert)
		return 0;

	for (pat = opt->pattern_list; pat; pat = pat->next)
		if (pat->token != GREP_PATTERN)
			return 0; /* punt for "header only" and stuff */
	return 1;
}

/*
 * Skip ahead to the line containing the earliest match of any pattern
 * in the remaining buffer.
 *
 * Returns 1 when nothing in the rest of the buffer matches; *bol_p is
 * then moved to the end and *left_p set to 0.  Otherwise returns 0 with
 * *bol_p at the start of the line holding the earliest match, and
 * *left_p and *lno_p adjusted for the lines skipped.
 */
static int look_ahead(struct grep_opt *opt,
		      unsigned long *left_p,
		      unsigned *lno_p,
		      char **bol_p)
{
	char *start = *bol_p;
	char *limit = start + *left_p;
	unsigned lno = *lno_p;
	regoff_t earliest = -1;
	struct grep_pat *pat;
	char *scan, *hit_bol;

	for (pat = opt->pattern_list; pat; pat = pat->next) {
		regmatch_t m;

		if (!patmatch(pat, start, limit, &m, 0))
			continue;
		if (m.rm_so < 0 || m.rm_eo < 0)
			continue;
		if (earliest < 0 || m.rm_so < earliest)
			earliest = m.rm_so;
	}

	if (earliest < 0) {
		*bol_p = limit;
		*left_p = 0;
		return 1;
	}

	/* Back up to the beginning of the line holding the match. */
	hit_bol = start + earliest;
	while (start < hit_bol && hit_bol[-1] != '\n')
		hit_bol--;

	/* Account for the lines being skipped. */
	for (scan = start; scan < hit_bol; scan++)
		if (*scan == '\n')
			lno++;

	*left_p -= hit_bol - start;
	*bol_p = hit_bol;
	*lno_p = lno;
	return 0;
}

F
Fredrik Kuivinen 已提交
837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
/*
 * Return 1 when it is safe to run the grep with threads, 0 otherwise.
 *
 * When function names are requested and real output lines will be
 * produced, grep_buffer_1 may use the attribute machinery.  The
 * attribute code is not thread safe, so threads are disabled then.
 */
int grep_threads_ok(const struct grep_opt *opt)
{
	int may_use_attr = opt->funcname && !opt->unmatch_name_only &&
			   !opt->status_only && !opt->name_only;

	return may_use_attr ? 0 : 1;
}

/*
 * Default output callback: write the buffer to stdout as one item of
 * size bytes.  opt is unused here.
 */
static void std_output(struct grep_opt *opt, const void *buf, size_t size)
{
	FILE *out = stdout;

	fwrite(buf, size, 1, out);
}

J
Junio C Hamano 已提交
855 856
static int grep_buffer_1(struct grep_opt *opt, const char *name,
			 char *buf, unsigned long size, int collect_hits)
857 858 859 860 861 862 863
{
	char *bol = buf;
	unsigned long left = size;
	unsigned lno = 1;
	unsigned last_hit = 0;
	int binary_match_only = 0;
	unsigned count = 0;
864
	int try_lookahead = 0;
865
	enum grep_context ctx = GREP_CONTEXT_HEAD;
866
	xdemitconf_t xecfg;
867

F
Fredrik Kuivinen 已提交
868 869 870
	if (!opt->output)
		opt->output = std_output;

871 872 873 874 875
	if (opt->last_shown && (opt->pre_context || opt->post_context) &&
	    opt->output == std_output)
		opt->show_hunk_mark = 1;
	opt->last_shown = 0;

876 877 878
	switch (opt->binary) {
	case GREP_BINARY_DEFAULT:
		if (buffer_is_binary(buf, size))
879
			binary_match_only = 1;
880 881 882
		break;
	case GREP_BINARY_NOMATCH:
		if (buffer_is_binary(buf, size))
883
			return 0; /* Assume unmatch */
884 885 886 887 888
		break;
	case GREP_BINARY_TEXT:
		break;
	default:
		die("bug: unknown binary handling mode");
889 890
	}

891 892 893 894 895 896 897 898 899 900
	memset(&xecfg, 0, sizeof(xecfg));
	if (opt->funcname && !opt->unmatch_name_only && !opt->status_only &&
	    !opt->name_only && !binary_match_only && !collect_hits) {
		struct userdiff_driver *drv = userdiff_find_by_path(name);
		if (drv && drv->funcname.pattern) {
			const struct userdiff_funcname *pe = &drv->funcname;
			xdiff_set_find_func(&xecfg, pe->pattern, pe->cflags);
			opt->priv = &xecfg;
		}
	}
901
	try_lookahead = should_lookahead(opt);
902

903 904
	while (left) {
		char *eol, ch;
J
Junio C Hamano 已提交
905
		int hit;
906

907
		/*
908
		 * look_ahead() skips quickly to the line that possibly
909 910 911 912 913 914 915 916 917 918 919 920
		 * has the next hit; don't call it if we need to do
		 * something more than just skipping the current line
		 * in response to an unmatch for the current line.  E.g.
		 * inside a post-context window, we will show the current
		 * line as a context around the previous hit when it
		 * doesn't hit.
		 */
		if (try_lookahead
		    && !(last_hit
			 && lno <= last_hit + opt->post_context)
		    && look_ahead(opt, &left, &lno, &bol))
			break;
921 922 923 924
		eol = end_of_line(bol, &left);
		ch = *eol;
		*eol = 0;

925 926 927
		if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
			ctx = GREP_CONTEXT_BODY;

J
Junio C Hamano 已提交
928
		hit = match_line(opt, bol, eol, ctx, collect_hits);
929 930
		*eol = ch;

J
Junio C Hamano 已提交
931 932 933
		if (collect_hits)
			goto next_line;

934 935 936 937 938 939 940 941 942 943 944 945 946 947 948
		/* "grep -v -e foo -e bla" should list lines
		 * that do not have either, so inversion should
		 * be done outside.
		 */
		if (opt->invert)
			hit = !hit;
		if (opt->unmatch_name_only) {
			if (hit)
				return 0;
			goto next_line;
		}
		if (hit) {
			count++;
			if (opt->status_only)
				return 1;
R
René Scharfe 已提交
949 950 951 952
			if (opt->name_only) {
				show_name(opt, name);
				return 1;
			}
R
René Scharfe 已提交
953 954
			if (opt->count)
				goto next_line;
955
			if (binary_match_only) {
F
Fredrik Kuivinen 已提交
956
				opt->output(opt, "Binary file ", 12);
957 958
				output_color(opt, name, strlen(name),
					     opt->color_filename);
F
Fredrik Kuivinen 已提交
959
				opt->output(opt, " matches\n", 9);
960 961 962 963 964
				return 1;
			}
			/* Hit at this line.  If we haven't shown the
			 * pre-context lines, we would need to show them.
			 */
965 966
			if (opt->pre_context)
				show_pre_context(opt, name, buf, bol, lno);
967 968
			else if (opt->funcname)
				show_funcname_line(opt, name, buf, bol, lno);
R
René Scharfe 已提交
969
			show_line(opt, bol, eol, name, lno, ':');
970
			last_hit = lno;
971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987
		}
		else if (last_hit &&
			 lno <= last_hit + opt->post_context) {
			/* If the last hit is within the post context,
			 * we need to show this line.
			 */
			show_line(opt, bol, eol, name, lno, '-');
		}

	next_line:
		bol = eol + 1;
		if (!left)
			break;
		left--;
		lno++;
	}

J
Junio C Hamano 已提交
988 989
	if (collect_hits)
		return 0;
990

991 992 993 994
	if (opt->status_only)
		return 0;
	if (opt->unmatch_name_only) {
		/* We did not see any hit, so we want to show this */
995
		show_name(opt, name);
996 997 998
		return 1;
	}

999 1000 1001
	xdiff_clear_find_func(&xecfg);
	opt->priv = NULL;

1002 1003 1004 1005 1006
	/* NEEDSWORK:
	 * The real "grep -c foo *.c" gives many "bar.c:0" lines,
	 * which feels mostly useless but sometimes useful.  Maybe
	 * make it another option?  For now suppress them.
	 */
F
Fredrik Kuivinen 已提交
1007 1008
	if (opt->count && count) {
		char buf[32];
1009 1010 1011
		output_color(opt, name, strlen(name), opt->color_filename);
		output_sep(opt, ':');
		snprintf(buf, sizeof(buf), "%u\n", count);
F
Fredrik Kuivinen 已提交
1012
		opt->output(opt, buf, strlen(buf));
R
René Scharfe 已提交
1013
		return 1;
F
Fredrik Kuivinen 已提交
1014
	}
1015 1016 1017
	return !!last_hit;
}

J
Junio C Hamano 已提交
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063
/*
 * Reset the hit markers that the all-match check looks at.
 *
 * Markers are meaningful only along the top-level chain of OR nodes:
 * each OR node on the right spine, its left child, and the node that
 * ends the chain are cleared.
 */
static void clr_hit_marker(struct grep_expr *x)
{
	for (;; x = x->u.binary.right) {
		x->hit = 0;
		if (x->node != GREP_NODE_OR)
			return;
		x->u.binary.left->hit = 0;
	}
}

/*
 * Check whether every top-level term was hit: the left child of each OR
 * node along the right spine, and finally the node that ends the chain.
 * Returns 0 as soon as an unhit term is found.
 */
static int chk_hit_marker(struct grep_expr *x)
{
	while (x->node == GREP_NODE_OR) {
		if (!x->u.binary.left->hit)
			return 0;
		x = x->u.binary.right;
	}
	return x->hit;
}

/*
 * Grep one buffer.
 *
 * Without all_match a single pass is enough.  With all_match the buffer
 * is first scanned in hit-collecting mode; only if every top-level term
 * of the expression hit somewhere in the buffer is the regular pass run
 * to produce output.  Returns the result of that pass, or 0 when the
 * all-match check fails.
 */
int grep_buffer(struct grep_opt *opt, const char *name, char *buf, unsigned long size)
{
	if (opt->all_match) {
		/* Start from clean markers, then collect hits buffer-wide. */
		clr_hit_marker(opt->pattern_expression);
		grep_buffer_1(opt, name, buf, size, 1);

		if (!chk_hit_marker(opt->pattern_expression))
			return 0;
	}

	return grep_buffer_1(opt, name, buf, size, 0);
}