diff.c 29.3 KB
Newer Older
1 2 3 4 5
/*
 * Copyright (C) 2005 Junio C Hamano
 */
#include <sys/types.h>
#include <sys/wait.h>
6
#include <signal.h>
7
#include "cache.h"
8
#include "quote.h"
9
#include "diff.h"
10
#include "diffcore.h"
11

J
Junio C Hamano 已提交
12
/* Options handed to the diff program; GIT_DIFF_OPTS may override them. */
static const char *diff_opts = "-pu";
/* An all-zero object name. */
static unsigned char null_sha1[20] = { 0, };

/* Turned on by diff_setup() for DIFF_SETUP_REVERSE. */
static int reverse_diff;
/* Turned on by diff_setup() for DIFF_SETUP_USE_SIZE_CACHE. */
static int use_size_cache;
17

18
static const char *external_diff(void)
19
{
J
Junio C Hamano 已提交
20
	static const char *external_diff_cmd = NULL;
21
	static int done_preparing = 0;
J
Jason Riedy 已提交
22
	const char *env_diff_opts;
23 24 25 26

	if (done_preparing)
		return external_diff_cmd;

27 28 29 30 31 32 33 34
	/*
	 * Default values above are meant to match the
	 * Linux kernel development style.  Examples of
	 * alternative styles you can specify via environment
	 * variables are:
	 *
	 * GIT_DIFF_OPTS="-c";
	 */
35
	external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
36 37

	/* In case external diff fails... */
38
	env_diff_opts = getenv("GIT_DIFF_OPTS");
J
Jason Riedy 已提交
39
	if (env_diff_opts) diff_opts = env_diff_opts;
40 41 42

	done_preparing = 1;
	return external_diff_cmd;
43 44
}

45 46
#define TEMPFILE_PATH_LEN		50

/*
 * Description of one side of a comparison as it is handed to the
 * diff program.  There are two slots, one per side.
 */
static struct diff_tempfile {
	const char *name; /* filename external diff should read from */
	char hex[41]; /* 40 hex digits plus NUL, or "." */
	char mode[10]; /* mode formatted with "%06o", or "." */
	char tmp_path[TEMPFILE_PATH_LEN]; /* path buffer for a created temporary file */
} diff_temp[2];

J
Junio C Hamano 已提交
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
/*
 * Count the lines in the named file.
 *
 * Every newline ends a line; a final line that lacks a trailing
 * newline still counts.  An empty file has zero lines.  A file that
 * cannot be opened is also reported as having zero lines, rather
 * than passing a NULL stream to fgetc().
 */
static int count_lines(const char *filename)
{
	FILE *in;
	int count = 0, ch;
	/* pretend a newline precedes the data so an empty file adds nothing */
	int last = '\n';

	in = fopen(filename, "r");
	if (!in)
		return 0;
	while ((ch = fgetc(in)) != EOF) {
		if (ch == '\n')
			count++;
		last = ch;
	}
	fclose(in);
	if (last != '\n')
		count++; /* no trailing newline */
	return count;
}

/*
 * Print the range part of a hunk header for a side that spans the
 * whole file: "0,0" for no lines, "1" for exactly one line, and
 * "1,<count>" for anything else.
 */
static void print_line_count(int count)
{
	if (count == 0)
		printf("0,0");
	else if (count == 1)
		printf("1");
	else
		printf("1,%d", count);
}

/*
 * Copy the named file to standard output, putting the character
 * `prefix` in front of every line.  When the file does not end with
 * a newline, one is supplied, followed by the
 * "\ No newline at end of file" marker line.
 *
 * A file that cannot be opened produces no output, rather than
 * passing a NULL stream to fgetc().
 */
static void copy_file(int prefix, const char *filename)
{
	FILE *in;
	int ch, nl_just_seen = 1;

	in = fopen(filename, "r");
	if (!in)
		return;
	while ((ch = fgetc(in)) != EOF) {
		/* the prefix goes out lazily, only once a line has content */
		if (nl_just_seen)
			putchar(prefix);
		putchar(ch);
		if (ch == '\n')
			nl_just_seen = 1;
		else
			nl_just_seen = 0;
	}
	fclose(in);
	if (!nl_just_seen)
		printf("\n\\ No newline at end of file\n");
}

/*
 * Write a patch that replaces the whole old file with the whole new
 * file as one hunk: the "---"/"+++" header, a hunk header covering
 * every line of both sides, then all old lines prefixed with '-'
 * and all new lines prefixed with '+'.
 */
static void emit_rewrite_diff(const char *name_a,
			      const char *name_b,
			      struct diff_tempfile *temp)
{
	/* temp[i].name is what gets read; name_a and name_b are labels only */
	const char *old_file = temp[0].name;
	const char *new_file = temp[1].name;
	int old_lines = count_lines(old_file);
	int new_lines = count_lines(new_file);

	printf("--- %s\n+++ %s\n@@ -", name_a, name_b);
	print_line_count(old_lines);
	printf(" +");
	print_line_count(new_lines);
	printf(" @@\n");

	/* a side without lines contributes nothing to the hunk body */
	if (old_lines != 0)
		copy_file('-', old_file);
	if (new_lines != 0)
		copy_file('+', new_file);
}

J
Junio C Hamano 已提交
131 132
static void builtin_diff(const char *name_a,
			 const char *name_b,
J
Junio C Hamano 已提交
133
			 struct diff_tempfile *temp,
J
Junio C Hamano 已提交
134 135
			 const char *xfrm_msg,
			 int complete_rewrite)
136
{
137
	int i, next_at, cmd_size;
138 139
	const char *const diff_cmd = "diff -L%s%s -L%s%s";
	const char *const diff_arg  = "%s %s||:"; /* "||:" is to return 0 */
140 141 142
	const char *input_name_sq[2];
	const char *path0[2];
	const char *path1[2];
J
Junio C Hamano 已提交
143
	const char *name_sq[2];
144
	char *cmd;
J
Junio C Hamano 已提交
145

146 147
	name_sq[0] = sq_quote(name_a);
	name_sq[1] = sq_quote(name_b);
J
Junio C Hamano 已提交
148

149 150
	/* diff_cmd and diff_arg have 6 %s in total which makes
	 * the sum of these strings 12 bytes larger than required.
151
	 * we use 2 spaces around diff-opts, and we need to count
152
	 * terminating NUL, so we subtract 9 here.
153
	 */
154
	cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
155
			strlen(diff_arg) - 9);
156
	for (i = 0; i < 2; i++) {
157
		input_name_sq[i] = sq_quote(temp[i].name);
158 159 160 161
		if (!strcmp(temp[i].name, "/dev/null")) {
			path0[i] = "/dev/null";
			path1[i] = "";
		} else {
162
			path0[i] = i ? "b/" : "a/";
J
Junio C Hamano 已提交
163
			path1[i] = name_sq[i];
164 165
		}
		cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
166
			     strlen(input_name_sq[i]));
167
	}
168

169 170 171
	cmd = xmalloc(cmd_size);

	next_at = 0;
172
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
173
			    diff_cmd,
174
			    path0[0], path1[0], path0[1], path1[1]);
175 176 177
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
			    " %s ", diff_opts);
	next_at += snprintf(cmd+next_at, cmd_size-next_at,
178 179
			    diff_arg, input_name_sq[0], input_name_sq[1]);

J
Junio C Hamano 已提交
180
	printf("diff --git a/%s b/%s\n", name_a, name_b);
181
	if (!path1[0][0]) {
182
		printf("new file mode %s\n", temp[1].mode);
183 184 185 186
		if (xfrm_msg && xfrm_msg[0])
			puts(xfrm_msg);
	}
	else if (!path1[1][0]) {
187
		printf("deleted file mode %s\n", temp[0].mode);
188 189 190
		if (xfrm_msg && xfrm_msg[0])
			puts(xfrm_msg);
	}
191
	else {
192 193 194 195
		if (strcmp(temp[0].mode, temp[1].mode)) {
			printf("old mode %s\n", temp[0].mode);
			printf("new mode %s\n", temp[1].mode);
		}
196
		if (xfrm_msg && xfrm_msg[0])
197
			puts(xfrm_msg);
198 199 200 201
		if (strncmp(temp[0].mode, temp[1].mode, 3))
			/* we do not run diff between different kind
			 * of objects.
			 */
202
			exit(0);
J
Junio C Hamano 已提交
203 204 205 206 207
		if (complete_rewrite) {
			fflush(NULL);
			emit_rewrite_diff(name_a, name_b, temp);
			exit(0);
		}
208
	}
209
	fflush(NULL);
210
	execlp("/bin/sh","sh", "-c", cmd, NULL);
211 212
}

213 214 215 216
struct diff_filespec *alloc_filespec(const char *path)
{
	int namelen = strlen(path);
	struct diff_filespec *spec = xmalloc(sizeof(*spec) + namelen + 1);
217 218

	memset(spec, 0, sizeof(*spec));
219
	spec->path = (char *)(spec + 1);
220
	memcpy(spec->path, path, namelen+1);
221 222 223 224 225 226
	return spec;
}

void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
		   unsigned short mode)
{
227 228
	if (mode) {
		spec->mode = DIFF_FILE_CANON_MODE(mode);
229 230 231
		memcpy(spec->sha1, sha1, 20);
		spec->sha1_valid = !!memcmp(sha1, null_sha1, 20);
	}
232 233
}

J
Junio C Hamano 已提交
234 235 236 237 238 239 240 241 242 243
/*
 * Given a name and sha1 pair, if the dircache tells us the file in
 * the work tree has that object contents, return true, so that
 * prepare_temp_file() does not have to inflate and extract.
 */
static int work_tree_matches(const char *name, const unsigned char *sha1)
{
	struct cache_entry *ce;
	struct stat st;
	int pos, len;
244

J
Junio C Hamano 已提交
245 246 247 248
	/* We do not read the cache ourselves here, because the
	 * benchmark with my previous version that always reads cache
	 * shows that it makes things worse for diff-tree comparing
	 * two linux-2.6 kernel trees in an already checked out work
J
Junio C Hamano 已提交
249
	 * tree.  This is because most diff-tree comparisons deal with
J
Junio C Hamano 已提交
250 251 252 253 254 255
	 * only a small number of files, while reading the cache is
	 * expensive for a large project, and its cost outweighs the
	 * savings we get by not inflating the object to a temporary
	 * file.  Practically, this code only helps when we are used
	 * by diff-cache --cached, which does read the cache before
	 * calling us.
J
Junio C Hamano 已提交
256
	 */
J
Junio C Hamano 已提交
257 258 259 260 261 262 263 264
	if (!active_cache)
		return 0;

	len = strlen(name);
	pos = cache_name_pos(name, len);
	if (pos < 0)
		return 0;
	ce = active_cache[pos];
265
	if ((lstat(name, &st) < 0) ||
266
	    !S_ISREG(st.st_mode) || /* careful! */
267
	    ce_match_stat(ce, &st) ||
J
Junio C Hamano 已提交
268 269
	    memcmp(sha1, ce->sha1, 20))
		return 0;
270 271 272 273 274
	/* we return 1 only when we can stat, it is a regular file,
	 * stat information matches, and sha1 recorded in the cache
	 * matches.  I.e. we know the file in the work tree really is
	 * the same as the <name, sha1> pair.
	 */
J
Junio C Hamano 已提交
275 276 277
	return 1;
}

278 279 280 281 282 283 284
/*
 * Table of <object name, size> pairs, held as an array of pointers
 * to individually allocated entries.
 */
static struct sha1_size_cache {
	unsigned char sha1[20];
	unsigned long size;
} **sha1_size_cache;
static int sha1_size_cache_nr, sha1_size_cache_alloc;

/*
 * Look up `sha1` in the size table with a binary search.
 *
 * Returns the entry when found.  When not found, returns NULL if
 * find_only is set; otherwise a new entry holding `size` is inserted
 * at the position where the search ended, and that entry is returned.
 *
 * The search moves towards lower indices when the probed entry
 * compares less than `sha1`, so the array is kept in descending
 * memcmp() order.
 */
static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
						 int find_only,
						 unsigned long size)
{
	int first, last;
	struct sha1_size_cache *e;

	first = 0;
	last = sha1_size_cache_nr;
	while (last > first) {
		int cmp, next = (last + first) >> 1;
		e = sha1_size_cache[next];
		cmp = memcmp(e->sha1, sha1, 20);
		if (!cmp)
			return e;
		if (cmp < 0) {
			last = next;
			continue;
		}
		first = next+1;
	}
	/* not found */
	if (find_only)
		return NULL;
	/* insert to make it at "first" */
	if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
		sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
		sha1_size_cache = xrealloc(sha1_size_cache,
					   sha1_size_cache_alloc *
					   sizeof(*sha1_size_cache));
	}
	sha1_size_cache_nr++;
	/* shift the tail up by one slot to open a hole at "first" */
	if (first < sha1_size_cache_nr)
		memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
			(sha1_size_cache_nr - first - 1) *
			sizeof(*sha1_size_cache));
	e = xmalloc(sizeof(struct sha1_size_cache));
	sha1_size_cache[first] = e;
	memcpy(e->sha1, sha1, 20);
	e->size = size;
	return e;
}

327 328 329 330 331
/*
 * While doing rename detection and pickaxe operation, we may need to
 * grab the data for the blob (or file) for our own in-core comparison.
 * diff_filespec has data and size fields for this purpose.
 */
332
int diff_populate_filespec(struct diff_filespec *s, int size_only)
333 334
{
	int err = 0;
335
	if (!DIFF_FILE_VALID(s))
336 337 338 339
		die("internal error: asking to populate invalid file.");
	if (S_ISDIR(s->mode))
		return -1;

340 341 342
	if (!use_size_cache)
		size_only = 0;

343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361
	if (s->data)
		return err;
	if (!s->sha1_valid ||
	    work_tree_matches(s->path, s->sha1)) {
		struct stat st;
		int fd;
		if (lstat(s->path, &st) < 0) {
			if (errno == ENOENT) {
			err_empty:
				err = -1;
			empty:
				s->data = "";
				s->size = 0;
				return err;
			}
		}
		s->size = st.st_size;
		if (!s->size)
			goto empty;
362 363
		if (size_only)
			return 0;
364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
		if (S_ISLNK(st.st_mode)) {
			int ret;
			s->data = xmalloc(s->size);
			s->should_free = 1;
			ret = readlink(s->path, s->data, s->size);
			if (ret < 0) {
				free(s->data);
				goto err_empty;
			}
			return 0;
		}
		fd = open(s->path, O_RDONLY);
		if (fd < 0)
			goto err_empty;
		s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
		close(fd);
P
Pavel Roskin 已提交
380 381 382
		if (s->data == MAP_FAILED)
			goto err_empty;
		s->should_munmap = 1;
383 384 385
	}
	else {
		char type[20];
386 387 388
		struct sha1_size_cache *e;

		if (size_only) {
389
			e = locate_size_cache(s->sha1, 1, 0);
390 391 392 393
			if (e) {
				s->size = e->size;
				return 0;
			}
394
			if (!sha1_object_info(s->sha1, type, &s->size))
395
				locate_size_cache(s->sha1, 0, s->size);
396 397 398 399
		}
		else {
			s->data = read_sha1_file(s->sha1, type, &s->size);
			s->should_free = 1;
400
		}
401 402 403 404
	}
	return 0;
}

405
void diff_free_filespec_data(struct diff_filespec *s)
406 407 408 409 410
{
	if (s->should_free)
		free(s->data);
	else if (s->should_munmap)
		munmap(s->data, s->size);
411 412
	s->should_free = s->should_munmap = 0;
	s->data = NULL;
413 414
}

415 416 417 418 419 420 421 422
static void prep_temp_blob(struct diff_tempfile *temp,
			   void *blob,
			   unsigned long size,
			   unsigned char *sha1,
			   int mode)
{
	int fd;

423
	fd = git_mkstemp(temp->tmp_path, TEMPFILE_PATH_LEN, ".diff_XXXXXX");
424 425 426 427 428 429 430 431 432 433 434
	if (fd < 0)
		die("unable to create temp-file");
	if (write(fd, blob, size) != size)
		die("unable to write temp-file");
	close(fd);
	temp->name = temp->tmp_path;
	strcpy(temp->hex, sha1_to_hex(sha1));
	temp->hex[40] = 0;
	sprintf(temp->mode, "%06o", mode);
}

435 436
static void prepare_temp_file(const char *name,
			      struct diff_tempfile *temp,
437
			      struct diff_filespec *one)
438
{
439
	if (!DIFF_FILE_VALID(one)) {
440
	not_a_valid_file:
441 442 443
		/* A '-' entry produces this for file-2, and
		 * a '+' entry produces this for file-1.
		 */
444 445 446
		temp->name = "/dev/null";
		strcpy(temp->hex, ".");
		strcpy(temp->mode, ".");
447 448
		return;
	}
449

450
	if (!one->sha1_valid ||
451
	    work_tree_matches(name, one->sha1)) {
452
		struct stat st;
453
		if (lstat(name, &st) < 0) {
454 455
			if (errno == ENOENT)
				goto not_a_valid_file;
456
			die("stat(%s): %s", name, strerror(errno));
457
		}
458 459 460 461 462 463 464 465 466 467
		if (S_ISLNK(st.st_mode)) {
			int ret;
			char *buf, buf_[1024];
			buf = ((sizeof(buf_) < st.st_size) ?
			       xmalloc(st.st_size) : buf_);
			ret = readlink(name, buf, st.st_size);
			if (ret < 0)
				die("readlink(%s)", name);
			prep_temp_blob(temp, buf, st.st_size,
				       (one->sha1_valid ?
468
					one->sha1 : null_sha1),
469 470 471 472
				       (one->sha1_valid ?
					one->mode : S_IFLNK));
		}
		else {
473 474
			/* we can borrow from the file in the work tree */
			temp->name = name;
475 476 477
			if (!one->sha1_valid)
				strcpy(temp->hex, sha1_to_hex(null_sha1));
			else
478
				strcpy(temp->hex, sha1_to_hex(one->sha1));
J
Junio C Hamano 已提交
479 480 481 482 483 484 485
			/* Even though we may sometimes borrow the
			 * contents from the work tree, we always want
			 * one->mode.  mode is trustworthy even when
			 * !(one->sha1_valid), as long as
			 * DIFF_FILE_VALID(one).
			 */
			sprintf(temp->mode, "%06o", one->mode);
486 487
		}
		return;
488 489
	}
	else {
490
		if (diff_populate_filespec(one, 0))
491 492 493
			die("cannot read data blob for %s", one->path);
		prep_temp_blob(temp, one->data, one->size,
			       one->sha1, one->mode);
494 495 496 497 498 499 500 501 502 503 504 505 506 507
	}
}

static void remove_tempfile(void)
{
	int i;

	for (i = 0; i < 2; i++)
		if (diff_temp[i].name == diff_temp[i].tmp_path) {
			unlink(diff_temp[i].name);
			diff_temp[i].name = NULL;
		}
}

508 509 510 511 512
/*
 * Signal handler: remove our temporary files, then restore the
 * default disposition and re-raise the signal, so that the process
 * still terminates the way the signal asks for instead of carrying
 * on after the cleanup.
 */
static void remove_tempfile_on_signal(int signo)
{
	remove_tempfile();
	signal(signo, SIG_DFL);
	raise(signo);
}

513 514 515
/* An external diff command takes:
 *
 * diff-cmd name infile1 infile1-sha1 infile1-mode \
516
 *               infile2 infile2-sha1 infile2-mode [ rename-to ]
517 518
 *
 */
519 520
static void run_external_diff(const char *pgm,
			      const char *name,
521
			      const char *other,
522 523
			      struct diff_filespec *one,
			      struct diff_filespec *two,
J
Junio C Hamano 已提交
524 525
			      const char *xfrm_msg,
			      int complete_rewrite)
526 527
{
	struct diff_tempfile *temp = diff_temp;
528 529
	pid_t pid;
	int status;
530
	static int atexit_asked = 0;
J
Jason Riedy 已提交
531
	const char *othername;
532

J
Jason Riedy 已提交
533
	othername = (other? other : name);
534 535
	if (one && two) {
		prepare_temp_file(name, &temp[0], one);
J
Jason Riedy 已提交
536
		prepare_temp_file(othername, &temp[1], two);
537 538 539 540 541 542
		if (! atexit_asked &&
		    (temp[0].name == temp[0].tmp_path ||
		     temp[1].name == temp[1].tmp_path)) {
			atexit_asked = 1;
			atexit(remove_tempfile);
		}
543
		signal(SIGINT, remove_tempfile_on_signal);
544 545 546 547 548 549 550
	}

	fflush(NULL);
	pid = fork();
	if (pid < 0)
		die("unable to fork");
	if (!pid) {
551 552
		if (pgm) {
			if (one && two) {
553
				const char *exec_arg[10];
554 555 556 557 558 559 560 561 562
				const char **arg = &exec_arg[0];
				*arg++ = pgm;
				*arg++ = name;
				*arg++ = temp[0].name;
				*arg++ = temp[0].hex;
				*arg++ = temp[0].mode;
				*arg++ = temp[1].name;
				*arg++ = temp[1].hex;
				*arg++ = temp[1].mode;
563
				if (other) {
564
					*arg++ = other;
565 566
					*arg++ = xfrm_msg;
				}
L
Linus Torvalds 已提交
567
				*arg = NULL;
568 569
				execvp(pgm, (char *const*) exec_arg);
			}
570 571 572
			else
				execlp(pgm, pgm, name, NULL);
		}
573 574 575
		/*
		 * otherwise we use the built-in one.
		 */
576
		if (one && two)
J
Jason Riedy 已提交
577
			builtin_diff(name, othername, temp, xfrm_msg,
J
Junio C Hamano 已提交
578
				     complete_rewrite);
579 580
		else
			printf("* Unmerged path %s\n", name);
581 582
		exit(0);
	}
583 584 585
	if (waitpid(pid, &status, 0) < 0 ||
	    !WIFEXITED(status) || WEXITSTATUS(status)) {
		/* Earlier we did not check the exit status because
586
		 * diff exits non-zero if files are different, and
587 588 589 590 591 592
		 * we are not interested in knowing that.  It was a
		 * mistake which made it harder to quit a diff-*
		 * session that uses the git-apply-patch-script as
		 * the GIT_EXTERNAL_DIFF.  A custom GIT_EXTERNAL_DIFF
		 * should also exit non-zero only when it wants to
		 * abort the entire diff-* session.
593 594
		 */
		remove_tempfile();
595 596
		fprintf(stderr, "external diff died, stopping at %s.\n", name);
		exit(1);
597
	}
598 599 600
	remove_tempfile();
}

J
Junio C Hamano 已提交
601
static void run_diff(struct diff_filepair *p)
602 603
{
	const char *pgm = external_diff();
J
Junio C Hamano 已提交
604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
	char msg_[PATH_MAX*2+200], *xfrm_msg;
	struct diff_filespec *one;
	struct diff_filespec *two;
	const char *name;
	const char *other;
	int complete_rewrite = 0;

	if (DIFF_PAIR_UNMERGED(p)) {
		/* unmerged */
		run_external_diff(pgm, p->one->path, NULL, NULL, NULL, NULL,
				  0);
		return;
	}

	name = p->one->path;
	other = (strcmp(name, p->two->path) ? p->two->path : NULL);
	one = p->one; two = p->two;
	switch (p->status) {
622
	case DIFF_STATUS_COPIED:
J
Junio C Hamano 已提交
623 624 625 626 627 628 629 630
		sprintf(msg_,
			"similarity index %d%%\n"
			"copy from %s\n"
			"copy to %s",
			(int)(0.5 + p->score * 100.0/MAX_SCORE),
			name, other);
		xfrm_msg = msg_;
		break;
631
	case DIFF_STATUS_RENAMED:
J
Junio C Hamano 已提交
632 633 634 635 636 637 638 639
		sprintf(msg_,
			"similarity index %d%%\n"
			"rename from %s\n"
			"rename to %s",
			(int)(0.5 + p->score * 100.0/MAX_SCORE),
			name, other);
		xfrm_msg = msg_;
		break;
640
	case DIFF_STATUS_MODIFIED:
J
Junio C Hamano 已提交
641 642 643 644 645 646 647 648 649 650 651 652 653
		if (p->score) {
			sprintf(msg_,
				"dissimilarity index %d%%",
				(int)(0.5 + p->score * 100.0/MAX_SCORE));
			xfrm_msg = msg_;
			complete_rewrite = 1;
			break;
		}
		/* fallthru */
	default:
		xfrm_msg = NULL;
	}

654 655 656 657 658 659 660
	if (!pgm &&
	    DIFF_FILE_VALID(one) && DIFF_FILE_VALID(two) &&
	    (S_IFMT & one->mode) != (S_IFMT & two->mode)) {
		/* a filepair that changes between file and symlink
		 * needs to be split into deletion and creation.
		 */
		struct diff_filespec *null = alloc_filespec(two->path);
J
Junio C Hamano 已提交
661
		run_external_diff(NULL, name, other, one, null, xfrm_msg, 0);
662 663
		free(null);
		null = alloc_filespec(one->path);
J
Junio C Hamano 已提交
664
		run_external_diff(NULL, name, other, null, two, xfrm_msg, 0);
665 666 667
		free(null);
	}
	else
J
Junio C Hamano 已提交
668 669
		run_external_diff(pgm, name, other, one, two, xfrm_msg,
				  complete_rewrite);
670 671
}

672
void diff_setup(int flags)
673
{
674 675
	if (flags & DIFF_SETUP_REVERSE)
		reverse_diff = 1;
676 677 678 679 680 681 682 683 684 685 686 687 688
	if (flags & DIFF_SETUP_USE_CACHE) {
		if (!active_cache)
			/* read-cache does not die even when it fails
			 * so it is safe for us to do this here.  Also
			 * it does not smudge active_cache or active_nr
			 * when it fails, so we do not have to worry about
			 * cleaning it up oufselves either.
			 */
			read_cache();
	}
	if (flags & DIFF_SETUP_USE_SIZE_CACHE)
		use_size_cache = 1;
	
689 690
}

691 692 693 694 695 696 697 698 699 700 701 702 703
/*
 * Parse the run of decimal digits at *cp_p as the digits of a
 * fraction: "5" means 5/10, "75" means 75/100, and so on.  Only the
 * first five digits contribute to the value.  *cp_p is advanced past
 * all the digits.  With no digits at all the result is 0.
 *
 * Returns the fraction scaled to the range of MAX_SCORE.
 */
static int parse_num(const char **cp_p)
{
	int num, scale, ch, cnt;
	const char *cp = *cp_p;

	cnt = num = 0;
	scale = 1;
	while ('0' <= (ch = *cp) && ch <= '9') {
		if (cnt++ < 5) {
			/* We simply ignore more than 5 digits precision. */
			scale *= 10;
			num = num * 10 + ch - '0';
		}
		cp++;
	}
	*cp_p = cp;

	/* user says num divided by scale and we say internally that
	 * is MAX_SCORE * num / scale.
	 */
	return (MAX_SCORE * num / scale);
}

/*
 * Parse a "-M<num>", "-C<num>" or "-B<num>[/<num>]" option string.
 *
 * Returns -1 when `opt` is not one of these options or has trailing
 * garbage.  Otherwise returns the first number in the low bits with
 * the second number (only possible with -B, else 0) shifted left by
 * 16 bits and OR-ed in; each number is as returned by parse_num().
 */
int diff_scoreopt_parse(const char *opt)
{
	int opt1, opt2, cmd;

	if (*opt++ != '-')
		return -1;
	cmd = *opt++;
	if (cmd != 'M' && cmd != 'C' && cmd != 'B')
		return -1; /* that is not a -M, -C nor -B option */

	opt1 = parse_num(&opt);
	if (cmd != 'B')
		opt2 = 0;
	else {
		if (*opt == 0)
			opt2 = 0;
		else if (*opt != '/')
			return -1; /* we expect -B80/99 or -B80 */
		else {
			opt++;
			opt2 = parse_num(&opt);
		}
	}
	if (*opt != 0)
		return -1;
	return opt1 | (opt2 << 16);
}

J
Junio C Hamano 已提交
742 743 744
struct diff_queue_struct diff_queued_diff;

void diff_q(struct diff_queue_struct *queue, struct diff_filepair *dp)
745
{
J
Junio C Hamano 已提交
746 747 748 749
	if (queue->alloc <= queue->nr) {
		queue->alloc = alloc_nr(queue->alloc);
		queue->queue = xrealloc(queue->queue,
					sizeof(dp) * queue->alloc);
750
	}
J
Junio C Hamano 已提交
751
	queue->queue[queue->nr++] = dp;
752 753
}

754
struct diff_filepair *diff_queue(struct diff_queue_struct *queue,
J
Junio C Hamano 已提交
755 756
				 struct diff_filespec *one,
				 struct diff_filespec *two)
757
{
758
	struct diff_filepair *dp = xmalloc(sizeof(*dp));
759 760
	dp->one = one;
	dp->two = two;
761
	dp->score = 0;
762
	dp->status = 0;
763
	dp->source_stays = 0;
764
	dp->broken_pair = 0;
J
Junio C Hamano 已提交
765
	diff_q(queue, dp);
766
	return dp;
767 768
}

769 770
/*
 * Release the data held by both sides of the pair, then free the
 * pair itself.  The two diff_filespec structs are not freed here.
 */
void diff_free_filepair(struct diff_filepair *p)
{
	diff_free_filespec_data(p->one);
	diff_free_filespec_data(p->two);
	free(p);
}

776 777 778
static void diff_flush_raw(struct diff_filepair *p,
			   int line_termination,
			   int inter_name_termination)
779
{
780 781
	int two_paths;
	char status[10];
782 783

	if (line_termination) {
784 785
		const char *const err =
			"path %s cannot be expressed without -z";
786 787 788 789 790 791 792 793
		if (strchr(p->one->path, line_termination) ||
		    strchr(p->one->path, inter_name_termination))
			die(err, p->one->path);
		if (strchr(p->two->path, line_termination) ||
		    strchr(p->two->path, inter_name_termination))
			die(err, p->two->path);
	}

J
Junio C Hamano 已提交
794 795 796 797 798 799 800
	if (p->score)
		sprintf(status, "%c%03d", p->status,
			(int)(0.5 + p->score * 100.0/MAX_SCORE));
	else {
		status[0] = p->status;
		status[1] = 0;
	}
801
	switch (p->status) {
802 803
	case DIFF_STATUS_COPIED:
	case DIFF_STATUS_RENAMED:
804 805
		two_paths = 1;
		break;
806 807
	case DIFF_STATUS_ADDED:
	case DIFF_STATUS_DELETED:
808 809
		two_paths = 0;
		break;
810 811 812
	default:
		two_paths = 0;
		break;
J
Junio C Hamano 已提交
813
	}
814 815
	printf(":%06o %06o %s ",
	       p->one->mode, p->two->mode, sha1_to_hex(p->one->sha1));
816 817 818 819 820 821 822 823
	printf("%s %s%c%s",
	       sha1_to_hex(p->two->sha1),
	       status,
	       inter_name_termination,
	       p->one->path);
	if (two_paths)
		printf("%c%s", inter_name_termination, p->two->path);
	putchar(line_termination);
824 825
}

826 827 828 829 830 831
/*
 * Print only the path of the second side of the pair, followed by
 * the line terminator.
 */
static void diff_flush_name(struct diff_filepair *p,
			    int line_termination)
{
	const char *path = p->two->path;

	printf("%s%c", path, line_termination);
}

832
int diff_unmodified_pair(struct diff_filepair *p)
833
{
834 835
	/* This function is written stricter than necessary to support
	 * the currently implemented transformers, but the idea is to
836
	 * let transformers to produce diff_filepairs any way they want,
837
	 * and filter and clean them up here before producing the output.
838
	 */
J
Junio C Hamano 已提交
839 840 841 842
	struct diff_filespec *one, *two;

	if (DIFF_PAIR_UNMERGED(p))
		return 0; /* unmerged is interesting */
843

J
Junio C Hamano 已提交
844 845
	one = p->one;
	two = p->two;
846

847 848 849
	/* deletion, addition, mode or type change
	 * and rename are all interesting.
	 */
850
	if (DIFF_FILE_VALID(one) != DIFF_FILE_VALID(two) ||
851
	    DIFF_PAIR_MODE_CHANGED(p) ||
852 853
	    strcmp(one->path, two->path))
		return 0;
J
Junio C Hamano 已提交
854

855 856
	/* both are valid and point at the same path.  that is, we are
	 * dealing with a change.
J
Junio C Hamano 已提交
857
	 */
858 859 860 861 862 863
	if (one->sha1_valid && two->sha1_valid &&
	    !memcmp(one->sha1, two->sha1, sizeof(one->sha1)))
		return 1; /* no change */
	if (!one->sha1_valid && !two->sha1_valid)
		return 1; /* both look at the same file on the filesystem. */
	return 0;
J
Junio C Hamano 已提交
864 865
}

866
static void diff_flush_patch(struct diff_filepair *p)
867 868 869 870 871 872 873 874
{
	if (diff_unmodified_pair(p))
		return;

	if ((DIFF_FILE_VALID(p->one) && S_ISDIR(p->one->mode)) ||
	    (DIFF_FILE_VALID(p->two) && S_ISDIR(p->two->mode)))
		return; /* no tree diffs in patch format */ 

J
Junio C Hamano 已提交
875
	run_diff(p);
876 877
}

878
int diff_queue_is_empty(void)
879
{
880
	struct diff_queue_struct *q = &diff_queued_diff;
881 882 883 884 885
	int i;
	for (i = 0; i < q->nr; i++)
		if (!diff_unmodified_pair(q->queue[i]))
			return 0;
	return 1;
886 887
}

888 889 890 891
#if DIFF_DEBUG
void diff_debug_filespec(struct diff_filespec *s, int x, const char *one)
{
	fprintf(stderr, "queue[%d] %s (%s) %s %06o %s\n",
J
Jason Riedy 已提交
892
		x, one ? one : "",
893 894 895 896 897
		s->path,
		DIFF_FILE_VALID(s) ? "valid" : "invalid",
		s->mode,
		s->sha1_valid ? sha1_to_hex(s->sha1) : "");
	fprintf(stderr, "queue[%d] %s size %lu flags %d\n",
J
Jason Riedy 已提交
898
		x, one ? one : "",
899 900 901 902 903 904 905
		s->size, s->xfrm_flags);
}

/*
 * Debugging aid: describe both sides of pair `p` on stderr, followed
 * by its score, status ('?' while the status is still 0),
 * source_stays and broken_pair.  `i` is printed as the queue index.
 */
void diff_debug_filepair(const struct diff_filepair *p, int i)
{
	diff_debug_filespec(p->one, i, "one");
	diff_debug_filespec(p->two, i, "two");
	fprintf(stderr, "score %d, status %c stays %d broken %d\n",
		p->score, p->status ? p->status : '?',
		p->source_stays, p->broken_pair);
}

void diff_debug_queue(const char *msg, struct diff_queue_struct *q)
J
Junio C Hamano 已提交
912 913
{
	int i;
914 915 916
	if (msg)
		fprintf(stderr, "%s\n", msg);
	fprintf(stderr, "q->nr = %d\n", q->nr);
J
Junio C Hamano 已提交
917 918
	for (i = 0; i < q->nr; i++) {
		struct diff_filepair *p = q->queue[i];
919 920 921 922 923 924 925 926 927 928 929 930 931 932 933
		diff_debug_filepair(p, i);
	}
}
#endif

static void diff_resolve_rename_copy(void)
{
	int i, j;
	struct diff_filepair *p, *pp;
	struct diff_queue_struct *q = &diff_queued_diff;

	diff_debug_queue("resolve-rename-copy", q);

	for (i = 0; i < q->nr; i++) {
		p = q->queue[i];
934
		p->status = 0; /* undecided */
935
		if (DIFF_PAIR_UNMERGED(p))
936
			p->status = DIFF_STATUS_UNMERGED;
937
		else if (!DIFF_FILE_VALID(p->one))
938
			p->status = DIFF_STATUS_ADDED;
939
		else if (!DIFF_FILE_VALID(p->two))
940
			p->status = DIFF_STATUS_DELETED;
941
		else if (DIFF_PAIR_TYPE_CHANGED(p))
942
			p->status = DIFF_STATUS_TYPE_CHANGED;
943 944 945 946 947

		/* from this point on, we are dealing with a pair
		 * whose both sides are valid and of the same type, i.e.
		 * either in-place edit or rename/copy edit.
		 */
948
		else if (DIFF_PAIR_RENAME(p)) {
949
			if (p->source_stays) {
950
				p->status = DIFF_STATUS_COPIED;
951 952 953 954
				continue;
			}
			/* See if there is some other filepair that
			 * copies from the same source as us.  If so
955 956 957
			 * we are a copy.  Otherwise we are either a
			 * copy if the path stays, or a rename if it
			 * does not, but we already handled "stays" case.
958
			 */
959
			for (j = i + 1; j < q->nr; j++) {
960 961
				pp = q->queue[j];
				if (strcmp(pp->one->path, p->one->path))
962
					continue; /* not us */
963
				if (!DIFF_PAIR_RENAME(pp))
964 965
					continue; /* not a rename/copy */
				/* pp is a rename/copy from the same source */
966
				p->status = DIFF_STATUS_COPIED;
967
				break;
968 969
			}
			if (!p->status)
970
				p->status = DIFF_STATUS_RENAMED;
971
		}
972 973
		else if (memcmp(p->one->sha1, p->two->sha1, 20) ||
			 p->one->mode != p->two->mode)
974
			p->status = DIFF_STATUS_MODIFIED;
J
Junio C Hamano 已提交
975 976 977
		else {
			/* This is a "no-change" entry and should not
			 * happen anymore, but prepare for broken callers.
978
			 */
J
Junio C Hamano 已提交
979 980
			error("feeding unmodified %s to diffcore",
			      p->one->path);
981
			p->status = DIFF_STATUS_UNKNOWN;
J
Junio C Hamano 已提交
982
		}
J
Junio C Hamano 已提交
983
	}
984
	diff_debug_queue("resolve-rename-copy done", q);
985 986
}

987
void diff_flush(int diff_output_style, int line_termination)
988 989 990
{
	struct diff_queue_struct *q = &diff_queued_diff;
	int i;
991
	int inter_name_termination = '\t';
992

993 994
	if (!line_termination)
		inter_name_termination = 0;
995

996 997
	for (i = 0; i < q->nr; i++) {
		struct diff_filepair *p = q->queue[i];
998
		if ((diff_output_style == DIFF_FORMAT_NO_OUTPUT) ||
999
		    (p->status == DIFF_STATUS_UNKNOWN))
1000
			continue;
1001
		if (p->status == 0)
1002
			die("internal error in diff-resolve-rename-copy");
1003 1004
		switch (diff_output_style) {
		case DIFF_FORMAT_PATCH:
1005
			diff_flush_patch(p);
1006
			break;
1007
		case DIFF_FORMAT_RAW:
1008 1009 1010
			diff_flush_raw(p, line_termination,
				       inter_name_termination);
			break;
1011 1012 1013
		case DIFF_FORMAT_NAME:
			diff_flush_name(p, line_termination);
			break;
1014
		}
1015
		diff_free_filepair(q->queue[i]);
1016
	}
1017 1018 1019
	free(q->queue);
	q->queue = NULL;
	q->nr = q->alloc = 0;
1020 1021
}

1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032
static void diffcore_apply_filter(const char *filter)
{
	int i;
	struct diff_queue_struct *q = &diff_queued_diff;
	struct diff_queue_struct outq;
	outq.queue = NULL;
	outq.nr = outq.alloc = 0;

	if (!filter)
		return;

1033
	if (strchr(filter, DIFF_STATUS_FILTER_AON)) {
1034 1035 1036
		int found;
		for (i = found = 0; !found && i < q->nr; i++) {
			struct diff_filepair *p = q->queue[i];
1037 1038 1039 1040 1041 1042 1043
			if (((p->status == DIFF_STATUS_MODIFIED) &&
			     ((p->score &&
			       strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
			      (!p->score &&
			       strchr(filter, DIFF_STATUS_MODIFIED)))) ||
			    ((p->status != DIFF_STATUS_MODIFIED) &&
			     strchr(filter, p->status)))
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
				found++;
		}
		if (found)
			return;

		/* otherwise we will clear the whole queue
		 * by copying the empty outq at the end of this
		 * function, but first clear the current entries
		 * in the queue.
		 */
		for (i = 0; i < q->nr; i++)
			diff_free_filepair(q->queue[i]);
	}
	else {
		/* Only the matching ones */
		for (i = 0; i < q->nr; i++) {
			struct diff_filepair *p = q->queue[i];
1061 1062 1063 1064 1065 1066 1067 1068

			if (((p->status == DIFF_STATUS_MODIFIED) &&
			     ((p->score &&
			       strchr(filter, DIFF_STATUS_FILTER_BROKEN)) ||
			      (!p->score &&
			       strchr(filter, DIFF_STATUS_MODIFIED)))) ||
			    ((p->status != DIFF_STATUS_MODIFIED) &&
			     strchr(filter, p->status)))
1069 1070 1071 1072 1073 1074 1075 1076 1077
				diff_q(&outq, p);
			else
				diff_free_filepair(p);
		}
	}
	free(q->queue);
	*q = outq;
}

1078 1079
/*
 * Run the diffcore stages over the queued diff, in a fixed order.
 * Each optional stage runs only when its controlling argument asks
 * for it:
 *
 *   paths          - non-NULL with a first element: diffcore_pathspec()
 *   break_opt      - anything but -1: diffcore_break() before rename
 *                    detection and diffcore_merge_broken() after it
 *   detect_rename  - non-zero: diffcore_rename() with rename_score
 *   pickaxe        - non-NULL: diffcore_pickaxe() with pickaxe_opts
 *   orderfile      - non-NULL: diffcore_order()
 *
 * diff_resolve_rename_copy() and diffcore_apply_filter(filter) are
 * always called last, in that order.
 */
void diffcore_std(const char **paths,
		  int detect_rename, int rename_score,
		  const char *pickaxe, int pickaxe_opts,
		  int break_opt,
		  const char *orderfile,
		  const char *filter)
{
	/* -1 is the "breaking disabled" value for break_opt. */
	const int breaking = (break_opt != -1);

	if (paths != NULL && paths[0] != NULL)
		diffcore_pathspec(paths);

	if (breaking)
		diffcore_break(break_opt);

	if (detect_rename != 0)
		diffcore_rename(detect_rename, rename_score);

	if (breaking)
		diffcore_merge_broken();

	if (pickaxe != NULL)
		diffcore_pickaxe(pickaxe, pickaxe_opts);

	if (orderfile != NULL)
		diffcore_order(orderfile);

	diff_resolve_rename_copy();
	diffcore_apply_filter(filter);
}


/*
 * Reduced variant of diffcore_std(): it runs only the pathspec,
 * pickaxe and order stages (each one only when its argument is
 * given) followed by diffcore_apply_filter(filter).  It does not
 * call the break, rename, merge-broken or
 * diff_resolve_rename_copy() steps.
 */
void diffcore_std_no_resolve(const char **paths,
			     const char *pickaxe, int pickaxe_opts,
			     const char *orderfile,
			     const char *filter)
{
	if (paths != NULL && paths[0] != NULL)
		diffcore_pathspec(paths);

	if (pickaxe != NULL)
		diffcore_pickaxe(pickaxe, pickaxe_opts);

	if (orderfile != NULL)
		diffcore_order(orderfile);

	diffcore_apply_filter(filter);
}

1116 1117 1118
void diff_addremove(int addremove, unsigned mode,
		    const unsigned char *sha1,
		    const char *base, const char *path)
1119
{
1120
	char concatpath[PATH_MAX];
1121 1122 1123 1124 1125 1126 1127 1128 1129
	struct diff_filespec *one, *two;

	/* This may look odd, but it is a preparation for
	 * feeding "there are unchanged files which should
	 * not produce diffs, but when you are doing copy
	 * detection you would need them, so here they are"
	 * entries to the diff-core.  They will be prefixed
	 * with something like '=' or '*' (I haven't decided
	 * which but should not make any difference).
1130
	 * Feeding the same new and old to diff_change() 
1131 1132 1133
	 * also has the same effect.
	 * Before the final output happens, they are pruned after
	 * merged into rename/copy pairs as appropriate.
1134
	 */
1135
	if (reverse_diff)
1136 1137
		addremove = (addremove == '+' ? '-' :
			     addremove == '-' ? '+' : addremove);
1138

1139 1140 1141 1142
	if (!path) path = "";
	sprintf(concatpath, "%s%s", base, path);
	one = alloc_filespec(concatpath);
	two = alloc_filespec(concatpath);
1143

1144 1145 1146 1147
	if (addremove != '+')
		fill_filespec(one, sha1, mode);
	if (addremove != '-')
		fill_filespec(two, sha1, mode);
1148

1149
	diff_queue(&diff_queued_diff, one, two);
1150 1151
}

1152 1153 1154 1155 1156 1157 1158 1159
void diff_helper_input(unsigned old_mode,
		       unsigned new_mode,
		       const unsigned char *old_sha1,
		       const unsigned char *new_sha1,
		       const char *old_path,
		       int status,
		       int score,
		       const char *new_path)
1160 1161
{
	struct diff_filespec *one, *two;
1162
	struct diff_filepair *dp;
1163 1164 1165 1166 1167 1168 1169

	one = alloc_filespec(old_path);
	two = alloc_filespec(new_path);
	if (old_mode)
		fill_filespec(one, old_sha1, old_mode);
	if (new_mode)
		fill_filespec(two, new_sha1, new_mode);
1170
	dp = diff_queue(&diff_queued_diff, one, two);
1171
	dp->score = score * MAX_SCORE / 100;
1172
	dp->status = status;
1173 1174
}

1175 1176 1177
void diff_change(unsigned old_mode, unsigned new_mode,
		 const unsigned char *old_sha1,
		 const unsigned char *new_sha1,
1178 1179
		 const char *base, const char *path) 
{
1180
	char concatpath[PATH_MAX];
1181
	struct diff_filespec *one, *two;
1182

1183 1184 1185 1186 1187 1188
	if (reverse_diff) {
		unsigned tmp;
		const unsigned char *tmp_c;
		tmp = old_mode; old_mode = new_mode; new_mode = tmp;
		tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
	}
1189 1190 1191 1192 1193 1194 1195
	if (!path) path = "";
	sprintf(concatpath, "%s%s", base, path);
	one = alloc_filespec(concatpath);
	two = alloc_filespec(concatpath);
	fill_filespec(one, old_sha1, old_mode);
	fill_filespec(two, new_sha1, new_mode);

1196
	diff_queue(&diff_queued_diff, one, two);
1197
}
1198

1199 1200
/*
 * Queue a filepair for "path" on diff_queued_diff with neither side
 * filled in: both filespecs are freshly allocated for the same path
 * and fill_filespec() is never called on them.
 */
void diff_unmerge(const char *path)
{
	struct diff_filespec *one = alloc_filespec(path);
	struct diff_filespec *two = alloc_filespec(path);

	diff_queue(&diff_queued_diff, one, two);
}