builtin-blame.c 58.5 KB
Newer Older
J
Junio C Hamano 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Pickaxe
 *
 * Copyright (c) 2006, Junio C Hamano
 */

#include "cache.h"
#include "builtin.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree-walk.h"
#include "diff.h"
#include "diffcore.h"
#include "revision.h"
L
Linus Torvalds 已提交
16
#include "quote.h"
J
Junio C Hamano 已提交
17
#include "xdiff-interface.h"
18
#include "cache-tree.h"
J
Junio C Hamano 已提交
19

J
Junio C Hamano 已提交
20
static char blame_usage[] =
21
"git-blame [-c] [-l] [-t] [-f] [-n] [-p] [-L n,m] [-S <revs-file>] [-M] [-C] [-C] [--contents <filename>] [commit] [--] file\n"
J
Junio C Hamano 已提交
22
"  -c, --compatibility Use the same output mode as git-annotate (Default: off)\n"
23
"  -b                  Show blank SHA-1 for boundary commits (Default: off)\n"
J
Junio C Hamano 已提交
24
"  -l, --long          Show long commit SHA1 (Default: off)\n"
25
"  --root              Do not treat root commits as boundaries (Default: off)\n"
J
Junio C Hamano 已提交
26 27 28 29 30
"  -t, --time          Show raw timestamp (Default: off)\n"
"  -f, --show-name     Show original filename (Default: auto)\n"
"  -n, --show-number   Show original linenumber (Default: off)\n"
"  -p, --porcelain     Show in a format designed for machine consumption\n"
"  -L n,m              Process only line range n,m, counting from 1\n"
31
"  -M, -C              Find line movements within and across files\n"
L
Linus Torvalds 已提交
32
"  --incremental       Show blame entries as we find them, incrementally\n"
33
"  --contents file     Use <file>'s contents as the final image\n"
J
Junio C Hamano 已提交
34 35 36 37 38 39
"  -S revs-file        Use revisions from revs-file instead of calling git-rev-list\n";

static int longest_file;
static int longest_author;
static int max_orig_digits;
static int max_digits;
40
static int max_score_digits;
41 42
static int show_root;
static int blank_boundary;
L
Linus Torvalds 已提交
43
static int incremental;
J
Junio C Hamano 已提交
44

45 46 47 48
#ifndef DEBUG
#define DEBUG 0
#endif

49 50 51 52 53
/* stats */
static int num_read_blob;
static int num_get_patch;
static int num_commits;

54
#define PICKAXE_BLAME_MOVE		01
55 56
#define PICKAXE_BLAME_COPY		02
#define PICKAXE_BLAME_COPY_HARDER	04
57

58 59 60 61 62 63 64 65 66
/*
 * blame for a blame_entry with score lower than these thresholds
 * is not passed to the parent using move/copy logic.
 */
static unsigned blame_move_score;
static unsigned blame_copy_score;
#define BLAME_DEFAULT_MOVE_SCORE	20
#define BLAME_DEFAULT_COPY_SCORE	40

J
Junio C Hamano 已提交
67 68 69 70 71
/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */
#define METAINFO_SHOWN		(1u<<12)
#define MORE_THAN_ONE_PATH	(1u<<13)

/*
72
 * One blob in a commit that is being suspected
J
Junio C Hamano 已提交
73 74
 */
struct origin {
75
	int refcnt;
J
Junio C Hamano 已提交
76
	struct commit *commit;
77
	mmfile_t file;
J
Junio C Hamano 已提交
78 79 80 81
	unsigned char blob_sha1[20];
	char path[FLEX_ARRAY];
};

82 83 84 85
/*
 * Given an origin, prepare mmfile_t structure to be used by the
 * diff machinery
 */
86 87 88 89 90 91 92 93 94 95 96 97 98 99
static char *fill_origin_blob(struct origin *o, mmfile_t *file)
{
	if (!o->file.ptr) {
		char type[10];
		num_read_blob++;
		file->ptr = read_sha1_file(o->blob_sha1, type,
					   (unsigned long *)(&(file->size)));
		o->file = *file;
	}
	else
		*file = o->file;
	return file->ptr;
}

100 101 102 103
/*
 * Origin is refcounted and usually we keep the blob contents to be
 * reused.
 */
104 105 106 107 108 109 110 111 112 113
static inline struct origin *origin_incref(struct origin *o)
{
	if (o)
		o->refcnt++;
	return o;
}

static void origin_decref(struct origin *o)
{
	if (o && --o->refcnt <= 0) {
114 115
		if (o->file.ptr)
			free(o->file.ptr);
116 117 118 119 120
		memset(o, 0, sizeof(*o));
		free(o);
	}
}

121 122 123 124 125
/*
 * Each group of lines is described by a blame_entry; it can be split
 * as we pass blame to the parents.  They form a linked list in the
 * scoreboard structure, sorted by the target line number.
 */
J
Junio C Hamano 已提交
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
struct blame_entry {
	struct blame_entry *prev;
	struct blame_entry *next;

	/* the first line of this group in the final image;
	 * internally all line numbers are 0 based.
	 */
	int lno;

	/* how many lines this group has */
	int num_lines;

	/* the commit that introduced this group into the final image */
	struct origin *suspect;

	/* true if the suspect is truly guilty; false while we have not
	 * checked if the group came from one of its parents.
	 */
	char guilty;

	/* the line number of the first line of this group in the
	 * suspect's file; internally all line numbers are 0 based.
	 */
	int s_lno;
150 151

	/* how significant this entry is -- cached to avoid
152
	 * scanning the lines over and over.
153 154
	 */
	unsigned score;
J
Junio C Hamano 已提交
155 156
};

157 158 159
/*
 * The current state of the blame assignment.
 */
J
Junio C Hamano 已提交
160 161 162 163 164 165
struct scoreboard {
	/* the final commit (i.e. where we started digging from) */
	struct commit *final;

	const char *path;

166 167 168 169
	/*
	 * The contents in the final image.
	 * Used by many functions to obtain contents of the nth line,
	 * indexed with scoreboard.lineno[blame_entry.lno].
J
Junio C Hamano 已提交
170 171 172 173 174 175 176
	 */
	const char *final_buf;
	unsigned long final_buf_size;

	/* linked list of blames */
	struct blame_entry *ent;

177
	/* look-up a line in the final buffer */
J
Junio C Hamano 已提交
178 179 180 181
	int num_lines;
	int *lineno;
};

182 183 184 185 186 187 188 189
static int cmp_suspect(struct origin *a, struct origin *b)
{
	int cmp = hashcmp(a->commit->object.sha1, b->commit->object.sha1);
	if (cmp)
		return cmp;
	return strcmp(a->path, b->path);
}

190 191
#define cmp_suspect(a, b) ( ((a)==(b)) ? 0 : cmp_suspect(a,b) )

192 193
static void sanity_check_refcnt(struct scoreboard *);

194 195 196 197 198
/*
 * If two blame entries that are next to each other came from
 * contiguous lines in the same origin (i.e. <commit, path> pair),
 * merge them together.
 */
J
Junio C Hamano 已提交
199 200 201 202 203
static void coalesce(struct scoreboard *sb)
{
	struct blame_entry *ent, *next;

	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
204
		if (!cmp_suspect(ent->suspect, next->suspect) &&
J
Junio C Hamano 已提交
205 206 207 208 209 210
		    ent->guilty == next->guilty &&
		    ent->s_lno + ent->num_lines == next->s_lno) {
			ent->num_lines += next->num_lines;
			ent->next = next->next;
			if (ent->next)
				ent->next->prev = ent;
211
			origin_decref(next->suspect);
J
Junio C Hamano 已提交
212
			free(next);
213
			ent->score = 0;
J
Junio C Hamano 已提交
214 215 216
			next = ent; /* again */
		}
	}
217 218 219

	if (DEBUG) /* sanity */
		sanity_check_refcnt(sb);
J
Junio C Hamano 已提交
220 221
}

222 223 224 225 226 227
/*
 * Given a commit and a path in it, create a new origin structure.
 * The callers that add blame to the scoreboard should use
 * get_origin() to obtain shared, refcounted copy instead of calling
 * this function directly.
 */
228 229 230 231 232 233 234 235 236 237
static struct origin *make_origin(struct commit *commit, const char *path)
{
	struct origin *o;
	o = xcalloc(1, sizeof(*o) + strlen(path) + 1);
	o->commit = commit;
	o->refcnt = 1;
	strcpy(o->path, path);
	return o;
}

238 239 240
/*
 * Locate an existing origin or create a new one.
 */
241 242 243
static struct origin *get_origin(struct scoreboard *sb,
				 struct commit *commit,
				 const char *path)
J
Junio C Hamano 已提交
244
{
245
	struct blame_entry *e;
J
Junio C Hamano 已提交
246

247 248 249
	for (e = sb->ent; e; e = e->next) {
		if (e->suspect->commit == commit &&
		    !strcmp(e->suspect->path, path))
250
			return origin_incref(e->suspect);
J
Junio C Hamano 已提交
251
	}
252
	return make_origin(commit, path);
J
Junio C Hamano 已提交
253 254
}

255 256 257 258 259 260 261
/*
 * Fill the blob_sha1 field of an origin if it hasn't, so that later
 * call to fill_origin_blob() can use it to locate the data.  blob_sha1
 * for an origin is also used to pass the blame for the entire file to
 * the parent to detect the case where a child's blob is identical to
 * that of its parent's.
 */
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
static int fill_blob_sha1(struct origin *origin)
{
	unsigned mode;
	char type[10];

	if (!is_null_sha1(origin->blob_sha1))
		return 0;
	if (get_tree_entry(origin->commit->object.sha1,
			   origin->path,
			   origin->blob_sha1, &mode))
		goto error_out;
	if (sha1_object_info(origin->blob_sha1, type, NULL) ||
	    strcmp(type, blob_type))
		goto error_out;
	return 0;
 error_out:
	hashclr(origin->blob_sha1);
	return -1;
}

282 283 284 285
/*
 * We have an origin -- check if the same path exists in the
 * parent and return an origin structure to represent it.
 */
286
static struct origin *find_origin(struct scoreboard *sb,
J
Junio C Hamano 已提交
287 288 289 290 291
				  struct commit *parent,
				  struct origin *origin)
{
	struct origin *porigin = NULL;
	struct diff_options diff_opts;
292 293
	const char *paths[2];

294
	if (parent->util) {
295 296 297 298
		/*
		 * Each commit object can cache one origin in that
		 * commit.  This is a freestanding copy of origin and
		 * not refcounted.
299
		 */
300
		struct origin *cached = parent->util;
301
		if (!strcmp(cached->path, origin->path)) {
302 303 304 305
			/*
			 * The same path between origin and its parent
			 * without renaming -- the most common case.
			 */
306
			porigin = get_origin(sb, parent, cached->path);
307 308 309 310 311 312 313 314

			/*
			 * If the origin was newly created (i.e. get_origin
			 * would call make_origin if none is found in the
			 * scoreboard), it does not know the blob_sha1,
			 * so copy it.  Otherwise porigin was in the
			 * scoreboard and already knows blob_sha1.
			 */
315 316 317 318 319 320 321
			if (porigin->refcnt == 1)
				hashcpy(porigin->blob_sha1, cached->blob_sha1);
			return porigin;
		}
		/* otherwise it was not very useful; free it */
		free(parent->util);
		parent->util = NULL;
322 323
	}

324 325 326 327 328 329 330 331 332 333 334 335 336 337
	/* See if the origin->path is different between parent
	 * and origin first.  Most of the time they are the
	 * same and diff-tree is fairly efficient about this.
	 */
	diff_setup(&diff_opts);
	diff_opts.recursive = 1;
	diff_opts.detect_rename = 0;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
	paths[0] = origin->path;
	paths[1] = NULL;

	diff_tree_setup_paths(paths, &diff_opts);
	if (diff_setup_done(&diff_opts) < 0)
		die("diff-setup");
338 339 340 341 342 343 344

	if (is_null_sha1(origin->commit->object.sha1))
		do_diff_cache(parent->tree->object.sha1, &diff_opts);
	else
		diff_tree_sha1(parent->tree->object.sha1,
			       origin->commit->tree->object.sha1,
			       "", &diff_opts);
345 346 347 348 349 350 351 352 353 354 355
	diffcore_std(&diff_opts);

	/* It is either one entry that says "modified", or "created",
	 * or nothing.
	 */
	if (!diff_queued_diff.nr) {
		/* The path is the same as parent */
		porigin = get_origin(sb, parent, origin->path);
		hashcpy(porigin->blob_sha1, origin->blob_sha1);
	}
	else if (diff_queued_diff.nr != 1)
J
Junio C Hamano 已提交
356
		die("internal error in blame::find_origin");
357 358 359 360
	else {
		struct diff_filepair *p = diff_queued_diff.queue[0];
		switch (p->status) {
		default:
J
Junio C Hamano 已提交
361
			die("internal error in blame::find_origin (%c)",
362 363 364 365 366 367 368 369 370 371 372 373
			    p->status);
		case 'M':
			porigin = get_origin(sb, parent, origin->path);
			hashcpy(porigin->blob_sha1, p->one->sha1);
			break;
		case 'A':
		case 'T':
			/* Did not exist in parent, or type changed */
			break;
		}
	}
	diff_flush(&diff_opts);
374
	if (porigin) {
375 376 377 378 379
		/*
		 * Create a freestanding copy that is not part of
		 * the refcounted origin found in the scoreboard, and
		 * cache it in the commit.
		 */
380
		struct origin *cached;
381

382 383 384
		cached = make_origin(porigin->commit, porigin->path);
		hashcpy(cached->blob_sha1, porigin->blob_sha1);
		parent->util = cached;
385
	}
386 387
	return porigin;
}
388

389 390 391 392
/*
 * We have an origin -- find the path that corresponds to it in its
 * parent and return an origin structure to represent it.
 */
393 394 395 396 397 398 399 400
static struct origin *find_rename(struct scoreboard *sb,
				  struct commit *parent,
				  struct origin *origin)
{
	struct origin *porigin = NULL;
	struct diff_options diff_opts;
	int i;
	const char *paths[2];
J
Junio C Hamano 已提交
401 402 403 404 405

	diff_setup(&diff_opts);
	diff_opts.recursive = 1;
	diff_opts.detect_rename = DIFF_DETECT_RENAME;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
406
	diff_opts.single_follow = origin->path;
J
Junio C Hamano 已提交
407 408 409 410
	paths[0] = NULL;
	diff_tree_setup_paths(paths, &diff_opts);
	if (diff_setup_done(&diff_opts) < 0)
		die("diff-setup");
411 412 413 414 415 416 417

	if (is_null_sha1(origin->commit->object.sha1))
		do_diff_cache(parent->tree->object.sha1, &diff_opts);
	else
		diff_tree_sha1(parent->tree->object.sha1,
			       origin->commit->tree->object.sha1,
			       "", &diff_opts);
J
Junio C Hamano 已提交
418 419 420 421
	diffcore_std(&diff_opts);

	for (i = 0; i < diff_queued_diff.nr; i++) {
		struct diff_filepair *p = diff_queued_diff.queue[i];
422
		if ((p->status == 'R' || p->status == 'C') &&
423 424 425
		    !strcmp(p->two->path, origin->path)) {
			porigin = get_origin(sb, parent, p->one->path);
			hashcpy(porigin->blob_sha1, p->one->sha1);
J
Junio C Hamano 已提交
426 427 428 429 430 431 432
			break;
		}
	}
	diff_flush(&diff_opts);
	return porigin;
}

433 434 435
/*
 * Parsing of patch chunks...
 */
J
Junio C Hamano 已提交
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
struct chunk {
	/* line number in postimage; up to but not including this
	 * line is the same as preimage
	 */
	int same;

	/* preimage line number after this chunk */
	int p_next;

	/* postimage line number after this chunk */
	int t_next;
};

struct patch {
	struct chunk *chunks;
	int num;
};

struct blame_diff_state {
	struct xdiff_emit_state xm;
	struct patch *ret;
	unsigned hunk_post_context;
	unsigned hunk_in_pre_context : 1;
};

static void process_u_diff(void *state_, char *line, unsigned long len)
{
	struct blame_diff_state *state = state_;
	struct chunk *chunk;
	int off1, off2, len1, len2, num;

	num = state->ret->num;
	if (len < 4 || line[0] != '@' || line[1] != '@') {
		if (state->hunk_in_pre_context && line[0] == ' ')
			state->ret->chunks[num - 1].same++;
		else {
			state->hunk_in_pre_context = 0;
			if (line[0] == ' ')
				state->hunk_post_context++;
			else
				state->hunk_post_context = 0;
		}
		return;
	}

	if (num && state->hunk_post_context) {
		chunk = &state->ret->chunks[num - 1];
		chunk->p_next -= state->hunk_post_context;
		chunk->t_next -= state->hunk_post_context;
	}
	state->ret->num = ++num;
	state->ret->chunks = xrealloc(state->ret->chunks,
				      sizeof(struct chunk) * num);
	chunk = &state->ret->chunks[num - 1];
	if (parse_hunk_header(line, len, &off1, &len1, &off2, &len2)) {
		state->ret->num--;
		return;
	}

	/* Line numbers in patch output are one based. */
	off1--;
	off2--;

	chunk->same = len2 ? off2 : (off2 + 1);

	chunk->p_next = off1 + (len1 ? len1 : 1);
	chunk->t_next = chunk->same + len2;
	state->hunk_in_pre_context = 1;
	state->hunk_post_context = 0;
}

static struct patch *compare_buffer(mmfile_t *file_p, mmfile_t *file_o,
				    int context)
{
	struct blame_diff_state state;
	xpparam_t xpp;
	xdemitconf_t xecfg;
	xdemitcb_t ecb;

	xpp.flags = XDF_NEED_MINIMAL;
	xecfg.ctxlen = context;
	xecfg.flags = 0;
	ecb.outf = xdiff_outf;
	ecb.priv = &state;
	memset(&state, 0, sizeof(state));
	state.xm.consume = process_u_diff;
	state.ret = xmalloc(sizeof(struct patch));
	state.ret->chunks = NULL;
	state.ret->num = 0;

	xdl_diff(file_p, file_o, &xpp, &xecfg, &ecb);

	if (state.ret->num) {
		struct chunk *chunk;
		chunk = &state.ret->chunks[state.ret->num - 1];
		chunk->p_next -= state.hunk_post_context;
		chunk->t_next -= state.hunk_post_context;
	}
	return state.ret;
}

537 538 539 540 541
/*
 * Run diff between two origins and grab the patch output, so that
 * we can pass blame for lines origin is currently suspected for
 * to its parent.
 */
J
Junio C Hamano 已提交
542 543 544 545 546
static struct patch *get_patch(struct origin *parent, struct origin *origin)
{
	mmfile_t file_p, file_o;
	struct patch *patch;

547 548 549
	fill_origin_blob(parent, &file_p);
	fill_origin_blob(origin, &file_o);
	if (!file_p.ptr || !file_o.ptr)
J
Junio C Hamano 已提交
550 551
		return NULL;
	patch = compare_buffer(&file_p, &file_o, 0);
552
	num_get_patch++;
J
Junio C Hamano 已提交
553 554 555 556 557 558 559 560 561
	return patch;
}

static void free_patch(struct patch *p)
{
	free(p->chunks);
	free(p);
}

562
/*
P
Pavel Roskin 已提交
563
 * Link in a new blame entry to the scoreboard.  Entries that cover the
564 565
 * same line range have been removed from the scoreboard previously.
 */
J
Junio C Hamano 已提交
566 567 568 569
static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e)
{
	struct blame_entry *ent, *prev = NULL;

570 571
	origin_incref(e->suspect);

J
Junio C Hamano 已提交
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588
	for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next)
		prev = ent;

	/* prev, if not NULL, is the last one that is below e */
	e->prev = prev;
	if (prev) {
		e->next = prev->next;
		prev->next = e;
	}
	else {
		e->next = sb->ent;
		sb->ent = e;
	}
	if (e->next)
		e->next->prev = e;
}

589 590 591 592 593 594
/*
 * src typically is on-stack; we want to copy the information in it to
 * an malloced blame_entry that is already on the linked list of the
 * scoreboard.  The origin of dst loses a refcnt while the origin of src
 * gains one.
 */
J
Junio C Hamano 已提交
595 596 597
static void dup_entry(struct blame_entry *dst, struct blame_entry *src)
{
	struct blame_entry *p, *n;
598

J
Junio C Hamano 已提交
599 600
	p = dst->prev;
	n = dst->next;
601 602
	origin_incref(src->suspect);
	origin_decref(dst->suspect);
J
Junio C Hamano 已提交
603 604 605
	memcpy(dst, src, sizeof(*src));
	dst->prev = p;
	dst->next = n;
606
	dst->score = 0;
J
Junio C Hamano 已提交
607 608 609 610 611 612 613
}

static const char *nth_line(struct scoreboard *sb, int lno)
{
	return sb->final_buf + sb->lineno[lno];
}

614 615 616 617 618 619 620 621 622 623 624 625 626 627
/*
 * It is known that lines between tlno to same came from parent, and e
 * has an overlap with that range.  it also is known that parent's
 * line plno corresponds to e's line tlno.
 *
 *                <---- e ----->
 *                   <------>
 *                   <------------>
 *             <------------>
 *             <------------------>
 *
 * Split e into potentially three parts; before this chunk, the chunk
 * to be blamed for the parent, and after that portion.
 */
628
static void split_overlap(struct blame_entry *split,
J
Junio C Hamano 已提交
629 630 631 632 633 634 635 636 637
			  struct blame_entry *e,
			  int tlno, int plno, int same,
			  struct origin *parent)
{
	int chunk_end_lno;
	memset(split, 0, sizeof(struct blame_entry [3]));

	if (e->s_lno < tlno) {
		/* there is a pre-chunk part not blamed on parent */
638
		split[0].suspect = origin_incref(e->suspect);
J
Junio C Hamano 已提交
639 640 641 642 643 644 645 646 647 648 649 650 651
		split[0].lno = e->lno;
		split[0].s_lno = e->s_lno;
		split[0].num_lines = tlno - e->s_lno;
		split[1].lno = e->lno + tlno - e->s_lno;
		split[1].s_lno = plno;
	}
	else {
		split[1].lno = e->lno;
		split[1].s_lno = plno + (e->s_lno - tlno);
	}

	if (same < e->s_lno + e->num_lines) {
		/* there is a post-chunk part not blamed on parent */
652
		split[2].suspect = origin_incref(e->suspect);
J
Junio C Hamano 已提交
653 654 655 656 657 658 659 660 661
		split[2].lno = e->lno + (same - e->s_lno);
		split[2].s_lno = e->s_lno + (same - e->s_lno);
		split[2].num_lines = e->s_lno + e->num_lines - same;
		chunk_end_lno = split[2].lno;
	}
	else
		chunk_end_lno = e->lno + e->num_lines;
	split[1].num_lines = chunk_end_lno - split[1].lno;

662 663 664 665
	/*
	 * if it turns out there is nothing to blame the parent for,
	 * forget about the splitting.  !split[1].suspect signals this.
	 */
J
Junio C Hamano 已提交
666 667
	if (split[1].num_lines < 1)
		return;
668
	split[1].suspect = origin_incref(parent);
J
Junio C Hamano 已提交
669 670
}

671 672 673 674 675
/*
 * split_overlap() divided an existing blame e into up to three parts
 * in split.  Adjust the linked list of blames in the scoreboard to
 * reflect the split.
 */
J
Junio C Hamano 已提交
676
static void split_blame(struct scoreboard *sb,
677
			struct blame_entry *split,
J
Junio C Hamano 已提交
678 679 680 681 682
			struct blame_entry *e)
{
	struct blame_entry *new_entry;

	if (split[0].suspect && split[2].suspect) {
683
		/* The first part (reuse storage for the existing entry e) */
J
Junio C Hamano 已提交
684 685
		dup_entry(e, &split[0]);

686
		/* The last part -- me */
J
Junio C Hamano 已提交
687 688 689 690
		new_entry = xmalloc(sizeof(*new_entry));
		memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
		add_blame_entry(sb, new_entry);

691
		/* ... and the middle part -- parent */
J
Junio C Hamano 已提交
692 693 694 695 696
		new_entry = xmalloc(sizeof(*new_entry));
		memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
		add_blame_entry(sb, new_entry);
	}
	else if (!split[0].suspect && !split[2].suspect)
697 698 699 700
		/*
		 * The parent covers the entire area; reuse storage for
		 * e and replace it with the parent.
		 */
J
Junio C Hamano 已提交
701 702
		dup_entry(e, &split[1]);
	else if (split[0].suspect) {
703
		/* me and then parent */
J
Junio C Hamano 已提交
704 705 706 707 708 709 710
		dup_entry(e, &split[0]);

		new_entry = xmalloc(sizeof(*new_entry));
		memcpy(new_entry, &(split[1]), sizeof(struct blame_entry));
		add_blame_entry(sb, new_entry);
	}
	else {
711
		/* parent and then me */
J
Junio C Hamano 已提交
712 713 714 715 716 717 718
		dup_entry(e, &split[1]);

		new_entry = xmalloc(sizeof(*new_entry));
		memcpy(new_entry, &(split[2]), sizeof(struct blame_entry));
		add_blame_entry(sb, new_entry);
	}

719
	if (DEBUG) { /* sanity */
J
Junio C Hamano 已提交
720
		struct blame_entry *ent;
721
		int lno = sb->ent->lno, corrupt = 0;
J
Junio C Hamano 已提交
722 723 724 725 726 727 728 729 730

		for (ent = sb->ent; ent; ent = ent->next) {
			if (lno != ent->lno)
				corrupt = 1;
			if (ent->s_lno < 0)
				corrupt = 1;
			lno += ent->num_lines;
		}
		if (corrupt) {
731
			lno = sb->ent->lno;
J
Junio C Hamano 已提交
732 733 734 735 736 737 738 739 740 741
			for (ent = sb->ent; ent; ent = ent->next) {
				printf("L %8d l %8d n %8d\n",
				       lno, ent->lno, ent->num_lines);
				lno = ent->lno + ent->num_lines;
			}
			die("oops");
		}
	}
}

742 743 744 745
/*
 * After splitting the blame, the origins used by the
 * on-stack blame_entry should lose one refcnt each.
 */
746 747 748 749 750 751 752 753
static void decref_split(struct blame_entry *split)
{
	int i;

	for (i = 0; i < 3; i++)
		origin_decref(split[i].suspect);
}

754 755 756 757
/*
 * Helper for blame_chunk().  blame_entry e is known to overlap with
 * the patch hunk; split it and pass blame to the parent.
 */
J
Junio C Hamano 已提交
758 759 760 761 762 763 764
static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
			  int tlno, int plno, int same,
			  struct origin *parent)
{
	struct blame_entry split[3];

	split_overlap(split, e, tlno, plno, same, parent);
765 766 767
	if (split[1].suspect)
		split_blame(sb, split, e);
	decref_split(split);
J
Junio C Hamano 已提交
768 769
}

770 771 772
/*
 * Find the line number of the last line the target is suspected for.
 */
J
Junio C Hamano 已提交
773 774 775 776 777 778
static int find_last_in_target(struct scoreboard *sb, struct origin *target)
{
	struct blame_entry *e;
	int last_in_target = -1;

	for (e = sb->ent; e; e = e->next) {
779
		if (e->guilty || cmp_suspect(e->suspect, target))
J
Junio C Hamano 已提交
780 781 782 783 784 785 786
			continue;
		if (last_in_target < e->s_lno + e->num_lines)
			last_in_target = e->s_lno + e->num_lines;
	}
	return last_in_target;
}

787 788 789 790 791
/*
 * Process one hunk from the patch between the current suspect for
 * blame_entry e and its parent.  Find and split the overlap, and
 * pass blame to the overlapping part to the parent.
 */
J
Junio C Hamano 已提交
792 793 794 795
static void blame_chunk(struct scoreboard *sb,
			int tlno, int plno, int same,
			struct origin *target, struct origin *parent)
{
796
	struct blame_entry *e;
J
Junio C Hamano 已提交
797

798
	for (e = sb->ent; e; e = e->next) {
799
		if (e->guilty || cmp_suspect(e->suspect, target))
J
Junio C Hamano 已提交
800 801 802 803 804 805 806 807
			continue;
		if (same <= e->s_lno)
			continue;
		if (tlno < e->s_lno + e->num_lines)
			blame_overlap(sb, e, tlno, plno, same, parent);
	}
}

808 809 810 811 812
/*
 * We are looking at the origin 'target' and aiming to pass blame
 * for the lines it is suspected to its parent.  Run diff to find
 * which lines came from parent and pass blame for them.
 */
J
Junio C Hamano 已提交
813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
static int pass_blame_to_parent(struct scoreboard *sb,
				struct origin *target,
				struct origin *parent)
{
	int i, last_in_target, plno, tlno;
	struct patch *patch;

	last_in_target = find_last_in_target(sb, target);
	if (last_in_target < 0)
		return 1; /* nothing remains for this target */

	patch = get_patch(parent, target);
	plno = tlno = 0;
	for (i = 0; i < patch->num; i++) {
		struct chunk *chunk = &patch->chunks[i];

		blame_chunk(sb, tlno, plno, chunk->same, target, parent);
		plno = chunk->p_next;
		tlno = chunk->t_next;
	}
833
	/* The rest (i.e. anything after tlno) are the same as the parent */
J
Junio C Hamano 已提交
834 835 836 837 838 839
	blame_chunk(sb, tlno, plno, last_in_target, target, parent);

	free_patch(patch);
	return 0;
}

840 841 842 843 844 845 846 847 848
/*
 * The lines in blame_entry after splitting blames many times can become
 * very small and trivial, and at some point it becomes pointless to
 * blame the parents.  E.g. "\t\t}\n\t}\n\n" appears everywhere in any
 * ordinary C program, and it is not worth to say it was copied from
 * totally unrelated file in the parent.
 *
 * Compute how trivial the lines in the blame_entry are.
 */
849 850 851 852 853 854 855 856
static unsigned ent_score(struct scoreboard *sb, struct blame_entry *e)
{
	unsigned score;
	const char *cp, *ep;

	if (e->score)
		return e->score;

857
	score = 1;
858 859 860 861 862 863 864 865 866 867 868 869
	cp = nth_line(sb, e->lno);
	ep = nth_line(sb, e->lno + e->num_lines);
	while (cp < ep) {
		unsigned ch = *((unsigned char *)cp);
		if (isalnum(ch))
			score++;
		cp++;
	}
	e->score = score;
	return score;
}

870 871 872 873 874 875
/*
 * best_so_far[] and this[] are both a split of an existing blame_entry
 * that passes blame to the parent.  Maintain best_so_far the best split
 * so far, by comparing this and best_so_far and copying this into
 * bst_so_far as needed.
 */
876
static void copy_split_if_better(struct scoreboard *sb,
877 878
				 struct blame_entry *best_so_far,
				 struct blame_entry *this)
879
{
880 881
	int i;

882 883
	if (!this[1].suspect)
		return;
884 885 886 887
	if (best_so_far[1].suspect) {
		if (ent_score(sb, &this[1]) < ent_score(sb, &best_so_far[1]))
			return;
	}
888 889 890 891

	for (i = 0; i < 3; i++)
		origin_incref(this[i].suspect);
	decref_split(best_so_far);
892 893 894
	memcpy(best_so_far, this, sizeof(struct blame_entry [3]));
}

895 896 897 898 899
/*
 * Find the lines from parent that are the same as ent so that
 * we can pass blames to it.  file_p has the blob contents for
 * the parent.
 */
900 901 902
static void find_copy_in_blob(struct scoreboard *sb,
			      struct blame_entry *ent,
			      struct origin *parent,
903
			      struct blame_entry *split,
904 905 906 907 908 909 910 911
			      mmfile_t *file_p)
{
	const char *cp;
	int cnt;
	mmfile_t file_o;
	struct patch *patch;
	int i, plno, tlno;

912 913 914
	/*
	 * Prepare mmfile that contains only the lines in ent.
	 */
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
	cp = nth_line(sb, ent->lno);
	file_o.ptr = (char*) cp;
	cnt = ent->num_lines;

	while (cnt && cp < sb->final_buf + sb->final_buf_size) {
		if (*cp++ == '\n')
			cnt--;
	}
	file_o.size = cp - file_o.ptr;

	patch = compare_buffer(file_p, &file_o, 1);

	memset(split, 0, sizeof(struct blame_entry [3]));
	plno = tlno = 0;
	for (i = 0; i < patch->num; i++) {
		struct chunk *chunk = &patch->chunks[i];

		/* tlno to chunk->same are the same as ent */
		if (ent->num_lines <= tlno)
			break;
		if (tlno < chunk->same) {
			struct blame_entry this[3];
			split_overlap(this, ent,
				      tlno + ent->s_lno, plno,
				      chunk->same + ent->s_lno,
				      parent);
941
			copy_split_if_better(sb, split, this);
942
			decref_split(this);
943 944 945 946 947 948 949
		}
		plno = chunk->p_next;
		tlno = chunk->t_next;
	}
	free_patch(patch);
}

950 951 952 953
/*
 * See if lines currently target is suspected for can be attributed to
 * parent.
 */
954 955 956 957
static int find_move_in_parent(struct scoreboard *sb,
			       struct origin *target,
			       struct origin *parent)
{
958
	int last_in_target, made_progress;
959
	struct blame_entry *e, split[3];
960 961 962 963 964 965
	mmfile_t file_p;

	last_in_target = find_last_in_target(sb, target);
	if (last_in_target < 0)
		return 1; /* nothing remains for this target */

966 967
	fill_origin_blob(parent, &file_p);
	if (!file_p.ptr)
968 969
		return 0;

970 971 972 973 974 975 976 977 978 979 980 981 982 983
	made_progress = 1;
	while (made_progress) {
		made_progress = 0;
		for (e = sb->ent; e; e = e->next) {
			if (e->guilty || cmp_suspect(e->suspect, target))
				continue;
			find_copy_in_blob(sb, e, parent, split, &file_p);
			if (split[1].suspect &&
			    blame_move_score < ent_score(sb, &split[1])) {
				split_blame(sb, split, e);
				made_progress = 1;
			}
			decref_split(split);
		}
984 985 986 987
	}
	return 0;
}

988 989 990 991 992
struct blame_list {
	struct blame_entry *ent;
	struct blame_entry split[3];
};

993 994 995 996
/*
 * Count the number of entries the target is suspected for,
 * and prepare a list of entry and the best split.
 */
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
static struct blame_list *setup_blame_list(struct scoreboard *sb,
					   struct origin *target,
					   int *num_ents_p)
{
	struct blame_entry *e;
	int num_ents, i;
	struct blame_list *blame_list = NULL;

	for (e = sb->ent, num_ents = 0; e; e = e->next)
		if (!e->guilty && !cmp_suspect(e->suspect, target))
			num_ents++;
	if (num_ents) {
		blame_list = xcalloc(num_ents, sizeof(struct blame_list));
		for (e = sb->ent, i = 0; e; e = e->next)
			if (!e->guilty && !cmp_suspect(e->suspect, target))
				blame_list[i++].ent = e;
	}
	*num_ents_p = num_ents;
	return blame_list;
}

1018 1019 1020 1021 1022
/*
 * For lines target is suspected for, see if we can find code movement
 * across file boundary from the parent commit.  porigin is the path
 * in the parent we already tried.
 */
1023 1024 1025 1026 1027 1028 1029 1030
static int find_copy_in_parent(struct scoreboard *sb,
			       struct origin *target,
			       struct commit *parent,
			       struct origin *porigin,
			       int opt)
{
	struct diff_options diff_opts;
	const char *paths[1];
1031
	int i, j;
1032 1033
	int retval;
	struct blame_list *blame_list;
1034
	int num_ents;
1035

1036 1037
	blame_list = setup_blame_list(sb, target, &num_ents);
	if (!blame_list)
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
		return 1; /* nothing remains for this target */

	diff_setup(&diff_opts);
	diff_opts.recursive = 1;
	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;

	paths[0] = NULL;
	diff_tree_setup_paths(paths, &diff_opts);
	if (diff_setup_done(&diff_opts) < 0)
		die("diff-setup");
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059

	/* Try "find copies harder" on new path if requested;
	 * we do not want to use diffcore_rename() actually to
	 * match things up; find_copies_harder is set only to
	 * force diff_tree_sha1() to feed all filepairs to diff_queue,
	 * and this code needs to be after diff_setup_done(), which
	 * usually makes find-copies-harder imply copy detection.
	 */
	if ((opt & PICKAXE_BLAME_COPY_HARDER) &&
	    (!porigin || strcmp(target->path, porigin->path)))
		diff_opts.find_copies_harder = 1;

1060 1061 1062 1063 1064 1065
	if (is_null_sha1(target->commit->object.sha1))
		do_diff_cache(parent->tree->object.sha1, &diff_opts);
	else
		diff_tree_sha1(parent->tree->object.sha1,
			       target->commit->tree->object.sha1,
			       "", &diff_opts);
1066

1067 1068
	if (!diff_opts.find_copies_harder)
		diffcore_std(&diff_opts);
1069

1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087
	retval = 0;
	while (1) {
		int made_progress = 0;

		for (i = 0; i < diff_queued_diff.nr; i++) {
			struct diff_filepair *p = diff_queued_diff.queue[i];
			struct origin *norigin;
			mmfile_t file_p;
			struct blame_entry this[3];

			if (!DIFF_FILE_VALID(p->one))
				continue; /* does not exist in parent */
			if (porigin && !strcmp(p->one->path, porigin->path))
				/* find_move already dealt with this path */
				continue;

			norigin = get_origin(sb, parent, p->one->path);
			hashcpy(norigin->blob_sha1, p->one->sha1);
1088
			fill_origin_blob(norigin, &file_p);
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100
			if (!file_p.ptr)
				continue;

			for (j = 0; j < num_ents; j++) {
				find_copy_in_blob(sb, blame_list[j].ent,
						  norigin, this, &file_p);
				copy_split_if_better(sb, blame_list[j].split,
						     this);
				decref_split(this);
			}
			origin_decref(norigin);
		}
1101

1102
		for (j = 0; j < num_ents; j++) {
1103 1104 1105 1106 1107 1108 1109
			struct blame_entry *split = blame_list[j].split;
			if (split[1].suspect &&
			    blame_copy_score < ent_score(sb, &split[1])) {
				split_blame(sb, split, blame_list[j].ent);
				made_progress = 1;
			}
			decref_split(split);
1110
		}
1111
		free(blame_list);
1112

1113 1114 1115 1116 1117 1118 1119
		if (!made_progress)
			break;
		blame_list = setup_blame_list(sb, target, &num_ents);
		if (!blame_list) {
			retval = 1;
			break;
		}
1120
	}
1121
	diff_flush(&diff_opts);
1122

1123
	return retval;
1124 1125
}

1126 1127
/*
 * The blobs of origin and porigin exactly match, so everything
1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
 * origin is suspected for can be blamed on the parent.
 */
static void pass_whole_blame(struct scoreboard *sb,
			     struct origin *origin, struct origin *porigin)
{
	struct blame_entry *e;

	if (!porigin->file.ptr && origin->file.ptr) {
		/* Steal its file */
		porigin->file = origin->file;
		origin->file.ptr = NULL;
	}
	for (e = sb->ent; e; e = e->next) {
		if (cmp_suspect(e->suspect, origin))
			continue;
		origin_incref(porigin);
		origin_decref(e->suspect);
		e->suspect = porigin;
	}
}

J
Junio C Hamano 已提交
1149 1150
#define MAXPARENT 16

1151
static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt)
J
Junio C Hamano 已提交
1152
{
1153
	int i, pass;
J
Junio C Hamano 已提交
1154 1155 1156 1157 1158 1159
	struct commit *commit = origin->commit;
	struct commit_list *parent;
	struct origin *parent_origin[MAXPARENT], *porigin;

	memset(parent_origin, 0, sizeof(parent_origin));

1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171
	/* The first pass looks for unrenamed path to optimize for
	 * common cases, then we look for renames in the second pass.
	 */
	for (pass = 0; pass < 2; pass++) {
		struct origin *(*find)(struct scoreboard *,
				       struct commit *, struct origin *);
		find = pass ? find_rename : find_origin;

		for (i = 0, parent = commit->parents;
		     i < MAXPARENT && parent;
		     parent = parent->next, i++) {
			struct commit *p = parent->item;
1172
			int j, same;
1173 1174 1175 1176 1177

			if (parent_origin[i])
				continue;
			if (parse_commit(p))
				continue;
1178
			porigin = find(sb, p, origin);
1179 1180 1181
			if (!porigin)
				continue;
			if (!hashcmp(porigin->blob_sha1, origin->blob_sha1)) {
1182
				pass_whole_blame(sb, origin, porigin);
1183 1184 1185
				origin_decref(porigin);
				goto finish;
			}
1186
			for (j = same = 0; j < i; j++)
1187 1188
				if (parent_origin[j] &&
				    !hashcmp(parent_origin[j]->blob_sha1,
1189 1190 1191 1192 1193 1194 1195 1196
					     porigin->blob_sha1)) {
					same = 1;
					break;
				}
			if (!same)
				parent_origin[i] = porigin;
			else
				origin_decref(porigin);
J
Junio C Hamano 已提交
1197 1198 1199
		}
	}

1200
	num_commits++;
J
Junio C Hamano 已提交
1201 1202 1203 1204 1205 1206 1207
	for (i = 0, parent = commit->parents;
	     i < MAXPARENT && parent;
	     parent = parent->next, i++) {
		struct origin *porigin = parent_origin[i];
		if (!porigin)
			continue;
		if (pass_blame_to_parent(sb, origin, porigin))
1208
			goto finish;
J
Junio C Hamano 已提交
1209
	}
1210 1211

	/*
1212
	 * Optionally find moves in parents' files.
1213 1214 1215 1216 1217 1218 1219 1220 1221
	 */
	if (opt & PICKAXE_BLAME_MOVE)
		for (i = 0, parent = commit->parents;
		     i < MAXPARENT && parent;
		     parent = parent->next, i++) {
			struct origin *porigin = parent_origin[i];
			if (!porigin)
				continue;
			if (find_move_in_parent(sb, origin, porigin))
1222
				goto finish;
1223 1224
		}

1225
	/*
1226
	 * Optionally find copies from parents' files.
1227 1228 1229 1230 1231 1232 1233 1234
	 */
	if (opt & PICKAXE_BLAME_COPY)
		for (i = 0, parent = commit->parents;
		     i < MAXPARENT && parent;
		     parent = parent->next, i++) {
			struct origin *porigin = parent_origin[i];
			if (find_copy_in_parent(sb, origin, parent->item,
						porigin, opt))
1235
				goto finish;
1236
		}
1237 1238 1239 1240

 finish:
	for (i = 0; i < MAXPARENT; i++)
		origin_decref(parent_origin[i]);
J
Junio C Hamano 已提交
1241 1242
}

1243 1244 1245
/*
 * Information on commits, used for output.
 */
J
Junio C Hamano 已提交
1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261
struct commit_info
{
	char *author;
	char *author_mail;
	unsigned long author_time;
	char *author_tz;

	/* filled only when asked for details */
	char *committer;
	char *committer_mail;
	unsigned long committer_time;
	char *committer_tz;

	char *summary;
};

1262 1263 1264
/*
 * Parse author/committer line in the commit object buffer
 */
J
Junio C Hamano 已提交
1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
static void get_ac_line(const char *inbuf, const char *what,
			int bufsz, char *person, char **mail,
			unsigned long *time, char **tz)
{
	int len;
	char *tmp, *endp;

	tmp = strstr(inbuf, what);
	if (!tmp)
		goto error_out;
	tmp += strlen(what);
	endp = strchr(tmp, '\n');
	if (!endp)
		len = strlen(tmp);
	else
		len = endp - tmp;
	if (bufsz <= len) {
	error_out:
		/* Ugh */
		person = *mail = *tz = "(unknown)";
		*time = 0;
		return;
	}
	memcpy(person, tmp, len);

	tmp = person;
	tmp += len;
	*tmp = 0;
	while (*tmp != ' ')
		tmp--;
	*tz = tmp+1;

	*tmp = 0;
	while (*tmp != ' ')
		tmp--;
	*time = strtoul(tmp, NULL, 10);

	*tmp = 0;
	while (*tmp != ' ')
		tmp--;
	*mail = tmp + 1;
	*tmp = 0;
}

static void get_commit_info(struct commit *commit,
			    struct commit_info *ret,
			    int detailed)
{
	int len;
	char *tmp, *endp;
	static char author_buf[1024];
	static char committer_buf[1024];
	static char summary_buf[1024];

1319 1320
	/*
	 * We've operated without save_commit_buffer, so
1321 1322 1323 1324 1325 1326 1327 1328
	 * we now need to populate them for output.
	 */
	if (!commit->buffer) {
		char type[20];
		unsigned long size;
		commit->buffer =
			read_sha1_file(commit->object.sha1, type, &size);
	}
J
Junio C Hamano 已提交
1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
	ret->author = author_buf;
	get_ac_line(commit->buffer, "\nauthor ",
		    sizeof(author_buf), author_buf, &ret->author_mail,
		    &ret->author_time, &ret->author_tz);

	if (!detailed)
		return;

	ret->committer = committer_buf;
	get_ac_line(commit->buffer, "\ncommitter ",
		    sizeof(committer_buf), committer_buf, &ret->committer_mail,
		    &ret->committer_time, &ret->committer_tz);

	ret->summary = summary_buf;
	tmp = strstr(commit->buffer, "\n\n");
	if (!tmp) {
	error_out:
		sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1));
		return;
	}
	tmp += 2;
	endp = strchr(tmp, '\n');
	if (!endp)
1352
		endp = tmp + strlen(tmp);
J
Junio C Hamano 已提交
1353
	len = endp - tmp;
1354
	if (len >= sizeof(summary_buf) || len == 0)
J
Junio C Hamano 已提交
1355 1356 1357 1358 1359
		goto error_out;
	memcpy(summary_buf, tmp, len);
	summary_buf[len] = 0;
}

1360 1361 1362 1363
/*
 * To allow LF and other nonportable characters in pathnames,
 * they are c-style quoted as needed.
 */
1364 1365 1366 1367 1368 1369 1370
static void write_filename_info(const char *path)
{
	printf("filename ");
	write_name_quoted(NULL, 0, path, 1, stdout);
	putchar('\n');
}

1371 1372 1373 1374
/*
 * The blame_entry is found to be guilty for the range.  Mark it
 * as such, and show it in incremental output.
 */
L
Linus Torvalds 已提交
1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
static void found_guilty_entry(struct blame_entry *ent)
{
	if (ent->guilty)
		return;
	ent->guilty = 1;
	if (incremental) {
		struct origin *suspect = ent->suspect;

		printf("%s %d %d %d\n",
		       sha1_to_hex(suspect->commit->object.sha1),
		       ent->s_lno + 1, ent->lno + 1, ent->num_lines);
		if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
			struct commit_info ci;
			suspect->commit->object.flags |= METAINFO_SHOWN;
			get_commit_info(suspect->commit, &ci, 1);
			printf("author %s\n", ci.author);
			printf("author-mail %s\n", ci.author_mail);
			printf("author-time %lu\n", ci.author_time);
			printf("author-tz %s\n", ci.author_tz);
			printf("committer %s\n", ci.committer);
			printf("committer-mail %s\n", ci.committer_mail);
			printf("committer-time %lu\n", ci.committer_time);
			printf("committer-tz %s\n", ci.committer_tz);
			printf("summary %s\n", ci.summary);
			if (suspect->commit->object.flags & UNINTERESTING)
				printf("boundary\n");
		}
1402
		write_filename_info(suspect->path);
L
Linus Torvalds 已提交
1403 1404 1405
	}
}

1406 1407
/*
 * The main loop -- while the scoreboard has lines whose true origin
P
Pavel Roskin 已提交
1408
 * is still unknown, pick one blame_entry, and allow its current
1409 1410
 * suspect to pass blames to its parents.
 */
L
Linus Torvalds 已提交
1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424
static void assign_blame(struct scoreboard *sb, struct rev_info *revs, int opt)
{
	while (1) {
		struct blame_entry *ent;
		struct commit *commit;
		struct origin *suspect = NULL;

		/* find one suspect to break down */
		for (ent = sb->ent; !suspect && ent; ent = ent->next)
			if (!ent->guilty)
				suspect = ent->suspect;
		if (!suspect)
			return; /* all done */

1425 1426 1427 1428
		/*
		 * We will use this suspect later in the loop,
		 * so hold onto it in the meantime.
		 */
L
Linus Torvalds 已提交
1429 1430 1431 1432 1433
		origin_incref(suspect);
		commit = suspect->commit;
		if (!commit->object.parsed)
			parse_commit(commit);
		if (!(commit->object.flags & UNINTERESTING) &&
1434
		    !(revs->max_age != -1 && commit->date < revs->max_age))
L
Linus Torvalds 已提交
1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480
			pass_blame(sb, suspect, opt);
		else {
			commit->object.flags |= UNINTERESTING;
			if (commit->object.parsed)
				mark_parents_uninteresting(commit);
		}
		/* treat root commit as boundary */
		if (!commit->parents && !show_root)
			commit->object.flags |= UNINTERESTING;

		/* Take responsibility for the remaining entries */
		for (ent = sb->ent; ent; ent = ent->next)
			if (!cmp_suspect(ent->suspect, suspect))
				found_guilty_entry(ent);
		origin_decref(suspect);

		if (DEBUG) /* sanity */
			sanity_check_refcnt(sb);
	}
}

static const char *format_time(unsigned long time, const char *tz_str,
			       int show_raw_time)
{
	static char time_buf[128];
	time_t t = time;
	int minutes, tz;
	struct tm *tm;

	if (show_raw_time) {
		sprintf(time_buf, "%lu %s", time, tz_str);
		return time_buf;
	}

	tz = atoi(tz_str);
	minutes = tz < 0 ? -tz : tz;
	minutes = (minutes / 100)*60 + (minutes % 100);
	minutes = tz < 0 ? -minutes : minutes;
	t = time + minutes * 60;
	tm = gmtime(&t);

	strftime(time_buf, sizeof(time_buf), "%Y-%m-%d %H:%M:%S ", tm);
	strcat(time_buf, tz_str);
	return time_buf;
}

J
Junio C Hamano 已提交
1481 1482 1483 1484 1485 1486
#define OUTPUT_ANNOTATE_COMPAT	001
#define OUTPUT_LONG_OBJECT_NAME	002
#define OUTPUT_RAW_TIMESTAMP	004
#define OUTPUT_PORCELAIN	010
#define OUTPUT_SHOW_NAME	020
#define OUTPUT_SHOW_NUMBER	040
1487
#define OUTPUT_SHOW_SCORE      0100
J
Junio C Hamano 已提交
1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514

static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent)
{
	int cnt;
	const char *cp;
	struct origin *suspect = ent->suspect;
	char hex[41];

	strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));
	printf("%s%c%d %d %d\n",
	       hex,
	       ent->guilty ? ' ' : '*', // purely for debugging
	       ent->s_lno + 1,
	       ent->lno + 1,
	       ent->num_lines);
	if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
		struct commit_info ci;
		suspect->commit->object.flags |= METAINFO_SHOWN;
		get_commit_info(suspect->commit, &ci, 1);
		printf("author %s\n", ci.author);
		printf("author-mail %s\n", ci.author_mail);
		printf("author-time %lu\n", ci.author_time);
		printf("author-tz %s\n", ci.author_tz);
		printf("committer %s\n", ci.committer);
		printf("committer-mail %s\n", ci.committer_mail);
		printf("committer-time %lu\n", ci.committer_time);
		printf("committer-tz %s\n", ci.committer_tz);
1515
		write_filename_info(suspect->path);
J
Junio C Hamano 已提交
1516
		printf("summary %s\n", ci.summary);
1517 1518
		if (suspect->commit->object.flags & UNINTERESTING)
			printf("boundary\n");
J
Junio C Hamano 已提交
1519 1520
	}
	else if (suspect->commit->object.flags & MORE_THAN_ONE_PATH)
1521
		write_filename_info(suspect->path);
J
Junio C Hamano 已提交
1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553

	cp = nth_line(sb, ent->lno);
	for (cnt = 0; cnt < ent->num_lines; cnt++) {
		char ch;
		if (cnt)
			printf("%s %d %d\n", hex,
			       ent->s_lno + 1 + cnt,
			       ent->lno + 1 + cnt);
		putchar('\t');
		do {
			ch = *cp++;
			putchar(ch);
		} while (ch != '\n' &&
			 cp < sb->final_buf + sb->final_buf_size);
	}
}

static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt)
{
	int cnt;
	const char *cp;
	struct origin *suspect = ent->suspect;
	struct commit_info ci;
	char hex[41];
	int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);

	get_commit_info(suspect->commit, &ci, 1);
	strcpy(hex, sha1_to_hex(suspect->commit->object.sha1));

	cp = nth_line(sb, ent->lno);
	for (cnt = 0; cnt < ent->num_lines; cnt++) {
		char ch;
1554 1555 1556
		int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8;

		if (suspect->commit->object.flags & UNINTERESTING) {
1557 1558 1559 1560 1561 1562
			if (!blank_boundary) {
				length--;
				putchar('^');
			}
			else
				memset(hex, ' ', length);
1563
		}
J
Junio C Hamano 已提交
1564

1565
		printf("%.*s", length, hex);
J
Junio C Hamano 已提交
1566 1567 1568 1569 1570 1571
		if (opt & OUTPUT_ANNOTATE_COMPAT)
			printf("\t(%10s\t%10s\t%d)", ci.author,
			       format_time(ci.author_time, ci.author_tz,
					   show_raw_time),
			       ent->lno + 1 + cnt);
		else {
1572
			if (opt & OUTPUT_SHOW_SCORE)
1573 1574 1575
				printf(" %*d %02d",
				       max_score_digits, ent->score,
				       ent->suspect->refcnt);
J
Junio C Hamano 已提交
1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619
			if (opt & OUTPUT_SHOW_NAME)
				printf(" %-*.*s", longest_file, longest_file,
				       suspect->path);
			if (opt & OUTPUT_SHOW_NUMBER)
				printf(" %*d", max_orig_digits,
				       ent->s_lno + 1 + cnt);
			printf(" (%-*.*s %10s %*d) ",
			       longest_author, longest_author, ci.author,
			       format_time(ci.author_time, ci.author_tz,
					   show_raw_time),
			       max_digits, ent->lno + 1 + cnt);
		}
		do {
			ch = *cp++;
			putchar(ch);
		} while (ch != '\n' &&
			 cp < sb->final_buf + sb->final_buf_size);
	}
}

static void output(struct scoreboard *sb, int option)
{
	struct blame_entry *ent;

	if (option & OUTPUT_PORCELAIN) {
		for (ent = sb->ent; ent; ent = ent->next) {
			struct blame_entry *oth;
			struct origin *suspect = ent->suspect;
			struct commit *commit = suspect->commit;
			if (commit->object.flags & MORE_THAN_ONE_PATH)
				continue;
			for (oth = ent->next; oth; oth = oth->next) {
				if ((oth->suspect->commit != commit) ||
				    !strcmp(oth->suspect->path, suspect->path))
					continue;
				commit->object.flags |= MORE_THAN_ONE_PATH;
				break;
			}
		}
	}

	for (ent = sb->ent; ent; ent = ent->next) {
		if (option & OUTPUT_PORCELAIN)
			emit_porcelain(sb, ent);
1620
		else {
J
Junio C Hamano 已提交
1621
			emit_other(sb, ent, option);
1622
		}
J
Junio C Hamano 已提交
1623 1624 1625
	}
}

1626 1627 1628 1629
/*
 * To allow quick access to the contents of nth line in the
 * final image, prepare an index in the scoreboard.
 */
J
Junio C Hamano 已提交
1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649
static int prepare_lines(struct scoreboard *sb)
{
	const char *buf = sb->final_buf;
	unsigned long len = sb->final_buf_size;
	int num = 0, incomplete = 0, bol = 1;

	if (len && buf[len-1] != '\n')
		incomplete++; /* incomplete line at the end */
	while (len--) {
		if (bol) {
			sb->lineno = xrealloc(sb->lineno,
					      sizeof(int* ) * (num + 1));
			sb->lineno[num] = buf - sb->final_buf;
			bol = 0;
		}
		if (*buf++ == '\n') {
			num++;
			bol = 1;
		}
	}
J
Junio C Hamano 已提交
1650 1651 1652
	sb->lineno = xrealloc(sb->lineno,
			      sizeof(int* ) * (num + incomplete + 1));
	sb->lineno[num + incomplete] = buf - sb->final_buf;
J
Junio C Hamano 已提交
1653 1654 1655 1656
	sb->num_lines = num + incomplete;
	return sb->num_lines;
}

1657 1658 1659 1660 1661
/*
 * Add phony grafts for use with -S; this is primarily to
 * support git-cvsserver that wants to give a linear history
 * to its clients.
 */
J
Junio C Hamano 已提交
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671
static int read_ancestry(const char *graft_file)
{
	FILE *fp = fopen(graft_file, "r");
	char buf[1024];
	if (!fp)
		return -1;
	while (fgets(buf, sizeof(buf), fp)) {
		/* The format is just "Commit Parent1 Parent2 ...\n" */
		int len = strlen(buf);
		struct commit_graft *graft = read_graft_line(buf, len);
1672 1673
		if (graft)
			register_commit_graft(graft, 0);
J
Junio C Hamano 已提交
1674 1675 1676 1677 1678
	}
	fclose(fp);
	return 0;
}

1679 1680 1681
/*
 * How many columns do we need to show line numbers in decimal?
 */
J
Junio C Hamano 已提交
1682 1683 1684 1685 1686 1687 1688 1689 1690
static int lineno_width(int lines)
{
        int i, width;

        for (width = 1, i = 10; i <= lines + 1; width++)
                i *= 10;
        return width;
}

1691 1692 1693 1694
/*
 * How many columns do we need to show line numbers, authors,
 * and filenames?
 */
J
Junio C Hamano 已提交
1695 1696 1697 1698
static void find_alignment(struct scoreboard *sb, int *option)
{
	int longest_src_lines = 0;
	int longest_dst_lines = 0;
1699
	unsigned largest_score = 0;
J
Junio C Hamano 已提交
1700 1701 1702 1703 1704 1705 1706
	struct blame_entry *e;

	for (e = sb->ent; e; e = e->next) {
		struct origin *suspect = e->suspect;
		struct commit_info ci;
		int num;

1707 1708 1709 1710 1711
		if (strcmp(suspect->path, sb->path))
			*option |= OUTPUT_SHOW_NAME;
		num = strlen(suspect->path);
		if (longest_file < num)
			longest_file = num;
J
Junio C Hamano 已提交
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724
		if (!(suspect->commit->object.flags & METAINFO_SHOWN)) {
			suspect->commit->object.flags |= METAINFO_SHOWN;
			get_commit_info(suspect->commit, &ci, 1);
			num = strlen(ci.author);
			if (longest_author < num)
				longest_author = num;
		}
		num = e->s_lno + e->num_lines;
		if (longest_src_lines < num)
			longest_src_lines = num;
		num = e->lno + e->num_lines;
		if (longest_dst_lines < num)
			longest_dst_lines = num;
1725 1726
		if (largest_score < ent_score(sb, e))
			largest_score = ent_score(sb, e);
J
Junio C Hamano 已提交
1727 1728 1729
	}
	max_orig_digits = lineno_width(longest_src_lines);
	max_digits = lineno_width(longest_dst_lines);
1730
	max_score_digits = lineno_width(largest_score);
J
Junio C Hamano 已提交
1731 1732
}

1733 1734 1735 1736
/*
 * For debugging -- origin is refcounted, and this asserts that
 * we do not underflow.
 */
1737 1738 1739 1740 1741 1742
static void sanity_check_refcnt(struct scoreboard *sb)
{
	int baa = 0;
	struct blame_entry *ent;

	for (ent = sb->ent; ent; ent = ent->next) {
1743
		/* Nobody should have zero or negative refcnt */
1744 1745 1746 1747 1748
		if (ent->suspect->refcnt <= 0) {
			fprintf(stderr, "%s in %s has negative refcnt %d\n",
				ent->suspect->path,
				sha1_to_hex(ent->suspect->commit->object.sha1),
				ent->suspect->refcnt);
1749
			baa = 1;
1750
		}
1751 1752 1753
	}
	for (ent = sb->ent; ent; ent = ent->next) {
		/* Mark the ones that haven't been checked */
1754 1755 1756 1757
		if (0 < ent->suspect->refcnt)
			ent->suspect->refcnt = -ent->suspect->refcnt;
	}
	for (ent = sb->ent; ent; ent = ent->next) {
1758 1759 1760
		/*
		 * ... then pick each and see if they have the the
		 * correct refcnt.
1761 1762 1763 1764 1765 1766 1767
		 */
		int found;
		struct blame_entry *e;
		struct origin *suspect = ent->suspect;

		if (0 < suspect->refcnt)
			continue;
1768
		suspect->refcnt = -suspect->refcnt; /* Unmark */
1769 1770 1771 1772 1773
		for (found = 0, e = sb->ent; e; e = e->next) {
			if (e->suspect != suspect)
				continue;
			found++;
		}
1774 1775 1776 1777 1778 1779 1780
		if (suspect->refcnt != found) {
			fprintf(stderr, "%s in %s has refcnt %d, not %d\n",
				ent->suspect->path,
				sha1_to_hex(ent->suspect->commit->object.sha1),
				ent->suspect->refcnt, found);
			baa = 2;
		}
1781 1782 1783 1784 1785
	}
	if (baa) {
		int opt = 0160;
		find_alignment(sb, &opt);
		output(sb, opt);
1786
		die("Baa %d!", baa);
1787 1788 1789
	}
}

1790 1791 1792 1793
/*
 * Used for the command line parsing; check if the path exists
 * in the working tree.
 */
J
Junio C Hamano 已提交
1794 1795 1796 1797 1798 1799
static int has_path_in_work_tree(const char *path)
{
	struct stat st;
	return !lstat(path, &st);
}

1800 1801 1802 1803 1804 1805 1806 1807 1808
static unsigned parse_score(const char *arg)
{
	char *end;
	unsigned long score = strtoul(arg, &end, 10);
	if (*end)
		return 0;
	return score;
}

1809 1810 1811 1812 1813 1814 1815
static const char *add_prefix(const char *prefix, const char *path)
{
	if (!prefix || !prefix[0])
		return path;
	return prefix_path(prefix, strlen(prefix), path);
}

1816 1817 1818
/*
 * Parsing of (comma separated) one item in the -L option
 */
J
Junio C Hamano 已提交
1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829
static const char *parse_loc(const char *spec,
			     struct scoreboard *sb, long lno,
			     long begin, long *ret)
{
	char *term;
	const char *line;
	long num;
	int reg_error;
	regex_t regexp;
	regmatch_t match[1];

1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848
	/* Allow "-L <something>,+20" to mean starting at <something>
	 * for 20 lines, or "-L <something>,-5" for 5 lines ending at
	 * <something>.
	 */
	if (1 < begin && (spec[0] == '+' || spec[0] == '-')) {
		num = strtol(spec + 1, &term, 10);
		if (term != spec + 1) {
			if (spec[0] == '-')
				num = 0 - num;
			if (0 < num)
				*ret = begin + num - 2;
			else if (!num)
				*ret = begin;
			else
				*ret = begin + num;
			return term;
		}
		return spec;
	}
J
Junio C Hamano 已提交
1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892
	num = strtol(spec, &term, 10);
	if (term != spec) {
		*ret = num;
		return term;
	}
	if (spec[0] != '/')
		return spec;

	/* it could be a regexp of form /.../ */
	for (term = (char*) spec + 1; *term && *term != '/'; term++) {
		if (*term == '\\')
			term++;
	}
	if (*term != '/')
		return spec;

	/* try [spec+1 .. term-1] as regexp */
	*term = 0;
	begin--; /* input is in human terms */
	line = nth_line(sb, begin);

	if (!(reg_error = regcomp(&regexp, spec + 1, REG_NEWLINE)) &&
	    !(reg_error = regexec(&regexp, line, 1, match, 0))) {
		const char *cp = line + match[0].rm_so;
		const char *nline;

		while (begin++ < lno) {
			nline = nth_line(sb, begin);
			if (line <= cp && cp < nline)
				break;
			line = nline;
		}
		*ret = begin;
		regfree(&regexp);
		*term++ = '/';
		return term;
	}
	else {
		char errbuf[1024];
		regerror(reg_error, &regexp, errbuf, 1024);
		die("-L parameter '%s': %s", spec + 1, errbuf);
	}
}

1893 1894 1895
/*
 * Parsing of -L option
 */
J
Junio C Hamano 已提交
1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906
static void prepare_blame_range(struct scoreboard *sb,
				const char *bottomtop,
				long lno,
				long *bottom, long *top)
{
	const char *term;

	term = parse_loc(bottomtop, sb, lno, 1, bottom);
	if (*term == ',') {
		term = parse_loc(term + 1, sb, lno, *bottom + 1, top);
		if (*term)
J
Junio C Hamano 已提交
1907
			usage(blame_usage);
J
Junio C Hamano 已提交
1908 1909
	}
	if (*term)
J
Junio C Hamano 已提交
1910
		usage(blame_usage);
J
Junio C Hamano 已提交
1911 1912
}

1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925
static int git_blame_config(const char *var, const char *value)
{
	if (!strcmp(var, "blame.showroot")) {
		show_root = git_config_bool(var, value);
		return 0;
	}
	if (!strcmp(var, "blame.blankboundary")) {
		blank_boundary = git_config_bool(var, value);
		return 0;
	}
	return git_default_config(var, value);
}

1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056
static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
{
	struct commit *commit;
	struct origin *origin;
	unsigned char head_sha1[20];
	char *buf;
	const char *ident;
	int fd;
	time_t now;
	unsigned long fin_size;
	int size, len;
	struct cache_entry *ce;
	unsigned mode;

	if (get_sha1("HEAD", head_sha1))
		die("No such ref: HEAD");

	time(&now);
	commit = xcalloc(1, sizeof(*commit));
	commit->parents = xcalloc(1, sizeof(*commit->parents));
	commit->parents->item = lookup_commit_reference(head_sha1);
	commit->object.parsed = 1;
	commit->date = now;
	commit->object.type = OBJ_COMMIT;

	origin = make_origin(commit, path);

	if (!contents_from || strcmp("-", contents_from)) {
		struct stat st;
		const char *read_from;

		if (contents_from) {
			if (stat(contents_from, &st) < 0)
				die("Cannot stat %s", contents_from);
			read_from = contents_from;
		}
		else {
			if (lstat(path, &st) < 0)
				die("Cannot lstat %s", path);
			read_from = path;
		}
		fin_size = st.st_size;
		buf = xmalloc(fin_size+1);
		mode = canon_mode(st.st_mode);
		switch (st.st_mode & S_IFMT) {
		case S_IFREG:
			fd = open(read_from, O_RDONLY);
			if (fd < 0)
				die("cannot open %s", read_from);
			if (read_in_full(fd, buf, fin_size) != fin_size)
				die("cannot read %s", read_from);
			break;
		case S_IFLNK:
			if (readlink(read_from, buf, fin_size+1) != fin_size)
				die("cannot readlink %s", read_from);
			break;
		default:
			die("unsupported file type %s", read_from);
		}
	}
	else {
		/* Reading from stdin */
		contents_from = "standard input";
		buf = NULL;
		fin_size = 0;
		mode = 0;
		while (1) {
			ssize_t cnt = 8192;
			buf = xrealloc(buf, fin_size + cnt);
			cnt = xread(0, buf + fin_size, cnt);
			if (cnt < 0)
				die("read error %s from stdin",
				    strerror(errno));
			if (!cnt)
				break;
			fin_size += cnt;
		}
		buf = xrealloc(buf, fin_size + 1);
	}
	buf[fin_size] = 0;
	origin->file.ptr = buf;
	origin->file.size = fin_size;
	write_sha1_file(buf, fin_size, blob_type, origin->blob_sha1);
	commit->util = origin;

	/*
	 * Read the current index, replace the path entry with
	 * origin->blob_sha1 without mucking with its mode or type
	 * bits; we are not going to write this index out -- we just
	 * want to run "diff-index --cached".
	 */
	discard_cache();
	read_cache();

	len = strlen(path);
	if (!mode) {
		int pos = cache_name_pos(path, len);
		if (0 <= pos)
			mode = ntohl(active_cache[pos]->ce_mode);
		else
			/* Let's not bother reading from HEAD tree */
			mode = S_IFREG | 0644;
	}
	size = cache_entry_size(len);
	ce = xcalloc(1, size);
	hashcpy(ce->sha1, origin->blob_sha1);
	memcpy(ce->name, path, len);
	ce->ce_flags = create_ce_flags(len, 0);
	ce->ce_mode = create_ce_mode(mode);
	add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);

	/*
	 * We are not going to write this out, so this does not matter
	 * right now, but someday we might optimize diff-index --cached
	 * with cache-tree information.
	 */
	cache_tree_invalidate_path(active_cache_tree, path);

	commit->buffer = xmalloc(400);
	ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
	sprintf(commit->buffer,
		"tree 0000000000000000000000000000000000000000\n"
		"parent %s\n"
		"author %s\n"
		"committer %s\n\n"
		"Version of %s from %s\n",
		sha1_to_hex(head_sha1),
		ident, ident, path, contents_from ? contents_from : path);
	return commit;
}

J
Junio C Hamano 已提交
2057
int cmd_blame(int argc, const char **argv, const char *prefix)
J
Junio C Hamano 已提交
2058 2059 2060 2061 2062 2063
{
	struct rev_info revs;
	const char *path;
	struct scoreboard sb;
	struct origin *o;
	struct blame_entry *ent;
2064
	int i, seen_dashdash, unk, opt;
J
Junio C Hamano 已提交
2065 2066 2067 2068 2069
	long bottom, top, lno;
	int output_option = 0;
	const char *revs_file = NULL;
	const char *final_commit_name = NULL;
	char type[10];
J
Junio C Hamano 已提交
2070
	const char *bottomtop = NULL;
2071
	const char *contents_from = NULL;
J
Junio C Hamano 已提交
2072

2073
	git_config(git_blame_config);
2074 2075
	save_commit_buffer = 0;

2076
	opt = 0;
J
Junio C Hamano 已提交
2077 2078 2079 2080 2081
	seen_dashdash = 0;
	for (unk = i = 1; i < argc; i++) {
		const char *arg = argv[i];
		if (*arg != '-')
			break;
2082 2083 2084 2085
		else if (!strcmp("-b", arg))
			blank_boundary = 1;
		else if (!strcmp("--root", arg))
			show_root = 1;
J
Junio C Hamano 已提交
2086 2087 2088 2089 2090 2091 2092 2093
		else if (!strcmp("-c", arg))
			output_option |= OUTPUT_ANNOTATE_COMPAT;
		else if (!strcmp("-t", arg))
			output_option |= OUTPUT_RAW_TIMESTAMP;
		else if (!strcmp("-l", arg))
			output_option |= OUTPUT_LONG_OBJECT_NAME;
		else if (!strcmp("-S", arg) && ++i < argc)
			revs_file = argv[i];
2094
		else if (!strncmp("-M", arg, 2)) {
2095
			opt |= PICKAXE_BLAME_MOVE;
2096 2097 2098
			blame_move_score = parse_score(arg+2);
		}
		else if (!strncmp("-C", arg, 2)) {
2099 2100 2101
			if (opt & PICKAXE_BLAME_COPY)
				opt |= PICKAXE_BLAME_COPY_HARDER;
			opt |= PICKAXE_BLAME_COPY | PICKAXE_BLAME_MOVE;
2102
			blame_copy_score = parse_score(arg+2);
2103
		}
2104 2105 2106
		else if (!strncmp("-L", arg, 2)) {
			if (!arg[2]) {
				if (++i >= argc)
J
Junio C Hamano 已提交
2107
					usage(blame_usage);
2108 2109 2110 2111
				arg = argv[i];
			}
			else
				arg += 2;
J
Junio C Hamano 已提交
2112
			if (bottomtop)
J
Junio C Hamano 已提交
2113
				die("More than one '-L n,m' option given");
J
Junio C Hamano 已提交
2114
			bottomtop = arg;
J
Junio C Hamano 已提交
2115
		}
2116 2117 2118 2119 2120
		else if (!strcmp("--contents", arg)) {
			if (++i >= argc)
				usage(blame_usage);
			contents_from = argv[i];
		}
L
Linus Torvalds 已提交
2121 2122
		else if (!strcmp("--incremental", arg))
			incremental = 1;
2123 2124
		else if (!strcmp("--score-debug", arg))
			output_option |= OUTPUT_SHOW_SCORE;
J
Junio C Hamano 已提交
2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142
		else if (!strcmp("-f", arg) ||
			 !strcmp("--show-name", arg))
			output_option |= OUTPUT_SHOW_NAME;
		else if (!strcmp("-n", arg) ||
			 !strcmp("--show-number", arg))
			output_option |= OUTPUT_SHOW_NUMBER;
		else if (!strcmp("-p", arg) ||
			 !strcmp("--porcelain", arg))
			output_option |= OUTPUT_PORCELAIN;
		else if (!strcmp("--", arg)) {
			seen_dashdash = 1;
			i++;
			break;
		}
		else
			argv[unk++] = arg;
	}

2143 2144 2145
	if (!incremental)
		setup_pager();

2146 2147 2148 2149 2150
	if (!blame_move_score)
		blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
	if (!blame_copy_score)
		blame_copy_score = BLAME_DEFAULT_COPY_SCORE;

2151 2152
	/*
	 * We have collected options unknown to us in argv[1..unk]
J
Junio C Hamano 已提交
2153
	 * which are to be passed to revision machinery if we are
P
Pavel Roskin 已提交
2154
	 * going to do the "bottom" processing.
J
Junio C Hamano 已提交
2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182
	 *
	 * The remaining are:
	 *
	 * (1) if seen_dashdash, its either
	 *     "-options -- <path>" or
	 *     "-options -- <path> <rev>".
	 *     but the latter is allowed only if there is no
	 *     options that we passed to revision machinery.
	 *
	 * (2) otherwise, we may have "--" somewhere later and
	 *     might be looking at the first one of multiple 'rev'
	 *     parameters (e.g. " master ^next ^maint -- path").
	 *     See if there is a dashdash first, and give the
	 *     arguments before that to revision machinery.
	 *     After that there must be one 'path'.
	 *
	 * (3) otherwise, its one of the three:
	 *     "-options <path> <rev>"
	 *     "-options <rev> <path>"
	 *     "-options <path>"
	 *     but again the first one is allowed only if
	 *     there is no options that we passed to revision
	 *     machinery.
	 */

	if (seen_dashdash) {
		/* (1) */
		if (argc <= i)
J
Junio C Hamano 已提交
2183
			usage(blame_usage);
2184
		path = add_prefix(prefix, argv[i]);
J
Junio C Hamano 已提交
2185 2186
		if (i + 1 == argc - 1) {
			if (unk != 1)
J
Junio C Hamano 已提交
2187
				usage(blame_usage);
J
Junio C Hamano 已提交
2188 2189 2190 2191
			argv[unk++] = argv[i + 1];
		}
		else if (i + 1 != argc)
			/* garbage at end */
J
Junio C Hamano 已提交
2192
			usage(blame_usage);
J
Junio C Hamano 已提交
2193 2194 2195 2196 2197 2198 2199 2200
	}
	else {
		int j;
		for (j = i; !seen_dashdash && j < argc; j++)
			if (!strcmp(argv[j], "--"))
				seen_dashdash = j;
		if (seen_dashdash) {
			if (seen_dashdash + 1 != argc - 1)
J
Junio C Hamano 已提交
2201
				usage(blame_usage);
2202
			path = add_prefix(prefix, argv[seen_dashdash + 1]);
J
Junio C Hamano 已提交
2203 2204 2205 2206 2207
			for (j = i; j < seen_dashdash; j++)
				argv[unk++] = argv[j];
		}
		else {
			/* (3) */
2208
			path = add_prefix(prefix, argv[i]);
J
Junio C Hamano 已提交
2209 2210 2211 2212 2213 2214 2215
			if (i + 1 == argc - 1) {
				final_commit_name = argv[i + 1];

				/* if (unk == 1) we could be getting
				 * old-style
				 */
				if (unk == 1 && !has_path_in_work_tree(path)) {
2216
					path = add_prefix(prefix, argv[i + 1]);
J
Junio C Hamano 已提交
2217 2218 2219 2220
					final_commit_name = argv[i];
				}
			}
			else if (i != argc - 1)
J
Junio C Hamano 已提交
2221
				usage(blame_usage); /* garbage at end */
J
Junio C Hamano 已提交
2222 2223 2224 2225 2226 2227 2228 2229 2230 2231

			if (!has_path_in_work_tree(path))
				die("cannot stat path %s: %s",
				    path, strerror(errno));
		}
	}

	if (final_commit_name)
		argv[unk++] = final_commit_name;

2232 2233
	/*
	 * Now we got rev and path.  We do not want the path pruning
J
Junio C Hamano 已提交
2234 2235
	 * but we may want "bottom" processing.
	 */
2236
	argv[unk++] = "--"; /* terminate the rev name */
J
Junio C Hamano 已提交
2237 2238 2239
	argv[unk] = NULL;

	init_revisions(&revs, NULL);
2240
	setup_revisions(unk, argv, &revs, NULL);
J
Junio C Hamano 已提交
2241 2242
	memset(&sb, 0, sizeof(sb));

2243 2244
	/*
	 * There must be one and only one positive commit in the
J
Junio C Hamano 已提交
2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264
	 * revs->pending array.
	 */
	for (i = 0; i < revs.pending.nr; i++) {
		struct object *obj = revs.pending.objects[i].item;
		if (obj->flags & UNINTERESTING)
			continue;
		while (obj->type == OBJ_TAG)
			obj = deref_tag(obj, NULL, 0);
		if (obj->type != OBJ_COMMIT)
			die("Non commit %s?",
			    revs.pending.objects[i].name);
		if (sb.final)
			die("More than one commit to dig from %s and %s?",
			    revs.pending.objects[i].name,
			    final_commit_name);
		sb.final = (struct commit *) obj;
		final_commit_name = revs.pending.objects[i].name;
	}

	if (!sb.final) {
2265 2266
		/*
		 * "--not A B -- path" without anything positive;
2267 2268
		 * do not default to HEAD, but use the working tree
		 * or "--contents".
2269
		 */
2270 2271
		sb.final = fake_working_tree_commit(path, contents_from);
		add_pending_object(&revs, &(sb.final->object), ":");
J
Junio C Hamano 已提交
2272
	}
2273 2274
	else if (contents_from)
		die("Cannot use --contents with final commit object name");
J
Junio C Hamano 已提交
2275

2276 2277
	/*
	 * If we have bottom, this will mark the ancestors of the
J
Junio C Hamano 已提交
2278 2279 2280 2281 2282
	 * bottom commits we would reach while traversing as
	 * uninteresting.
	 */
	prepare_revision_walk(&revs);

2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294
	if (is_null_sha1(sb.final->object.sha1)) {
		char *buf;
		o = sb.final->util;
		buf = xmalloc(o->file.size + 1);
		memcpy(buf, o->file.ptr, o->file.size + 1);
		sb.final_buf = buf;
		sb.final_buf_size = o->file.size;
	}
	else {
		o = get_origin(&sb, sb.final, path);
		if (fill_blob_sha1(o))
			die("no such path %s in %s", path, final_commit_name);
J
Junio C Hamano 已提交
2295

2296 2297 2298
		sb.final_buf = read_sha1_file(o->blob_sha1, type,
					      &sb.final_buf_size);
	}
2299
	num_read_blob++;
J
Junio C Hamano 已提交
2300 2301
	lno = prepare_lines(&sb);

J
Junio C Hamano 已提交
2302 2303 2304 2305 2306 2307 2308
	bottom = top = 0;
	if (bottomtop)
		prepare_blame_range(&sb, bottomtop, lno, &bottom, &top);
	if (bottom && top && top < bottom) {
		long tmp;
		tmp = top; top = bottom; bottom = tmp;
	}
J
Junio C Hamano 已提交
2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329
	if (bottom < 1)
		bottom = 1;
	if (top < 1)
		top = lno;
	bottom--;
	if (lno < top)
		die("file %s has only %lu lines", path, lno);

	ent = xcalloc(1, sizeof(*ent));
	ent->lno = bottom;
	ent->num_lines = top - bottom;
	ent->suspect = o;
	ent->s_lno = bottom;

	sb.ent = ent;
	sb.path = path;

	if (revs_file && read_ancestry(revs_file))
		die("reading graft file %s failed: %s",
		    revs_file, strerror(errno));

2330
	assign_blame(&sb, &revs, opt);
J
Junio C Hamano 已提交
2331

L
Linus Torvalds 已提交
2332 2333 2334
	if (incremental)
		return 0;

J
Junio C Hamano 已提交
2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346
	coalesce(&sb);

	if (!(output_option & OUTPUT_PORCELAIN))
		find_alignment(&sb, &output_option);

	output(&sb, output_option);
	free((void *)sb.final_buf);
	for (ent = sb.ent; ent; ) {
		struct blame_entry *e = ent->next;
		free(ent);
		ent = e;
	}
2347 2348 2349 2350 2351 2352

	if (DEBUG) {
		printf("num read blob: %d\n", num_read_blob);
		printf("num get patch: %d\n", num_get_patch);
		printf("num commits: %d\n", num_commits);
	}
J
Junio C Hamano 已提交
2353 2354
	return 0;
}