builtin-pack-objects.c 41.0 KB
Newer Older
1
#include "builtin.h"
2 3
#include "cache.h"
#include "object.h"
4 5 6 7
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
8
#include "delta.h"
9
#include "pack.h"
10
#include "csum-file.h"
J
Junio C Hamano 已提交
11
#include "tree-walk.h"
12 13 14
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
15
#include <sys/time.h>
16
#include <signal.h>
17

18
static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--delta-base-offset] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] [--revs [--unpacked | --all]*] [--stdout | base-name] <ref-list | <object-list]";
19 20 21

struct object_entry {
	unsigned char sha1[20];
22
	unsigned long size;	/* uncompressed size */
23 24 25
	unsigned long offset;	/* offset into the final pack file;
				 * nonzero if already written.
				 */
26
	unsigned int depth;	/* delta depth */
27
	unsigned int delta_limit;	/* base adjustment for in-pack delta */
28
	unsigned int hash;	/* name hint hash */
29
	enum object_type type;
30
	enum object_type in_pack_type;	/* could be delta */
31
	unsigned long delta_size;	/* delta data size (uncompressed) */
32
#define in_pack_header_size delta_size	/* only when reusing pack data */
33 34 35
	struct object_entry *delta;	/* delta base object */
	struct packed_git *in_pack; 	/* already in pack */
	unsigned int in_pack_offset;
36
	struct object_entry *delta_child; /* deltified objects who bases me */
37 38 39
	struct object_entry *delta_sibling; /* other deltified objects who
					     * uses the same base as me
					     */
40 41 42 43
	int preferred_base;	/* we do not pack this, but is encouraged to
				 * be used as the base objectto delta huge
				 * objects against.
				 */
44 45
};

46
/*
47
 * Objects we are going to pack are collected in objects array (dynamically
48 49 50 51 52 53 54 55 56 57 58
 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
 * in the order we see -- typically rev-list --objects order that gives us
 * nice "minimum seek" order.
 *
 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
 * elements in the objects array.  The former is used to build the pack
 * index (lists object names in the ascending order to help offset lookup),
 * and the latter is used to group similar things together by try_delta()
 * heuristics.
 */

59
static unsigned char object_list_sha1[20];
60 61 62 63
static int non_empty;
static int no_reuse_delta;
static int local;
static int incremental;
64 65
static int allow_ofs_delta;

66
static struct object_entry **sorted_by_sha, **sorted_by_type;
67 68
static struct object_entry *objects;
static int nr_objects, nr_alloc, nr_result;
69
static const char *base_name;
70
static unsigned char pack_file_sha1[20];
J
Junio C Hamano 已提交
71
static int progress = 1;
72
static volatile sig_atomic_t progress_update;
73
static int window = 10;
74
static int pack_to_stdout;
75
static int num_preferred_base;
76

77 78 79 80 81 82
/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.  Binary search from
 * sorted_by_sha is also possible but this was easier to code and faster.
 * This hashtable is built after all the objects are seen.
 */
83 84
static int *object_ix;
static int object_ix_hashsz;
85 86 87 88 89

/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
90 91 92 93 94 95 96 97 98
 * size is easily available by examining the pack entry header).  It is
 * also rather expensive to find the sha1 for an object given its offset.
 *
 * We build a hashtable of existing packs (pack_revindex), and keep reverse
 * index here -- pack index file is sorted by object name mapping to offset;
 * this pack_revindex[].revindex array is a list of offset/index_nr pairs
 * ordered by offset, so if you know the offset of an object, next offset
 * is where its packed representation ends and the index_nr can be used to
 * get the object sha1 from the main index.
99
 */
100 101 102 103
struct revindex_entry {
	unsigned int offset;
	unsigned int nr;
};
104 105
struct pack_revindex {
	struct packed_git *p;
106 107 108
	struct revindex_entry *revindex;
};
static struct  pack_revindex *pack_revindex;
109
static int pack_revindex_hashsz;
110 111 112 113

/*
 * stats
 */
114 115 116 117
static int written;
static int written_delta;
static int reused;
static int reused_delta;
118 119 120

static int pack_revindex_ix(struct packed_git *p)
{
L
Luck, Tony 已提交
121
	unsigned long ui = (unsigned long)p;
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
	int i;

	ui = ui ^ (ui >> 16); /* defeat structure alignment */
	i = (int)(ui % pack_revindex_hashsz);
	while (pack_revindex[i].p) {
		if (pack_revindex[i].p == p)
			return i;
		if (++i == pack_revindex_hashsz)
			i = 0;
	}
	return -1 - i;
}

static void prepare_pack_ix(void)
{
	int num;
	struct packed_git *p;
	for (num = 0, p = packed_git; p; p = p->next)
		num++;
	if (!num)
		return;
	pack_revindex_hashsz = num * 11;
	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
	for (p = packed_git; p; p = p->next) {
		num = pack_revindex_ix(p);
		num = - 1 - num;
		pack_revindex[num].p = p;
	}
	/* revindex elements are lazily initialized */
}

static int cmp_offset(const void *a_, const void *b_)
{
155 156 157
	const struct revindex_entry *a = a_;
	const struct revindex_entry *b = b_;
	return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0;
158 159 160 161 162 163 164 165 166 167 168 169
}

/*
 * Ordered list of offsets of objects in the pack.
 */
static void prepare_pack_revindex(struct pack_revindex *rix)
{
	struct packed_git *p = rix->p;
	int num_ent = num_packed_objects(p);
	int i;
	void *index = p->index_base + 256;

170
	rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
171
	for (i = 0; i < num_ent; i++) {
172
		unsigned int hl = *((unsigned int *)((char *) index + 24*i));
173 174
		rix->revindex[i].offset = ntohl(hl);
		rix->revindex[i].nr = i;
175 176 177 178
	}
	/* This knows the pack format -- the 20-byte trailer
	 * follows immediately after the last object data.
	 */
179 180 181
	rix->revindex[num_ent].offset = p->pack_size - 20;
	rix->revindex[num_ent].nr = -1;
	qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
182 183
}

184 185
static struct revindex_entry * find_packed_object(struct packed_git *p,
						  unsigned int ofs)
186 187 188 189
{
	int num;
	int lo, hi;
	struct pack_revindex *rix;
190
	struct revindex_entry *revindex;
191 192 193 194 195 196 197 198 199 200 201
	num = pack_revindex_ix(p);
	if (num < 0)
		die("internal error: pack revindex uninitialized");
	rix = &pack_revindex[num];
	if (!rix->revindex)
		prepare_pack_revindex(rix);
	revindex = rix->revindex;
	lo = 0;
	hi = num_packed_objects(p) + 1;
	do {
		int mi = (lo + hi) / 2;
202 203
		if (revindex[mi].offset == ofs) {
			return revindex + mi;
204
		}
205
		else if (ofs < revindex[mi].offset)
206 207 208 209 210 211 212
			hi = mi;
		else
			lo = mi + 1;
	} while (lo < hi);
	die("internal error: pack revindex corrupt");
}

213 214 215 216 217 218 219 220 221 222 223 224 225 226
static unsigned long find_packed_object_size(struct packed_git *p,
					     unsigned long ofs)
{
	struct revindex_entry *entry = find_packed_object(p, ofs);
	return entry[1].offset - ofs;
}

static unsigned char *find_packed_object_name(struct packed_git *p,
					      unsigned long ofs)
{
	struct revindex_entry *entry = find_packed_object(p, ofs);
	return (unsigned char *)(p->index_base + 256) + 24 * entry->nr + 4;
}

227 228 229 230 231 232 233 234 235
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	char type[10];
	void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
236
        delta_buf = diff_delta(otherbuf, othersize,
237
			       buf, size, &delta_size, 0);
238
        if (!delta_buf || delta_size != entry->delta_size)
239 240 241 242 243 244
        	die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

245 246 247 248 249 250 251 252 253
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
254
	int n = 1;
255 256
	unsigned char c;

257
	if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
258 259
		die("bad type %d", type);

260 261 262
	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
263
		*hdr++ = c | 0x80;
264 265 266
		c = size & 0x7f;
		size >>= 7;
		n++;
267 268 269 270 271
	}
	*hdr = c;
	return n;
}

272 273 274 275
/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
276
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
277
{
278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
	z_stream stream;
	unsigned char fakebuf[4096];
	int st;

	memset(&stream, 0, sizeof(stream));
	stream.next_in = data;
	stream.avail_in = len;
	stream.next_out = fakebuf;
	stream.avail_out = sizeof(fakebuf);
	inflateInit(&stream);

	while (1) {
		st = inflate(&stream, Z_FINISH);
		if (st == Z_STREAM_END || st == Z_OK) {
			st = (stream.total_out == expect &&
			      stream.total_in == len) ? 0 : -1;
			break;
		}
		if (st != Z_BUF_ERROR) {
			st = -1;
			break;
		}
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
	}
	inflateEnd(&stream);
	return st;
305 306 307 308 309 310 311
}

static int revalidate_loose_object(struct object_entry *entry,
				   unsigned char *map,
				   unsigned long mapsize)
{
	/* we already know this is a loose object with new type header. */
312 313
	enum object_type type;
	unsigned long size, used;
314 315 316 317

	if (pack_to_stdout)
		return 0;

318 319 320 321 322 323
	used = unpack_object_header_gently(map, mapsize, &type, &size);
	if (!used)
		return -1;
	map += used;
	mapsize -= used;
	return check_inflate(map, mapsize, size);
324 325
}

326 327
static unsigned long write_object(struct sha1file *f,
				  struct object_entry *entry)
328 329 330
{
	unsigned long size;
	char type[10];
331
	void *buf;
332
	unsigned char header[10];
333
	unsigned hdrlen, datalen;
334
	enum object_type obj_type;
335
	int to_reuse = 0;
336

337
	obj_type = entry->type;
338 339
	if (! entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
340
	else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
341 342 343 344 345 346 347 348 349 350
		to_reuse = 1;	/* check_object() decided it for us */
	else if (obj_type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

351 352 353 354 355 356
	if (!entry->in_pack && !entry->delta) {
		unsigned char *map;
		unsigned long mapsize;
		map = map_sha1_file(entry->sha1, &mapsize);
		if (map && !legacy_loose_object(map)) {
			/* We can copy straight into the pack file */
357 358 359
			if (revalidate_loose_object(entry, map, mapsize))
				die("corrupt loose object %s",
				    sha1_to_hex(entry->sha1));
360 361 362 363 364 365 366 367 368 369
			sha1write(f, map, mapsize);
			munmap(map, mapsize);
			written++;
			reused++;
			return mapsize;
		}
		if (map)
			munmap(map, mapsize);
	}

370
	if (!to_reuse) {
371 372 373 374 375 376 377 378 379
		buf = read_sha1_file(entry->sha1, type, &size);
		if (!buf)
			die("unable to read %s", sha1_to_hex(entry->sha1));
		if (size != entry->size)
			die("object %s size inconsistency (%lu vs %lu)",
			    sha1_to_hex(entry->sha1), size, entry->size);
		if (entry->delta) {
			buf = delta_against(buf, size, entry);
			size = entry->delta_size;
380 381
			obj_type = (allow_ofs_delta && entry->delta->offset) ?
				OBJ_OFS_DELTA : OBJ_REF_DELTA;
382 383 384
		}
		/*
		 * The object header is a byte of 'type' followed by zero or
385
		 * more bytes of length.
386 387 388 389
		 */
		hdrlen = encode_header(obj_type, size, header);
		sha1write(f, header, hdrlen);

390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
		if (obj_type == OBJ_OFS_DELTA) {
			/*
			 * Deltas with relative base contain an additional
			 * encoding of the relative offset for the delta
			 * base from this object's position in the pack.
			 */
			unsigned long ofs = entry->offset - entry->delta->offset;
			unsigned pos = sizeof(header) - 1;
			header[pos] = ofs & 127;
			while (ofs >>= 7)
				header[--pos] = 128 | (--ofs & 127);
			sha1write(f, header + pos, sizeof(header) - pos);
			hdrlen += sizeof(header) - pos;
		} else if (obj_type == OBJ_REF_DELTA) {
			/*
			 * Deltas with a base reference contain
			 * an additional 20 bytes for the base sha1.
			 */
			sha1write(f, entry->delta->sha1, 20);
409 410 411 412
			hdrlen += 20;
		}
		datalen = sha1write_compressed(f, buf, size);
		free(buf);
413
	}
414 415 416
	else {
		struct packed_git *p = entry->in_pack;

417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435
		if (entry->delta) {
			obj_type = (allow_ofs_delta && entry->delta->offset) ?
				OBJ_OFS_DELTA : OBJ_REF_DELTA;
			reused_delta++;
		}
		hdrlen = encode_header(obj_type, entry->size, header);
		sha1write(f, header, hdrlen);
		if (obj_type == OBJ_OFS_DELTA) {
			unsigned long ofs = entry->offset - entry->delta->offset;
			unsigned pos = sizeof(header) - 1;
			header[pos] = ofs & 127;
			while (ofs >>= 7)
				header[--pos] = 128 | (--ofs & 127);
			sha1write(f, header + pos, sizeof(header) - pos);
			hdrlen += sizeof(header) - pos;
		} else if (obj_type == OBJ_REF_DELTA) {
			sha1write(f, entry->delta->sha1, 20);
			hdrlen += 20;
		}
436

437 438 439 440 441 442 443
		use_packed_git(p);
		buf = (char *) p->pack_base
			+ entry->in_pack_offset
			+ entry->in_pack_header_size;
		datalen = find_packed_object_size(p, entry->in_pack_offset)
				- entry->in_pack_header_size;
		if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
444
			die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
445 446 447
		sha1write(f, buf, datalen);
		unuse_packed_git(p);
		reused++;
448
	}
449
	if (entry->delta)
450
		written_delta++;
451
	written++;
452 453 454
	return hdrlen + datalen;
}

455 456 457 458
static unsigned long write_one(struct sha1file *f,
			       struct object_entry *e,
			       unsigned long offset)
{
459
	if (e->offset || e->preferred_base)
460 461 462 463
		/* offset starts from header size and cannot be zero
		 * if it is written already.
		 */
		return offset;
464
	/* if we are deltified, write out its base object first. */
465 466
	if (e->delta)
		offset = write_one(f, e->delta, offset);
467 468
	e->offset = offset;
	return offset + write_object(f, e);
469 470
}

471 472 473
static void write_pack_file(void)
{
	int i;
474
	struct sha1file *f;
475 476
	unsigned long offset;
	struct pack_header hdr;
477 478
	unsigned last_percent = 999;
	int do_progress = 0;
479

480 481
	if (!base_name)
		f = sha1fd(1, "<stdout>");
482 483 484 485 486 487
	else {
		f = sha1create("%s-%s.%s", base_name,
			       sha1_to_hex(object_list_sha1), "pack");
		do_progress = progress;
	}
	if (do_progress)
488
		fprintf(stderr, "Writing %d objects.\n", nr_result);
489

490
	hdr.hdr_signature = htonl(PACK_SIGNATURE);
491
	hdr.hdr_version = htonl(PACK_VERSION);
492
	hdr.hdr_entries = htonl(nr_result);
493 494
	sha1write(f, &hdr, sizeof(hdr));
	offset = sizeof(hdr);
495 496
	if (!nr_result)
		goto done;
497
	for (i = 0; i < nr_objects; i++) {
498
		offset = write_one(f, objects + i, offset);
499
		if (do_progress) {
500
			unsigned percent = written * 100 / nr_result;
501 502
			if (progress_update || percent != last_percent) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
503
					percent, written, nr_result);
504 505 506
				progress_update = 0;
				last_percent = percent;
			}
507 508
		}
	}
509 510
	if (do_progress)
		fputc('\n', stderr);
511
 done:
512
	sha1close(f, pack_file_sha1, 1);
513 514 515 516 517
}

static void write_index_file(void)
{
	int i;
518 519
	struct sha1file *f = sha1create("%s-%s.%s", base_name,
					sha1_to_hex(object_list_sha1), "idx");
520
	struct object_entry **list = sorted_by_sha;
521
	struct object_entry **last = list + nr_result;
522 523 524 525 526
	unsigned int array[256];

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
527
	 * having to do eight extra binary search iterations).
528 529 530 531 532 533 534 535 536 537 538 539
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *entry = *next;
			if (entry->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
540
	sha1write(f, array, 256 * sizeof(int));
541 542 543 544 545

	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
546
	for (i = 0; i < nr_result; i++) {
547 548
		struct object_entry *entry = *list++;
		unsigned int offset = htonl(entry->offset);
549 550
		sha1write(f, &offset, 4);
		sha1write(f, entry->sha1, 20);
551
	}
552 553
	sha1write(f, pack_file_sha1, 20);
	sha1close(f, NULL, 1);
554 555
}

556 557 558 559 560 561 562
static int locate_object_entry_hash(const unsigned char *sha1)
{
	int i;
	unsigned int ui;
	memcpy(&ui, sha1, sizeof(unsigned int));
	i = ui % object_ix_hashsz;
	while (0 < object_ix[i]) {
563
		if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1))
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584
			return i;
		if (++i == object_ix_hashsz)
			i = 0;
	}
	return -1 - i;
}

static struct object_entry *locate_object_entry(const unsigned char *sha1)
{
	int i;

	if (!object_ix_hashsz)
		return NULL;

	i = locate_object_entry_hash(sha1);
	if (0 <= i)
		return &objects[object_ix[i]-1];
	return NULL;
}

static void rehash_objects(void)
585
{
586 587 588 589 590 591 592
	int i;
	struct object_entry *oe;

	object_ix_hashsz = nr_objects * 3;
	if (object_ix_hashsz < 1024)
		object_ix_hashsz = 1024;
	object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
593
	memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
594 595 596 597 598 599 600 601 602
	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
		int ix = locate_object_entry_hash(oe->sha1);
		if (0 <= ix)
			continue;
		ix = -1 - ix;
		object_ix[ix] = i + 1;
	}
}

603
static unsigned name_hash(const char *name)
604
{
605 606 607
	unsigned char c;
	unsigned hash = 0;

608
	/*
609 610 611
	 * This effectively just creates a sortable number from the
	 * last sixteen non-whitespace characters. Last characters
	 * count "most", so things that end in ".c" sort together.
612
	 */
613 614 615 616 617
	while ((c = *name++) != 0) {
		if (isspace(c))
			continue;
		hash = (hash >> 2) + (c << 24);
	}
618 619 620 621
	return hash;
}

static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
622 623 624
{
	unsigned int idx = nr_objects;
	struct object_entry *entry;
625
	struct packed_git *p;
626 627
	unsigned int found_offset = 0;
	struct packed_git *found_pack = NULL;
628
	int ix, status = 0;
629

630
	if (!exclude) {
L
Linus Torvalds 已提交
631
		for (p = packed_git; p; p = p->next) {
N
Nicolas Pitre 已提交
632 633
			unsigned long offset = find_pack_entry_one(sha1, p);
			if (offset) {
L
Linus Torvalds 已提交
634 635 636 637
				if (incremental)
					return 0;
				if (local && !p->pack_local)
					return 0;
638
				if (!found_pack) {
N
Nicolas Pitre 已提交
639 640
					found_offset = offset;
					found_pack = p;
641
				}
L
Linus Torvalds 已提交
642 643 644
			}
		}
	}
645 646
	if ((entry = locate_object_entry(sha1)) != NULL)
		goto already_added;
647

648 649 650 651 652 653
	if (idx >= nr_alloc) {
		unsigned int needed = (idx + 1024) * 3 / 2;
		objects = xrealloc(objects, needed * sizeof(*entry));
		nr_alloc = needed;
	}
	entry = objects + idx;
654
	nr_objects = idx + 1;
655
	memset(entry, 0, sizeof(*entry));
656
	hashcpy(entry->sha1, sha1);
657
	entry->hash = hash;
658 659 660 661 662 663 664 665

	if (object_ix_hashsz * 3 <= nr_objects * 4)
		rehash_objects();
	else {
		ix = locate_object_entry_hash(entry->sha1);
		if (0 <= ix)
			die("internal error in object hashing.");
		object_ix[-1 - ix] = idx + 1;
666
	}
667
	status = 1;
668 669

 already_added:
670 671 672 673
	if (progress_update) {
		fprintf(stderr, "Counting objects...%d\r", nr_objects);
		progress_update = 0;
	}
674 675 676 677 678 679 680
	if (exclude)
		entry->preferred_base = 1;
	else {
		if (found_pack) {
			entry->in_pack = found_pack;
			entry->in_pack_offset = found_offset;
		}
681
	}
682
	return status;
683 684
}

685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728
struct pbase_tree_cache {
	unsigned char sha1[20];
	int ref;
	int temporary;
	void *tree_data;
	unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
	 * going to evict it nor find it through _get()
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;

static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
	char type[20];
	int neigh;
	int my_ix = pbase_tree_cache_ix(sha1);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
729
		if (ent && !hashcmp(ent->sha1, sha1)) {
730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
			ent->ref++;
			return ent;
		}
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one.  Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
	data = read_sha1_file(sha1, type, &size);
	if (!data)
		return NULL;
	if (strcmp(type, tree_type)) {
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
771
	hashcpy(nent->sha1, sha1);
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}

static void pbase_tree_put(struct pbase_tree_cache *cache)
{
	if (!cache->temporary) {
		cache->ref--;
		return;
	}
	free(cache->tree_data);
	free(cache);
}

static int name_cmp_len(const char *name)
{
	int i;
	for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
		;
	return i;
}

static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
800 801
			     int cmplen,
			     const char *fullname)
802
{
803 804 805
	struct name_entry entry;

	while (tree_entry(tree,&entry)) {
806 807 808
		unsigned long size;
		char type[20];

809 810 811 812
		if (entry.pathlen != cmplen ||
		    memcmp(entry.path, name, cmplen) ||
		    !has_sha1_file(entry.sha1) ||
		    sha1_object_info(entry.sha1, type, &size))
813
			continue;
814
		if (name[cmplen] != '/') {
815
			unsigned hash = name_hash(fullname);
816
			add_object_entry(entry.sha1, hash, 1);
817 818
			return;
		}
819
		if (!strcmp(type, tree_type)) {
820
			struct tree_desc sub;
821 822 823 824
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			int downlen = name_cmp_len(down);

825
			tree = pbase_tree_get(entry.sha1);
826 827 828 829 830
			if (!tree)
				return;
			sub.buf = tree->tree_data;
			sub.size = tree->tree_size;

831
			add_pbase_object(&sub, down, downlen, fullname);
832 833 834 835
			pbase_tree_put(tree);
		}
	}
}
836

837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;
	while (lo < hi) {
		int mi = (hi + lo) / 2;
		if (done_pbase_paths[mi] == hash)
			return mi;
		if (done_pbase_paths[mi] < hash)
			hi = mi;
		else
			lo = mi + 1;
	}
	return -lo-1;
}

static int check_pbase_path(unsigned hash)
{
	int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
	if (0 <= pos)
		return 1;
	pos = -pos - 1;
	if (done_pbase_paths_alloc <= done_pbase_paths_num) {
		done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
		done_pbase_paths = xrealloc(done_pbase_paths,
					    done_pbase_paths_alloc *
					    sizeof(unsigned));
	}
	done_pbase_paths_num++;
	if (pos < done_pbase_paths_num)
		memmove(done_pbase_paths + pos + 1,
			done_pbase_paths + pos,
			(done_pbase_paths_num - pos - 1) * sizeof(unsigned));
	done_pbase_paths[pos] = hash;
	return 0;
}

877
static void add_preferred_base_object(const char *name, unsigned hash)
878 879 880 881 882 883 884 885 886
{
	struct pbase_tree *it;
	int cmplen = name_cmp_len(name);

	if (check_pbase_path(hash))
		return;

	for (it = pbase_tree; it; it = it->next) {
		if (cmplen == 0) {
887
			hash = name_hash("");
888 889 890 891 892 893
			add_object_entry(it->pcache.sha1, hash, 1);
		}
		else {
			struct tree_desc tree;
			tree.buf = it->pcache.tree_data;
			tree.size = it->pcache.tree_size;
894
			add_pbase_object(&tree, name, cmplen, name);
895
		}
896 897 898
	}
}

899
static void add_preferred_base(unsigned char *sha1)
900
{
901 902 903 904
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	unsigned char tree_sha1[20];
905

906 907 908
	if (window <= num_preferred_base++)
		return;

909 910
	data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
	if (!data)
911
		return;
912 913

	for (it = pbase_tree; it; it = it->next) {
914
		if (!hashcmp(it->pcache.sha1, tree_sha1)) {
915 916 917 918 919 920 921 922 923
			free(data);
			return;
		}
	}

	it = xcalloc(1, sizeof(*it));
	it->next = pbase_tree;
	pbase_tree = it;

924
	hashcpy(it->pcache.sha1, tree_sha1);
925 926
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
927 928
}

929 930
static void check_object(struct object_entry *entry)
{
931 932
	char type[20];

933
	if (entry->in_pack && !entry->preferred_base) {
934 935 936 937 938 939 940 941 942
		struct packed_git *p = entry->in_pack;
		unsigned long left = p->pack_size - entry->in_pack_offset;
		unsigned long size, used;
		unsigned char *buf;
		struct object_entry *base_entry = NULL;

		use_packed_git(p);
		buf = p->pack_base;
		buf += entry->in_pack_offset;
943 944 945 946

		/* We want in_pack_type even if we do not reuse delta.
		 * There is no point not reusing non-delta representations.
		 */
947 948 949 950
		used = unpack_object_header_gently(buf, left,
						   &entry->in_pack_type, &size);
		if (!used || left - used <= 20)
			die("corrupt pack for %s", sha1_to_hex(entry->sha1));
951

952 953 954 955
		/* Check if it is delta, and the base is also an object
		 * we are going to pack.  If so we will reuse the existing
		 * delta.
		 */
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990
		if (!no_reuse_delta) {
			unsigned char c, *base_name;
			unsigned long ofs;
			/* there is at least 20 bytes left in the pack */
			switch (entry->in_pack_type) {
			case OBJ_REF_DELTA:
				base_name = buf + used;
				used += 20;
				break;
			case OBJ_OFS_DELTA:
				c = buf[used++];
				ofs = c & 127;
				while (c & 128) {
					ofs += 1;
					if (!ofs || ofs & ~(~0UL >> 7))
						die("delta base offset overflow in pack for %s",
						    sha1_to_hex(entry->sha1));
					c = buf[used++];
					ofs = (ofs << 7) + (c & 127);
				}
				if (ofs >= entry->in_pack_offset)
					die("delta base offset out of bound for %s",
					    sha1_to_hex(entry->sha1));
				ofs = entry->in_pack_offset - ofs;
				base_name = find_packed_object_name(p, ofs);
				break;
			default:
				base_name = NULL;
			}
			if (base_name)
				base_entry = locate_object_entry(base_name);
		}
		unuse_packed_git(p);
		entry->in_pack_header_size = used;

991
		if (base_entry) {
992 993 994 995 996

			/* Depth value does not matter - find_deltas()
			 * will never consider reused delta as the
			 * base object to deltify other objects
			 * against, in order to avoid circular deltas.
997
			 */
998 999

			/* uncompressed size of the delta data */
1000
			entry->size = size;
1001
			entry->delta = base_entry;
1002
			entry->type = entry->in_pack_type;
1003

1004 1005
			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;
1006

1007 1008 1009
			return;
		}
		/* Otherwise we would do the usual */
1010
	}
1011 1012

	if (sha1_object_info(entry->sha1, type, &entry->size))
1013 1014
		die("unable to get type of object %s",
		    sha1_to_hex(entry->sha1));
1015

1016
	if (!strcmp(type, commit_type)) {
1017
		entry->type = OBJ_COMMIT;
1018
	} else if (!strcmp(type, tree_type)) {
1019
		entry->type = OBJ_TREE;
1020
	} else if (!strcmp(type, blob_type)) {
1021
		entry->type = OBJ_BLOB;
1022
	} else if (!strcmp(type, tag_type)) {
1023 1024 1025 1026 1027 1028
		entry->type = OBJ_TAG;
	} else
		die("unable to pack object %s of type %s",
		    sha1_to_hex(entry->sha1), type);
}

1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
}

1042 1043 1044
static void get_object_details(void)
{
	int i;
1045
	struct object_entry *entry;
1046

1047
	prepare_pack_ix();
1048 1049
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		check_object(entry);
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066

	if (nr_objects == nr_result) {
		/*
		 * Depth of objects that depend on the entry -- this
		 * is subtracted from depth-max to break too deep
		 * delta chain because of delta data reusing.
		 * However, we loosen this restriction when we know we
		 * are creating a thin pack -- it will have to be
		 * expanded on the other end anyway, so do not
		 * artificially cut the delta chain and let it go as
		 * deep as it wants.
		 */
		for (i = 0, entry = objects; i < nr_objects; i++, entry++)
			if (!entry->delta && entry->delta_child)
				entry->delta_limit =
					check_delta_limit(entry, 1);
	}
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093
}

typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

static entry_sort_t current_sort;

static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
	return current_sort(a,b);
}

static struct object_entry **create_sorted_list(entry_sort_t sort)
{
	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
	int i;

	for (i = 0; i < nr_objects; i++)
		list[i] = objects + i;
	current_sort = sort;
	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
{
1094
	return hashcmp(a->sha1, b->sha1);
1095 1096
}

T
Timo Hirvonen 已提交
1097
static struct object_entry **create_final_object_list(void)
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114
{
	struct object_entry **list;
	int i, j;

	for (i = nr_result = 0; i < nr_objects; i++)
		if (!objects[i].preferred_base)
			nr_result++;
	list = xmalloc(nr_result * sizeof(struct object_entry *));
	for (i = j = 0; i < nr_objects; i++) {
		if (!objects[i].preferred_base)
			list[j++] = objects + i;
	}
	current_sort = sha1_sort;
	qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator);
	return list;
}

1115 1116 1117 1118 1119 1120
static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
{
	if (a->type < b->type)
		return -1;
	if (a->type > b->type)
		return 1;
1121 1122 1123 1124
	if (a->hash < b->hash)
		return -1;
	if (a->hash > b->hash)
		return 1;
1125 1126 1127 1128
	if (a->preferred_base < b->preferred_base)
		return -1;
	if (a->preferred_base > b->preferred_base)
		return 1;
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return a < b ? -1 : (a > b);
}

struct unpacked {
	struct object_entry *entry;
	void *data;
1139
	struct delta_index *index;
1140 1141 1142
};

/*
1143 1144 1145 1146 1147 1148
 * We search for deltas _backwards_ in a list sorted by type and
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller - deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.
1149
 */
1150
static int try_delta(struct unpacked *trg, struct unpacked *src,
1151
		     unsigned max_depth)
1152
{
1153 1154
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
1155 1156
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	char type[10];
1157 1158 1159
	void *delta_buf;

	/* Don't bother doing diffs between different types */
1160
	if (trg_entry->type != src_entry->type)
1161 1162
		return -1;

1163 1164 1165
	/* We do not compute delta to *create* objects we are not
	 * going to pack.
	 */
1166
	if (trg_entry->preferred_base)
1167
		return -1;
1168

1169 1170
	/*
	 * We do not bother to try a delta that we discarded
1171
	 * on an earlier try, but only when reusing delta data.
1172
	 */
1173 1174
	if (!no_reuse_delta && trg_entry->in_pack &&
	    trg_entry->in_pack == src_entry->in_pack)
1175 1176
		return 0;

1177 1178
	/*
	 * If the current object is at pack edge, take the depth the
1179 1180
	 * objects that depend on the current object into account --
	 * otherwise they would become too deep.
1181
	 */
1182 1183
	if (trg_entry->delta_child) {
		if (max_depth <= trg_entry->delta_limit)
1184
			return 0;
1185
		max_depth -= trg_entry->delta_limit;
1186
	}
1187
	if (src_entry->depth >= max_depth)
1188
		return 0;
1189

1190
	/* Now some size filtering heuristics. */
1191 1192
	trg_size = trg_entry->size;
	max_size = trg_size/2 - 20;
1193 1194 1195
	max_size = max_size * (max_depth - src_entry->depth) / max_depth;
	if (max_size == 0)
		return 0;
1196
	if (trg_entry->delta && trg_entry->delta_size <= max_size)
1197 1198
		max_size = trg_entry->delta_size-1;
	src_size = src_entry->size;
1199
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
1200
	if (sizediff >= max_size)
1201
		return 0;
1202

1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222
	/* Load data if not already done */
	if (!trg->data) {
		trg->data = read_sha1_file(trg_entry->sha1, type, &sz);
		if (sz != trg_size)
			die("object %s inconsistent object length (%lu vs %lu)",
			    sha1_to_hex(trg_entry->sha1), sz, trg_size);
	}
	if (!src->data) {
		src->data = read_sha1_file(src_entry->sha1, type, &sz);
		if (sz != src_size)
			die("object %s inconsistent object length (%lu vs %lu)",
			    sha1_to_hex(src_entry->sha1), sz, src_size);
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index)
			die("out of memory");
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1223
	if (!delta_buf)
1224
		return 0;
1225 1226 1227 1228

	trg_entry->delta = src_entry;
	trg_entry->delta_size = delta_size;
	trg_entry->depth = src_entry->depth + 1;
1229
	free(delta_buf);
1230
	return 1;
1231 1232
}

1233 1234 1235 1236 1237
static void progress_interval(int signum)
{
	progress_update = 1;
}

1238
static void find_deltas(struct object_entry **list, int window, int depth)
1239
{
1240
	int i, idx;
1241 1242
	unsigned int array_size = window * sizeof(struct unpacked);
	struct unpacked *array = xmalloc(array_size);
1243 1244
	unsigned processed = 0;
	unsigned last_percent = 999;
1245 1246

	memset(array, 0, array_size);
1247 1248
	i = nr_objects;
	idx = 0;
1249
	if (progress)
1250
		fprintf(stderr, "Deltifying %d objects.\n", nr_result);
1251

1252
	while (--i >= 0) {
1253 1254 1255 1256
		struct object_entry *entry = list[i];
		struct unpacked *n = array + idx;
		int j;

1257 1258 1259
		if (!entry->preferred_base)
			processed++;

1260
		if (progress) {
1261
			unsigned percent = processed * 100 / nr_result;
1262 1263
			if (percent != last_percent || progress_update) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
1264
					percent, processed, nr_result);
1265 1266 1267
				progress_update = 0;
				last_percent = percent;
			}
1268
		}
1269 1270 1271

		if (entry->delta)
			/* This happens if we decided to reuse existing
1272
			 * delta from a pack.  "!no_reuse_delta &&" is implied.
1273 1274 1275
			 */
			continue;

1276 1277
		if (entry->size < 50)
			continue;
1278 1279
		free_delta_index(n->index);
		n->index = NULL;
1280
		free(n->data);
1281
		n->data = NULL;
1282
		n->entry = entry;
1283

L
Linus Torvalds 已提交
1284 1285 1286
		j = window;
		while (--j > 0) {
			unsigned int other_idx = idx + j;
1287
			struct unpacked *m;
L
Linus Torvalds 已提交
1288 1289
			if (other_idx >= window)
				other_idx -= window;
1290 1291 1292
			m = array + other_idx;
			if (!m->entry)
				break;
1293
			if (try_delta(n, m, depth) < 0)
1294 1295
				break;
		}
1296 1297 1298 1299 1300 1301
		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (entry->delta && depth <= entry->depth)
			continue;
1302

1303 1304 1305
		idx++;
		if (idx >= window)
			idx = 0;
1306
	}
1307

1308 1309 1310
	if (progress)
		fputc('\n', stderr);

1311
	for (i = 0; i < window; ++i) {
1312
		free_delta_index(array[i].index);
1313
		free(array[i].data);
1314
	}
1315
	free(array);
1316 1317
}

1318 1319
static void prepare_pack(int window, int depth)
{
1320
	get_object_details();
1321 1322 1323 1324 1325
	sorted_by_type = create_sorted_list(type_size_sort);
	if (window && depth)
		find_deltas(sorted_by_type, window+1, depth);
}

1326
static int reuse_cached_pack(unsigned char *sha1)
1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345
{
	static const char cache[] = "pack-cache/pack-%s.%s";
	char *cached_pack, *cached_idx;
	int ifd, ofd, ifd_ix = -1;

	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
	ifd = open(cached_pack, O_RDONLY);
	if (ifd < 0)
		return 0;

	if (!pack_to_stdout) {
		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
		ifd_ix = open(cached_idx, O_RDONLY);
		if (ifd_ix < 0) {
			close(ifd);
			return 0;
		}
	}

1346 1347 1348
	if (progress)
		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
			sha1_to_hex(sha1));
1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379

	if (pack_to_stdout) {
		if (copy_fd(ifd, 1))
			exit(1);
		close(ifd);
	}
	else {
		char name[PATH_MAX];
		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd, ofd))
			exit(1);
		close(ifd);

		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd_ix, ofd))
			exit(1);
		close(ifd_ix);
		puts(sha1_to_hex(sha1));
	}

	return 1;
}

1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396
static void setup_progress_signal(void)
{
	struct sigaction sa;
	struct itimerval v;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = progress_interval;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_RESTART;
	sigaction(SIGALRM, &sa, NULL);

	v.it_interval.tv_sec = 1;
	v.it_interval.tv_usec = 0;
	v.it_value = v.it_interval;
	setitimer(ITIMER_REAL, &v, NULL);
}

1397 1398 1399 1400 1401 1402 1403 1404 1405
static int git_pack_config(const char *k, const char *v)
{
	if(!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
	return git_default_config(k, v);
}

1406
static void read_object_list_from_stdin(void)
1407
{
1408 1409 1410
	char line[40 + 1 + PATH_MAX + 2];
	unsigned char sha1[20];
	unsigned hash;
1411

1412 1413 1414 1415 1416 1417
	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("fgets returned NULL, not EOF, not error!");
1418 1419 1420 1421
			if (errno != EINTR)
				die("fgets: %s", strerror(errno));
			clearerr(stdin);
			continue;
1422
		}
1423 1424 1425
		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
1426
				    line);
1427
			add_preferred_base(sha1);
1428
			continue;
1429
		}
1430
		if (get_sha1_hex(line, sha1))
1431
			die("expected sha1, got garbage:\n %s", line);
1432

1433
		hash = name_hash(line+41);
1434 1435
		add_preferred_base_object(line+41, hash);
		add_object_entry(sha1, hash, 0);
1436
	}
1437 1438 1439 1440 1441
}

static void show_commit(struct commit *commit)
{
	unsigned hash = name_hash("");
1442
	add_preferred_base_object("", hash);
1443 1444 1445 1446 1447 1448
	add_object_entry(commit->object.sha1, hash, 0);
}

static void show_object(struct object_array_entry *p)
{
	unsigned hash = name_hash(p->name);
1449
	add_preferred_base_object(p->name, hash);
1450 1451 1452
	add_object_entry(p->item->sha1, hash, 0);
}

1453 1454 1455 1456 1457 1458
static void show_edge(struct commit *commit)
{
	add_preferred_base(commit->object.sha1);
}

static void get_object_list(int ac, const char **av)
1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
{
	struct rev_info revs;
	char line[1000];
	int flags = 0;

	init_revisions(&revs, NULL);
	save_commit_buffer = 0;
	track_object_refs = 0;
	setup_revisions(ac, av, &revs, NULL);

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				continue;
			}
			die("not a rev '%s'", line);
		}
		if (handle_revision_arg(line, &revs, flags, 1))
			die("bad revision '%s'", line);
	}

	prepare_revision_walk(&revs);
1487
	mark_edges_uninteresting(revs.commits, &revs, show_edge);
1488 1489 1490 1491 1492 1493 1494 1495 1496
	traverse_commit_list(&revs, show_commit, show_object);
}

int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	SHA_CTX ctx;
	int depth = 10;
	struct object_entry **list;
	int use_internal_rev_list = 0;
1497
	int thin = 0;
1498
	int i;
1499 1500 1501 1502 1503 1504
	const char *rp_av[64];
	int rp_ac;

	rp_av[0] = "pack-objects";
	rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
	rp_ac = 2;
1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556

	git_config(git_pack_config);

	progress = isatty(2);
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg != '-')
			break;

		if (!strcmp("--non-empty", arg)) {
			non_empty = 1;
			continue;
		}
		if (!strcmp("--local", arg)) {
			local = 1;
			continue;
		}
		if (!strcmp("--progress", arg)) {
			progress = 1;
			continue;
		}
		if (!strcmp("--incremental", arg)) {
			incremental = 1;
			continue;
		}
		if (!strncmp("--window=", arg, 9)) {
			char *end;
			window = strtoul(arg+9, &end, 0);
			if (!arg[9] || *end)
				usage(pack_usage);
			continue;
		}
		if (!strncmp("--depth=", arg, 8)) {
			char *end;
			depth = strtoul(arg+8, &end, 0);
			if (!arg[8] || *end)
				usage(pack_usage);
			continue;
		}
		if (!strcmp("--progress", arg)) {
			progress = 1;
			continue;
		}
		if (!strcmp("-q", arg)) {
			progress = 0;
			continue;
		}
		if (!strcmp("--no-reuse-delta", arg)) {
			no_reuse_delta = 1;
			continue;
		}
1557
		if (!strcmp("--delta-base-offset", arg)) {
1558
			allow_ofs_delta = 1;
1559 1560
			continue;
		}
1561 1562 1563 1564 1565 1566 1567 1568
		if (!strcmp("--stdout", arg)) {
			pack_to_stdout = 1;
			continue;
		}
		if (!strcmp("--revs", arg)) {
			use_internal_rev_list = 1;
			continue;
		}
1569 1570 1571 1572 1573 1574 1575
		if (!strcmp("--unpacked", arg) ||
		    !strncmp("--unpacked=", arg, 11) ||
		    !strcmp("--all", arg)) {
			use_internal_rev_list = 1;
			if (ARRAY_SIZE(rp_av) - 1 <= rp_ac)
				die("too many internal rev-list options");
			rp_av[rp_ac++] = arg;
1576 1577
			continue;
		}
1578 1579 1580 1581
		if (!strcmp("--thin", arg)) {
			use_internal_rev_list = 1;
			thin = 1;
			rp_av[1] = "--objects-edge";
1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605
			continue;
		}
		usage(pack_usage);
	}

	/* Traditionally "pack-objects [options] base extra" failed;
	 * we would however want to take refs parameter that would
	 * have been given to upstream rev-list ourselves, which means
	 * we somehow want to say what the base name is.  So the
	 * syntax would be:
	 *
	 * pack-objects [options] base <refs...>
	 *
	 * in other words, we would treat the first non-option as the
	 * base_name and send everything else to the internal revision
	 * walker.
	 */

	if (!pack_to_stdout)
		base_name = argv[i++];

	if (pack_to_stdout != !base_name)
		usage(pack_usage);

1606 1607
	if (!pack_to_stdout && thin)
		die("--thin cannot be used to build an indexable pack.");
1608 1609 1610 1611 1612 1613 1614 1615 1616 1617

	prepare_packed_git();

	if (progress) {
		fprintf(stderr, "Generating pack...\n");
		setup_progress_signal();
	}

	if (!use_internal_rev_list)
		read_object_list_from_stdin();
1618 1619 1620 1621
	else {
		rp_av[rp_ac] = NULL;
		get_object_list(rp_ac, rp_av);
	}
1622

1623 1624
	if (progress)
		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
1625 1626
	sorted_by_sha = create_final_object_list();
	if (non_empty && !nr_result)
1627
		return 0;
1628

J
Junio C Hamano 已提交
1629 1630
	SHA1_Init(&ctx);
	list = sorted_by_sha;
1631
	for (i = 0; i < nr_result; i++) {
J
Junio C Hamano 已提交
1632 1633 1634 1635
		struct object_entry *entry = *list++;
		SHA1_Update(&ctx, entry->sha1, 20);
	}
	SHA1_Final(object_list_sha1, &ctx);
1636 1637
	if (progress && (nr_objects != nr_result))
		fprintf(stderr, "Result has %d objects.\n", nr_result);
J
Junio C Hamano 已提交
1638

1639
	if (reuse_cached_pack(object_list_sha1))
1640 1641
		;
	else {
1642 1643
		if (nr_result)
			prepare_pack(window, depth);
1644 1645 1646 1647 1648 1649 1650 1651
		if (progress && pack_to_stdout) {
			/* the other end usually displays progress itself */
			struct itimerval v = {{0,},};
			setitimer(ITIMER_REAL, &v, NULL);
			signal(SIGALRM, SIG_IGN );
			progress_update = 0;
		}
		write_pack_file();
1652 1653 1654 1655
		if (!pack_to_stdout) {
			write_index_file();
			puts(sha1_to_hex(object_list_sha1));
		}
1656
	}
1657 1658
	if (progress)
		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
1659
			nr_result, written, written_delta, reused, reused_delta);
1660 1661
	return 0;
}