pack-objects.c 23.8 KB
Newer Older
1 2 3
#include "cache.h"
#include "object.h"
#include "delta.h"
4
#include "pack.h"
5
#include "csum-file.h"
6
#include <sys/time.h>
7
#include <signal.h>
8

9
static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
10 11 12

struct object_entry {
	unsigned char sha1[20];
13
	unsigned long size;	/* uncompressed size */
14 15 16
	unsigned long offset;	/* offset into the final pack file;
				 * nonzero if already written.
				 */
17
	unsigned int depth;	/* delta depth */
18
	unsigned int delta_limit;	/* base adjustment for in-pack delta */
19
	unsigned int hash;	/* name hint hash */
20
	enum object_type type;
21
	enum object_type in_pack_type;	/* could be delta */
22 23 24 25
	unsigned long delta_size;	/* delta data size (uncompressed) */
	struct object_entry *delta;	/* delta base object */
	struct packed_git *in_pack; 	/* already in pack */
	unsigned int in_pack_offset;
26 27 28 29
	struct object_entry *delta_child; /* delitified objects who bases me */
	struct object_entry *delta_sibling; /* other deltified objects who
					     * uses the same base as me
					     */
30 31
};

32 33 34 35 36 37 38 39 40 41 42 43 44
/*
 * Objects we are going to pack are colected in objects array (dynamically
 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
 * in the order we see -- typically rev-list --objects order that gives us
 * nice "minimum seek" order.
 *
 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
 * elements in the objects array.  The former is used to build the pack
 * index (lists object names in the ascending order to help offset lookup),
 * and the latter is used to group similar things together by try_delta()
 * heuristics.
 */

45
static unsigned char object_list_sha1[20];
46
static int non_empty = 0;
47
static int no_reuse_delta = 0;
L
Linus Torvalds 已提交
48
static int local = 0;
49
static int incremental = 0;
50 51 52 53
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects = NULL;
static int nr_objects = 0, nr_alloc = 0;
static const char *base_name;
54
static unsigned char pack_file_sha1[20];
J
Junio C Hamano 已提交
55
static int progress = 1;
56
static volatile int progress_update = 0;
57

58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.  Binary search from
 * sorted_by_sha is also possible but this was easier to code and faster.
 * This hashtable is built after all the objects are seen.
 */
static int *object_ix = NULL;
static int object_ix_hashsz = 0;

/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
 * size is easily available by examining the pack entry header).  We build
 * a hashtable of existing packs (pack_revindex), and keep reverse index
 * here -- pack index file is sorted by object name mapping to offset; this
 * pack_revindex[].revindex array is an ordered list of offsets, so if you
 * know the offset of an object, next offset is where its packed
 * representation ends.
 */
struct pack_revindex {
	struct packed_git *p;
	unsigned long *revindex;
} *pack_revindex = NULL;
static int pack_revindex_hashsz = 0;

/*
 * stats
 */
static int written = 0;
88
static int written_delta = 0;
89
static int reused = 0;
90
static int reused_delta = 0;
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188

static int pack_revindex_ix(struct packed_git *p)
{
	unsigned int ui = (unsigned int) p;
	int i;

	ui = ui ^ (ui >> 16); /* defeat structure alignment */
	i = (int)(ui % pack_revindex_hashsz);
	while (pack_revindex[i].p) {
		if (pack_revindex[i].p == p)
			return i;
		if (++i == pack_revindex_hashsz)
			i = 0;
	}
	return -1 - i;
}

static void prepare_pack_ix(void)
{
	int num;
	struct packed_git *p;
	for (num = 0, p = packed_git; p; p = p->next)
		num++;
	if (!num)
		return;
	pack_revindex_hashsz = num * 11;
	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
	for (p = packed_git; p; p = p->next) {
		num = pack_revindex_ix(p);
		num = - 1 - num;
		pack_revindex[num].p = p;
	}
	/* revindex elements are lazily initialized */
}

static int cmp_offset(const void *a_, const void *b_)
{
	unsigned long a = *(unsigned long *) a_;
	unsigned long b = *(unsigned long *) b_;
	if (a < b)
		return -1;
	else if (a == b)
		return 0;
	else
		return 1;
}

/*
 * Ordered list of offsets of objects in the pack.
 */
static void prepare_pack_revindex(struct pack_revindex *rix)
{
	struct packed_git *p = rix->p;
	int num_ent = num_packed_objects(p);
	int i;
	void *index = p->index_base + 256;

	rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
	for (i = 0; i < num_ent; i++) {
		long hl = *((long *)(index + 24 * i));
		rix->revindex[i] = ntohl(hl);
	}
	/* This knows the pack format -- the 20-byte trailer
	 * follows immediately after the last object data.
	 */
	rix->revindex[num_ent] = p->pack_size - 20;
	qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
}

static unsigned long find_packed_object_size(struct packed_git *p,
					     unsigned long ofs)
{
	int num;
	int lo, hi;
	struct pack_revindex *rix;
	unsigned long *revindex;
	num = pack_revindex_ix(p);
	if (num < 0)
		die("internal error: pack revindex uninitialized");
	rix = &pack_revindex[num];
	if (!rix->revindex)
		prepare_pack_revindex(rix);
	revindex = rix->revindex;
	lo = 0;
	hi = num_packed_objects(p) + 1;
	do {
		int mi = (lo + hi) / 2;
		if (revindex[mi] == ofs) {
			return revindex[mi+1] - ofs;
		}
		else if (ofs < revindex[mi])
			hi = mi;
		else
			lo = mi + 1;
	} while (lo < hi);
	die("internal error: pack revindex corrupt");
}

189 190 191 192 193 194 195 196 197
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	char type[10];
	void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
198
        delta_buf = diff_delta(otherbuf, othersize,
199
			       buf, size, &delta_size, 0);
200
        if (!delta_buf || delta_size != entry->delta_size)
201 202 203 204 205 206
        	die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

207 208 209 210 211 212 213 214 215
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
216
	int n = 1;
217 218 219 220 221
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

222 223 224
	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
225
		*hdr++ = c | 0x80;
226 227 228
		c = size & 0x7f;
		size >>= 7;
		n++;
229 230 231 232 233
	}
	*hdr = c;
	return n;
}

234
static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
235 236 237
{
	unsigned long size;
	char type[10];
238
	void *buf;
239
	unsigned char header[10];
240
	unsigned hdrlen, datalen;
241
	enum object_type obj_type;
242
	int to_reuse = 0;
243

244
	obj_type = entry->type;
245 246 247 248 249 250 251 252 253 254 255 256 257 258
	if (! entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (obj_type == OBJ_DELTA)
		to_reuse = 1;	/* check_object() decided it for us */
	else if (obj_type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (! to_reuse) {
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
		buf = read_sha1_file(entry->sha1, type, &size);
		if (!buf)
			die("unable to read %s", sha1_to_hex(entry->sha1));
		if (size != entry->size)
			die("object %s size inconsistency (%lu vs %lu)",
			    sha1_to_hex(entry->sha1), size, entry->size);
		if (entry->delta) {
			buf = delta_against(buf, size, entry);
			size = entry->delta_size;
			obj_type = OBJ_DELTA;
		}
		/*
		 * The object header is a byte of 'type' followed by zero or
		 * more bytes of length.  For deltas, the 20 bytes of delta
		 * sha1 follows that.
		 */
		hdrlen = encode_header(obj_type, size, header);
		sha1write(f, header, hdrlen);

		if (entry->delta) {
			sha1write(f, entry->delta, 20);
			hdrlen += 20;
		}
		datalen = sha1write_compressed(f, buf, size);
		free(buf);
284
	}
285 286 287 288 289 290 291 292 293
	else {
		struct packed_git *p = entry->in_pack;
		use_packed_git(p);

		datalen = find_packed_object_size(p, entry->in_pack_offset);
		buf = p->pack_base + entry->in_pack_offset;
		sha1write(f, buf, datalen);
		unuse_packed_git(p);
		hdrlen = 0; /* not really */
294 295
		if (obj_type == OBJ_DELTA)
			reused_delta++;
296
		reused++;
297
	}
298 299
	if (obj_type == OBJ_DELTA)
		written_delta++;
300
	written++;
301 302 303
	return hdrlen + datalen;
}

304 305 306 307 308 309 310 311 312 313 314
static unsigned long write_one(struct sha1file *f,
			       struct object_entry *e,
			       unsigned long offset)
{
	if (e->offset)
		/* offset starts from header size and cannot be zero
		 * if it is written already.
		 */
		return offset;
	e->offset = offset;
	offset += write_object(f, e);
J
Junio C Hamano 已提交
315
	/* if we are deltified, write out its base object. */
316 317 318 319 320
	if (e->delta)
		offset = write_one(f, e->delta, offset);
	return offset;
}

321 322 323
static void write_pack_file(void)
{
	int i;
324
	struct sha1file *f;
325 326
	unsigned long offset;
	struct pack_header hdr;
327 328
	unsigned last_percent = 999;
	int do_progress = 0;
329

330 331
	if (!base_name)
		f = sha1fd(1, "<stdout>");
332 333 334 335 336 337 338 339
	else {
		f = sha1create("%s-%s.%s", base_name,
			       sha1_to_hex(object_list_sha1), "pack");
		do_progress = progress;
	}
	if (do_progress)
		fprintf(stderr, "Writing %d objects.\n", nr_objects);

340
	hdr.hdr_signature = htonl(PACK_SIGNATURE);
341
	hdr.hdr_version = htonl(PACK_VERSION);
342 343 344
	hdr.hdr_entries = htonl(nr_objects);
	sha1write(f, &hdr, sizeof(hdr));
	offset = sizeof(hdr);
345
	for (i = 0; i < nr_objects; i++) {
346
		offset = write_one(f, objects + i, offset);
347 348 349 350 351 352 353 354
		if (do_progress) {
			unsigned percent = written * 100 / nr_objects;
			if (progress_update || percent != last_percent) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, written, nr_objects);
				progress_update = 0;
				last_percent = percent;
			}
355 356
		}
	}
357 358
	if (do_progress)
		fputc('\n', stderr);
359

360
	sha1close(f, pack_file_sha1, 1);
361 362 363 364 365
}

static void write_index_file(void)
{
	int i;
366
	struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
367 368 369 370 371 372 373
	struct object_entry **list = sorted_by_sha;
	struct object_entry **last = list + nr_objects;
	unsigned int array[256];

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
374
	 * having to do eight extra binary search iterations).
375 376 377 378 379 380 381 382 383 384 385 386
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *entry = *next;
			if (entry->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
387
	sha1write(f, array, 256 * sizeof(int));
388 389 390 391 392

	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
L
Linus Torvalds 已提交
393
	for (i = 0; i < nr_objects; i++) {
394 395
		struct object_entry *entry = *list++;
		unsigned int offset = htonl(entry->offset);
396 397
		sha1write(f, &offset, 4);
		sha1write(f, entry->sha1, 20);
398
	}
399 400
	sha1write(f, pack_file_sha1, 20);
	sha1close(f, NULL, 1);
401 402
}

403
static int add_object_entry(unsigned char *sha1, unsigned int hash)
404 405 406
{
	unsigned int idx = nr_objects;
	struct object_entry *entry;
407
	struct packed_git *p;
408 409
	unsigned int found_offset = 0;
	struct packed_git *found_pack = NULL;
410 411 412 413 414 415 416 417 418 419 420

	for (p = packed_git; p; p = p->next) {
		struct pack_entry e;
		if (find_pack_entry_one(sha1, &e, p)) {
			if (incremental)
				return 0;
			if (local && !p->pack_local)
				return 0;
			if (!found_pack) {
				found_offset = e.offset;
				found_pack = e.p;
L
Linus Torvalds 已提交
421 422 423
			}
		}
	}
424

425 426 427 428 429 430 431 432
	if (idx >= nr_alloc) {
		unsigned int needed = (idx + 1024) * 3 / 2;
		objects = xrealloc(objects, needed * sizeof(*entry));
		nr_alloc = needed;
	}
	entry = objects + idx;
	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);
433
	entry->hash = hash;
434 435 436 437
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}
438
	nr_objects = idx+1;
439
	return 1;
440 441
}

442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
static int locate_object_entry_hash(unsigned char *sha1)
{
	int i;
	unsigned int ui;
	memcpy(&ui, sha1, sizeof(unsigned int));
	i = ui % object_ix_hashsz;
	while (0 < object_ix[i]) {
		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
			return i;
		if (++i == object_ix_hashsz)
			i = 0;
	}
	return -1 - i;
}

static struct object_entry *locate_object_entry(unsigned char *sha1)
{
	int i = locate_object_entry_hash(sha1);
	if (0 <= i)
		return &objects[object_ix[i]-1];
	return NULL;
}

465 466
static void check_object(struct object_entry *entry)
{
467 468
	char type[20];

469
	if (entry->in_pack) {
470 471 472 473 474 475 476 477 478 479 480 481
		unsigned char base[20];
		unsigned long size;
		struct object_entry *base_entry;

		/* We want in_pack_type even if we do not reuse delta.
		 * There is no point not reusing non-delta representations.
		 */
		check_reuse_pack_delta(entry->in_pack,
				       entry->in_pack_offset,
				       base, &size,
				       &entry->in_pack_type);

482 483 484 485
		/* Check if it is delta, and the base is also an object
		 * we are going to pack.  If so we will reuse the existing
		 * delta.
		 */
486 487
		if (!no_reuse_delta &&
		    entry->in_pack_type == OBJ_DELTA &&
488
		    (base_entry = locate_object_entry(base))) {
489 490 491 492 493

			/* Depth value does not matter - find_deltas()
			 * will never consider reused delta as the
			 * base object to deltify other objects
			 * against, in order to avoid circular deltas.
494
			 */
495 496

			/* uncompressed size of the delta data */
497 498 499
			entry->size = entry->delta_size = size;
			entry->delta = base_entry;
			entry->type = OBJ_DELTA;
500

501 502
			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;
503

504 505 506
			return;
		}
		/* Otherwise we would do the usual */
507
	}
508 509

	if (sha1_object_info(entry->sha1, type, &entry->size))
510 511
		die("unable to get type of object %s",
		    sha1_to_hex(entry->sha1));
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542

	if (!strcmp(type, "commit")) {
		entry->type = OBJ_COMMIT;
	} else if (!strcmp(type, "tree")) {
		entry->type = OBJ_TREE;
	} else if (!strcmp(type, "blob")) {
		entry->type = OBJ_BLOB;
	} else if (!strcmp(type, "tag")) {
		entry->type = OBJ_TAG;
	} else
		die("unable to pack object %s of type %s",
		    sha1_to_hex(entry->sha1), type);
}

static void hash_objects(void)
{
	int i;
	struct object_entry *oe;

	object_ix_hashsz = nr_objects * 2;
	object_ix = xcalloc(sizeof(int), object_ix_hashsz);
	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
		int ix = locate_object_entry_hash(oe->sha1);
		if (0 <= ix) {
			error("the same object '%s' added twice",
			      sha1_to_hex(oe->sha1));
			continue;
		}
		ix = -1 - ix;
		object_ix[ix] = i + 1;
	}
543 544
}

545 546 547 548 549 550 551 552 553 554 555 556 557
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
}

558 559 560
static void get_object_details(void)
{
	int i;
561
	struct object_entry *entry;
562

563 564
	hash_objects();
	prepare_pack_ix();
565 566 567 568 569 570
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		check_object(entry);
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		if (!entry->delta && entry->delta_child)
			entry->delta_limit =
				check_delta_limit(entry, 1);
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
}

typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

static entry_sort_t current_sort;

static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
	return current_sort(a,b);
}

static struct object_entry **create_sorted_list(entry_sort_t sort)
{
	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
	int i;

	for (i = 0; i < nr_objects; i++)
		list[i] = objects + i;
	current_sort = sort;
	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
{
	return memcmp(a->sha1, b->sha1, 20);
}

static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
{
	if (a->type < b->type)
		return -1;
	if (a->type > b->type)
		return 1;
607 608 609 610
	if (a->hash < b->hash)
		return -1;
	if (a->hash > b->hash)
		return 1;
611 612 613 614 615 616 617 618 619 620 621 622 623
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return a < b ? -1 : (a > b);
}

struct unpacked {
	struct object_entry *entry;
	void *data;
};

/*
624 625 626 627 628 629
 * We search for deltas _backwards_ in a list sorted by type and
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller - deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.
630
 */
631
static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
632 633 634
{
	struct object_entry *cur_entry = cur->entry;
	struct object_entry *old_entry = old->entry;
635
	unsigned long size, oldsize, delta_size, sizediff;
636
	long max_size;
637 638 639 640 641 642
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (cur_entry->type != old_entry->type)
		return -1;

643 644 645 646
	/* If the current object is at edge, take the depth the objects
	 * that depend on the current object into account -- otherwise
	 * they would become too deep.
	 */
647 648 649 650 651
	if (cur_entry->delta_child) {
		if (max_depth <= cur_entry->delta_limit)
			return 0;
		max_depth -= cur_entry->delta_limit;
	}
652

653
	size = cur_entry->size;
654 655
	if (size < 50)
		return -1;
656
	oldsize = old_entry->size;
657 658
	sizediff = oldsize > size ? oldsize - size : size - oldsize;
	if (sizediff > size / 8)
659
		return -1;
660 661
	if (old_entry->depth >= max_depth)
		return 0;
662 663 664 665 666 667 668 669

	/*
	 * NOTE!
	 *
	 * We always delta from the bigger to the smaller, since that's
	 * more space-efficient (deletes don't have to say _what_ they
	 * delete).
	 */
670 671 672
	max_size = size / 2 - 20;
	if (cur_entry->delta)
		max_size = cur_entry->delta_size-1;
673 674
	if (sizediff >= max_size)
		return -1;
675 676
	delta_buf = diff_delta(old->data, oldsize,
			       cur->data, size, &delta_size, max_size);
677
	if (!delta_buf)
678 679 680
		return 0;
	cur_entry->delta = old_entry;
	cur_entry->delta_size = delta_size;
681
	cur_entry->depth = old_entry->depth + 1;
682
	free(delta_buf);
683
	return 0;
684 685
}

686 687 688 689 690 691
static void progress_interval(int signum)
{
	signal(SIGALRM, progress_interval);
	progress_update = 1;
}

692
static void find_deltas(struct object_entry **list, int window, int depth)
693
{
694
	int i, idx;
695 696
	unsigned int array_size = window * sizeof(struct unpacked);
	struct unpacked *array = xmalloc(array_size);
697 698
	unsigned processed = 0;
	unsigned last_percent = 999;
699 700

	memset(array, 0, array_size);
701 702
	i = nr_objects;
	idx = 0;
703 704
	if (progress)
		fprintf(stderr, "Deltifying %d objects.\n", nr_objects);
705

706
	while (--i >= 0) {
707 708 709 710 711 712
		struct object_entry *entry = list[i];
		struct unpacked *n = array + idx;
		unsigned long size;
		char type[10];
		int j;

713 714 715 716 717 718 719 720 721
		processed++;
		if (progress) {
			unsigned percent = processed * 100 / nr_objects;
			if (percent != last_percent || progress_update) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, processed, nr_objects);
				progress_update = 0;
				last_percent = percent;
			}
722
		}
723 724 725

		if (entry->delta)
			/* This happens if we decided to reuse existing
726
			 * delta from a pack.  "!no_reuse_delta &&" is implied.
727 728 729
			 */
			continue;

730 731 732 733 734
		free(n->data);
		n->entry = entry;
		n->data = read_sha1_file(entry->sha1, type, &size);
		if (size != entry->size)
			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
735

L
Linus Torvalds 已提交
736 737 738
		j = window;
		while (--j > 0) {
			unsigned int other_idx = idx + j;
739
			struct unpacked *m;
L
Linus Torvalds 已提交
740 741
			if (other_idx >= window)
				other_idx -= window;
742 743 744
			m = array + other_idx;
			if (!m->entry)
				break;
745
			if (try_delta(n, m, depth) < 0)
746 747
				break;
		}
748 749 750
		idx++;
		if (idx >= window)
			idx = 0;
751
	}
752

753 754 755
	if (progress)
		fputc('\n', stderr);

756 757 758
	for (i = 0; i < window; ++i)
		free(array[i].data);
	free(array);
759 760
}

761 762
static void prepare_pack(int window, int depth)
{
763
	get_object_details();
764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788
	sorted_by_type = create_sorted_list(type_size_sort);
	if (window && depth)
		find_deltas(sorted_by_type, window+1, depth);
}

static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
{
	static const char cache[] = "pack-cache/pack-%s.%s";
	char *cached_pack, *cached_idx;
	int ifd, ofd, ifd_ix = -1;

	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
	ifd = open(cached_pack, O_RDONLY);
	if (ifd < 0)
		return 0;

	if (!pack_to_stdout) {
		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
		ifd_ix = open(cached_idx, O_RDONLY);
		if (ifd_ix < 0) {
			close(ifd);
			return 0;
		}
	}

789 790 791
	if (progress)
		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
			sha1_to_hex(sha1));
792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822

	if (pack_to_stdout) {
		if (copy_fd(ifd, 1))
			exit(1);
		close(ifd);
	}
	else {
		char name[PATH_MAX];
		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd, ofd))
			exit(1);
		close(ifd);

		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd_ix, ofd))
			exit(1);
		close(ifd_ix);
		puts(sha1_to_hex(sha1));
	}

	return 1;
}

823 824
int main(int argc, char **argv)
{
825
	SHA_CTX ctx;
826
	char line[PATH_MAX + 20];
827
	int window = 10, depth = 10, pack_to_stdout = 0;
J
Junio C Hamano 已提交
828
	struct object_entry **list;
829 830
	int i;

831 832
	setup_git_directory();

833 834 835 836
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
837 838 839 840
			if (!strcmp("--non-empty", arg)) {
				non_empty = 1;
				continue;
			}
L
Linus Torvalds 已提交
841 842 843 844
			if (!strcmp("--local", arg)) {
				local = 1;
				continue;
			}
845 846 847 848
			if (!strcmp("--incremental", arg)) {
				incremental = 1;
				continue;
			}
849 850
			if (!strncmp("--window=", arg, 9)) {
				char *end;
851
				window = strtoul(arg+9, &end, 0);
852 853 854 855
				if (!arg[9] || *end)
					usage(pack_usage);
				continue;
			}
856 857 858 859 860 861 862
			if (!strncmp("--depth=", arg, 8)) {
				char *end;
				depth = strtoul(arg+8, &end, 0);
				if (!arg[8] || *end)
					usage(pack_usage);
				continue;
			}
J
Junio C Hamano 已提交
863 864 865 866
			if (!strcmp("-q", arg)) {
				progress = 0;
				continue;
			}
867 868 869 870
			if (!strcmp("--no-reuse-delta", arg)) {
				no_reuse_delta = 1;
				continue;
			}
871 872 873 874
			if (!strcmp("--stdout", arg)) {
				pack_to_stdout = 1;
				continue;
			}
875 876 877 878 879 880 881
			usage(pack_usage);
		}
		if (base_name)
			usage(pack_usage);
		base_name = arg;
	}

882
	if (pack_to_stdout != !base_name)
883 884
		usage(pack_usage);

L
Linus Torvalds 已提交
885
	prepare_packed_git();
886

887
	if (progress) {
888 889 890 891 892 893
		struct itimerval v;
		v.it_interval.tv_sec = 1;
		v.it_interval.tv_usec = 0;
		v.it_value = v.it_interval;
		signal(SIGALRM, progress_interval);
		setitimer(ITIMER_REAL, &v, NULL);
894 895
		fprintf(stderr, "Generating pack...\n");
	}
896

897
	while (fgets(line, sizeof(line), stdin) != NULL) {
898 899
		unsigned int hash;
		char *p;
900
		unsigned char sha1[20];
901

902
		if (progress_update) {
903
			fprintf(stderr, "Counting objects...%d\r", nr_objects);
904
			progress_update = 0;
905
		}
906
		if (get_sha1_hex(line, sha1))
907
			die("expected sha1, got garbage:\n %s", line);
908 909 910 911 912 913 914 915
		hash = 0;
		p = line+40;
		while (*p) {
			unsigned char c = *p++;
			if (isspace(c))
				continue;
			hash = hash * 11 + c;
		}
J
Junio C Hamano 已提交
916
		add_object_entry(sha1, hash);
917
	}
918 919
	if (progress)
		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
920 921
	if (non_empty && !nr_objects)
		return 0;
922 923

	sorted_by_sha = create_sorted_list(sha1_sort);
J
Junio C Hamano 已提交
924 925 926 927 928 929 930 931
	SHA1_Init(&ctx);
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *entry = *list++;
		SHA1_Update(&ctx, entry->sha1, 20);
	}
	SHA1_Final(object_list_sha1, &ctx);

932 933 934 935
	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
		;
	else {
		prepare_pack(window, depth);
936 937 938 939 940 941 942 943
		if (progress && pack_to_stdout) {
			/* the other end usually displays progress itself */
			struct itimerval v = {{0,},};
			setitimer(ITIMER_REAL, &v, NULL);
			signal(SIGALRM, SIG_IGN );
			progress_update = 0;
		}
		write_pack_file();
944 945 946 947
		if (!pack_to_stdout) {
			write_index_file();
			puts(sha1_to_hex(object_list_sha1));
		}
948
	}
949 950 951
	if (progress)
		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
			nr_objects, written, written_delta, reused, reused_delta);
952 953
	return 0;
}