pack-objects.c 72.0 KB
Newer Older
1
#include "builtin.h"
2
#include "cache.h"
3
#include "attr.h"
4
#include "object.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
9
#include "delta.h"
10
#include "pack.h"
11
#include "pack-revindex.h"
12
#include "csum-file.h"
J
Junio C Hamano 已提交
13
#include "tree-walk.h"
14 15 16
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
17
#include "pack-objects.h"
N
Nicolas Pitre 已提交
18
#include "progress.h"
19
#include "refs.h"
20
#include "streaming.h"
21
#include "thread-utils.h"
22
#include "pack-bitmap.h"
23 24
#include "reachable.h"
#include "sha1-array.h"
J
Jeff King 已提交
25
#include "argv-array.h"
N
Nicolas Pitre 已提交
26

27
static const char *pack_usage[] = {
28 29
	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
30 31
	NULL
};
32

33
/*
34 35 36
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
37
 */
38 39
static struct packing_data to_pack;

40
static struct pack_idx_entry **written_list;
41
static uint32_t nr_result, nr_written;
42

43
static int non_empty;
44
static int reuse_delta = 1, reuse_object = 1;
J
Junio C Hamano 已提交
45
static int keep_unreachable, unpack_unreachable, include_tag;
46
static unsigned long unpack_unreachable_expiration;
47 48
static int local;
static int incremental;
49
static int ignore_packed_keep;
50
static int allow_ofs_delta;
51
static struct pack_idx_option pack_idx_opts;
52
static const char *base_name;
J
Junio C Hamano 已提交
53
static int progress = 1;
54
static int window = 10;
55
static unsigned long pack_size_limit;
56
static int depth = 50;
57
static int delta_search_threads;
58
static int pack_to_stdout;
59
static int num_preferred_base;
60
static struct progress *progress_state;
61 62
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
static int pack_compression_seen;
63

64 65 66 67 68
static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static off_t reuse_packfile_offset;

static int use_bitmap_index = 1;
69
static int write_bitmap_index;
70
static uint16_t write_bitmap_options;
71

72
static unsigned long delta_cache_size = 0;
73
static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
74
static unsigned long cache_max_small_delta_size = 1000;
75

76 77
static unsigned long window_memory_limit = 0;

78 79 80
/*
 * stats
 */
81 82
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
83

84 85 86 87 88 89 90 91 92 93 94
/*
 * Indexed commits
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;

static void index_commit_for_bitmap(struct commit *commit)
{
	if (indexed_commits_nr >= indexed_commits_alloc) {
		indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
95
		REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
96 97 98 99
	}

	indexed_commits[indexed_commits_nr++] = commit;
}
100

N
Nicolas Pitre 已提交
101
static void *get_delta(struct object_entry *entry)
102
{
N
Nicolas Pitre 已提交
103 104
	unsigned long size, base_size, delta_size;
	void *buf, *base_buf, *delta_buf;
105
	enum object_type type;
106

N
Nicolas Pitre 已提交
107 108 109 110 111
	buf = read_sha1_file(entry->idx.sha1, &type, &size);
	if (!buf)
		die("unable to read %s", sha1_to_hex(entry->idx.sha1));
	base_buf = read_sha1_file(entry->delta->idx.sha1, &type, &base_size);
	if (!base_buf)
G
Geert Bosch 已提交
112
		die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1));
N
Nicolas Pitre 已提交
113
	delta_buf = diff_delta(base_buf, base_size,
114
			       buf, size, &delta_size, 0);
N
Nicolas Pitre 已提交
115
	if (!delta_buf || delta_size != entry->delta_size)
J
Junio C Hamano 已提交
116
		die("delta size changed");
N
Nicolas Pitre 已提交
117 118
	free(buf);
	free(base_buf);
119 120 121
	return delta_buf;
}

122 123
static unsigned long do_compress(void **pptr, unsigned long size)
{
124
	git_zstream stream;
125 126 127
	void *in, *out;
	unsigned long maxsize;

128
	git_deflate_init(&stream, pack_compression_level);
J
Junio C Hamano 已提交
129
	maxsize = git_deflate_bound(&stream, size);
130 131 132 133 134 135 136 137 138

	in = *pptr;
	out = xmalloc(maxsize);
	*pptr = out;

	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
139
	while (git_deflate(&stream, Z_FINISH) == Z_OK)
140
		; /* nothing */
141
	git_deflate_end(&stream);
142 143 144 145 146

	free(in);
	return stream.total_out;
}

147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
static unsigned long write_large_blob_data(struct git_istream *st, struct sha1file *f,
					   const unsigned char *sha1)
{
	git_zstream stream;
	unsigned char ibuf[1024 * 16];
	unsigned char obuf[1024 * 16];
	unsigned long olen = 0;

	git_deflate_init(&stream, pack_compression_level);

	for (;;) {
		ssize_t readlen;
		int zret = Z_OK;
		readlen = read_istream(st, ibuf, sizeof(ibuf));
		if (readlen == -1)
			die(_("unable to read %s"), sha1_to_hex(sha1));

		stream.next_in = ibuf;
		stream.avail_in = readlen;
		while ((stream.avail_in || readlen == 0) &&
		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
			stream.next_out = obuf;
			stream.avail_out = sizeof(obuf);
			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
			sha1write(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

186 187 188 189
/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
190 191
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
192 193
		off_t offset,
		off_t len,
194 195
		unsigned long expect)
{
196
	git_zstream stream;
197 198 199 200
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
201
	git_inflate_init(&stream);
202 203 204 205 206
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
207
		st = git_inflate(&stream, Z_FINISH);
208 209
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
210
	git_inflate_end(&stream);
211 212 213 214 215 216 217 218
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

static void copy_pack_data(struct sha1file *f,
		struct packed_git *p,
		struct pack_window **w_curs,
219 220
		off_t offset,
		off_t len)
221 222
{
	unsigned char *in;
223
	unsigned long avail;
224 225 226 227

	while (len) {
		in = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
228
			avail = (unsigned long)len;
229 230 231 232 233 234
		sha1write(f, in, avail);
		offset += avail;
		len -= avail;
	}
}

235
/* Return 0 if we will bust the pack-size limit */
236 237
static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
238
{
239
	unsigned long size, datalen;
240
	unsigned char header[10], dheader[10];
241
	unsigned hdrlen;
242
	enum object_type type;
243
	void *buf;
244
	struct git_istream *st = NULL;
245 246

	if (!usable_delta) {
247 248 249 250 251 252 253 254 255
		if (entry->type == OBJ_BLOB &&
		    entry->size > big_file_threshold &&
		    (st = open_istream(entry->idx.sha1, &type, &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = read_sha1_file(entry->idx.sha1, &type, &size);
			if (!buf)
				die(_("unable to read %s"), sha1_to_hex(entry->idx.sha1));
		}
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		free(entry->delta_data);
		entry->delta_data = NULL;
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		size = entry->delta_size;
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		buf = get_delta(entry);
		size = entry->delta_size;
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

276 277 278
	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
	 */
	hdrlen = encode_in_pack_object_header(type, size, header);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
301 302
			if (st)
				close_istream(st);
303 304 305 306 307 308 309 310 311 312 313 314
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * an additional 20 bytes for the base sha1.
		 */
		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
315 316
			if (st)
				close_istream(st);
317 318 319 320 321 322 323 324
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, entry->delta->idx.sha1, 20);
		hdrlen += 20;
	} else {
		if (limit && hdrlen + datalen + 20 >= limit) {
325 326
			if (st)
				close_istream(st);
327 328 329 330 331
			free(buf);
			return 0;
		}
		sha1write(f, header, hdrlen);
	}
332 333 334 335 336 337 338
	if (st) {
		datalen = write_large_blob_data(st, f, entry->idx.sha1);
		close_istream(st);
	} else {
		sha1write(f, buf, datalen);
		free(buf);
	}
339 340 341 342 343 344 345 346 347 348 349 350 351

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_reuse_object(struct sha1file *f, struct object_entry *entry,
					unsigned long limit, int usable_delta)
{
	struct packed_git *p = entry->in_pack;
	struct pack_window *w_curs = NULL;
	struct revindex_entry *revidx;
	off_t offset;
	enum object_type type = entry->type;
352
	off_t datalen;
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422
	unsigned char header[10], dheader[10];
	unsigned hdrlen;

	if (entry->delta)
		type = (allow_ofs_delta && entry->delta->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(type, entry->size, header);

	offset = entry->in_pack_offset;
	revidx = find_pack_revindex(p, offset);
	datalen = revidx[1].offset - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
		error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
		error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - entry->delta->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + 20 + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
		sha1write(f, entry->delta->idx.sha1, 20);
		hdrlen += 20;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + 20 >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		sha1write(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_object(struct sha1file *f,
				  struct object_entry *entry,
				  off_t write_offset)
{
	unsigned long limit, len;
423
	int usable_delta, to_reuse;
424

425 426 427
	if (!pack_to_stdout)
		crc32_begin(f);

428
	/* apply size limit if limited packsize and not first object */
429 430 431 432 433 434 435 436 437 438
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;
439 440 441 442 443 444 445 446 447 448 449 450

	if (!entry->delta)
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
	       usable_delta = 1;	/* unlimited packfile */
	else if (entry->delta->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (entry->delta->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

451
	if (!reuse_object)
452 453
		to_reuse = 0;	/* explicit */
	else if (!entry->in_pack)
454
		to_reuse = 0;	/* can't reuse what we don't have */
455
	else if (entry->type == OBJ_REF_DELTA || entry->type == OBJ_OFS_DELTA)
456 457 458
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
459
	else if (entry->type != entry->in_pack_type)
460 461 462 463 464 465 466 467
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

468 469 470 471 472 473
	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;
474

475
	if (usable_delta)
476
		written_delta++;
477
	written++;
478
	if (!pack_to_stdout)
G
Geert Bosch 已提交
479
		entry->idx.crc32 = crc32_end(f);
480
	return len;
481 482
}

483 484 485 486 487 488 489 490 491 492
enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

static enum write_one_status write_one(struct sha1file *f,
				       struct object_entry *e,
				       off_t *offset)
493
{
494
	unsigned long size;
495
	int recursing;
496

497 498 499 500 501 502 503 504 505 506 507 508 509 510
	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning("recursive delta detected for object %s",
			sha1_to_hex(e->idx.sha1));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}
511

512
	/* if we are deltified, write out base object first. */
513 514 515 516 517 518 519 520 521 522 523 524 525 526
	if (e->delta) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, e->delta, offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			e->delta = NULL;
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}
527

528 529
	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
530
	if (!size) {
531 532
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
533
	}
534
	written_list[nr_written++] = &e->idx;
535 536

	/* make sure off_t is sufficiently large not to wrap */
537
	if (signed_add_overflows(*offset, size))
538
		die("pack too large for current definition of off_t");
539
	*offset += size;
540
	return WRITE_ONE_WRITTEN;
541 542
}

543
static int mark_tagged(const char *path, const struct object_id *oid, int flag,
544 545 546
		       void *cb_data)
{
	unsigned char peeled[20];
547
	struct object_entry *entry = packlist_find(&to_pack, oid->hash, NULL);
548 549 550 551

	if (entry)
		entry->tagged = 1;
	if (!peel_ref(path, peeled)) {
552
		entry = packlist_find(&to_pack, peeled, NULL);
553 554 555 556 557 558
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

559
static inline void add_to_write_order(struct object_entry **wo,
560
			       unsigned int *endp,
561 562 563 564 565 566 567 568 569
			       struct object_entry *e)
{
	if (e->filled)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

static void add_descendants_to_write_order(struct object_entry **wo,
570
					   unsigned int *endp,
571 572
					   struct object_entry *e)
{
573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = e->delta_sibling; s; s = s->delta_sibling) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (e->delta_child) {
			add_to_order = 1;
			e = e->delta_child;
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (e->delta_sibling) {
				e = e->delta_sibling;
				continue;
			}
			/* go back to our parent node */
			e = e->delta;
			while (e && !e->delta_sibling) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = e->delta;
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = e->delta_sibling;
		}
	};
610 611 612
}

static void add_family_to_write_order(struct object_entry **wo,
613
				      unsigned int *endp,
614 615 616 617 618 619 620 621 622 623 624
				      struct object_entry *e)
{
	struct object_entry *root;

	for (root = e; root->delta; root = root->delta)
		; /* nothing */
	add_descendants_to_write_order(wo, endp, root);
}

static struct object_entry **compute_write_order(void)
{
625
	unsigned int i, wo_end, last_untagged;
626

J
Jeff King 已提交
627
	struct object_entry **wo;
628
	struct object_entry *objects = to_pack.objects;
629

630
	for (i = 0; i < to_pack.nr_objects; i++) {
631 632 633 634 635 636 637 638 639 640 641
		objects[i].tagged = 0;
		objects[i].filled = 0;
		objects[i].delta_child = NULL;
		objects[i].delta_sibling = NULL;
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
642
	for (i = to_pack.nr_objects; i > 0;) {
643
		struct object_entry *e = &objects[--i];
644 645 646 647 648 649 650 651 652 653
		if (!e->delta)
			continue;
		/* Mark me as the first child */
		e->delta_sibling = e->delta->delta_child;
		e->delta->delta_child = e;
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
654
	for_each_tag_ref(mark_tagged, NULL);
655 656

	/*
657
	 * Give the objects in the original recency order until
658 659
	 * we see a tagged tip.
	 */
J
Jeff King 已提交
660
	ALLOC_ARRAY(wo, to_pack.nr_objects);
661
	for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
662 663 664 665
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}
666
	last_untagged = i;
667 668 669 670

	/*
	 * Then fill all the tagged tips.
	 */
671
	for (; i < to_pack.nr_objects; i++) {
672 673 674 675 676 677 678
		if (objects[i].tagged)
			add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
679
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
680 681 682 683 684 685 686 687 688
		if (objects[i].type != OBJ_COMMIT &&
		    objects[i].type != OBJ_TAG)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
	 */
689
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
690 691 692 693 694 695 696 697
		if (objects[i].type != OBJ_TREE)
			continue;
		add_to_write_order(wo, &wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
698
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
699 700 701 702
		if (!objects[i].filled)
			add_family_to_write_order(wo, &wo_end, &objects[i]);
	}

703 704
	if (wo_end != to_pack.nr_objects)
		die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
705 706 707 708

	return wo;
}

709 710 711
static off_t write_reused_pack(struct sha1file *f)
{
	unsigned char buffer[8192];
712
	off_t to_write, total;
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728
	int fd;

	if (!is_pack_valid(reuse_packfile))
		die("packfile is invalid: %s", reuse_packfile->pack_name);

	fd = git_open_noatime(reuse_packfile->pack_name);
	if (fd < 0)
		die_errno("unable to open packfile for reuse: %s",
			  reuse_packfile->pack_name);

	if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
		die_errno("unable to seek in reused packfile");

	if (reuse_packfile_offset < 0)
		reuse_packfile_offset = reuse_packfile->pack_size - 20;

729
	total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
730 731 732 733 734 735 736 737 738 739 740 741

	while (to_write) {
		int read_pack = xread(fd, buffer, sizeof(buffer));

		if (read_pack <= 0)
			die_errno("unable to read from reused packfile");

		if (read_pack > to_write)
			read_pack = to_write;

		sha1write(f, buffer, read_pack);
		to_write -= read_pack;
742 743 744 745 746 747 748 749 750 751 752 753

		/*
		 * We don't know the actual number of objects written,
		 * only how many bytes written, how many bytes total, and
		 * how many objects total. So we can fake it by pretending all
		 * objects we are writing are the same size. This gives us a
		 * smooth progress meter, and at the end it matches the true
		 * answer.
		 */
		written = reuse_packfile_objects *
				(((double)(total - to_write)) / total);
		display_progress(progress_state, written);
754 755 756
	}

	close(fd);
757 758
	written = reuse_packfile_objects;
	display_progress(progress_state, written);
759 760 761
	return reuse_packfile_offset - sizeof(struct pack_header);
}

762 763 764 765
static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);

766
static void write_pack_file(void)
767
{
768
	uint32_t i = 0, j;
769
	struct sha1file *f;
770
	off_t offset;
771
	uint32_t nr_remaining = nr_result;
772
	time_t last_mtime = 0;
773
	struct object_entry **write_order;
774

775
	if (progress > pack_to_stdout)
776
		progress_state = start_progress(_("Writing objects"), nr_result);
J
Jeff King 已提交
777
	ALLOC_ARRAY(written_list, to_pack.nr_objects);
778
	write_order = compute_write_order();
779

780
	do {
G
Geert Bosch 已提交
781
		unsigned char sha1[20];
782
		char *pack_tmp_name = NULL;
G
Geert Bosch 已提交
783

784
		if (pack_to_stdout)
N
Nicolas Pitre 已提交
785
			f = sha1fd_throughput(1, "<stdout>", progress_state);
786 787
		else
			f = create_tmp_packfile(&pack_tmp_name);
788

789
		offset = write_pack_header(f, nr_remaining);
790 791 792 793 794 795 796 797 798

		if (reuse_packfile) {
			off_t packfile_size;
			assert(pack_to_stdout);

			packfile_size = write_reused_pack(f);
			offset += packfile_size;
		}

799
		nr_written = 0;
800
		for (; i < to_pack.nr_objects; i++) {
801
			struct object_entry *e = write_order[i];
802
			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
803 804 805
				break;
			display_progress(progress_state, written);
		}
806

807 808 809 810
		/*
		 * Did we write the wrong # entries in the header?
		 * If so, rewrite it like in fast-import
		 */
811 812 813 814
		if (pack_to_stdout) {
			sha1close(f, sha1, CSUM_CLOSE);
		} else if (nr_written == nr_remaining) {
			sha1close(f, sha1, CSUM_FSYNC);
815
		} else {
816
			int fd = sha1close(f, sha1, 0);
817
			fixup_pack_header_footer(fd, sha1, pack_tmp_name,
818
						 nr_written, sha1, offset);
819
			close(fd);
820 821 822 823
			if (write_bitmap_index) {
				warning(_(no_split_warning));
				write_bitmap_index = 0;
			}
824 825 826
		}

		if (!pack_to_stdout) {
827
			struct stat st;
828
			struct strbuf tmpname = STRBUF_INIT;
829

830 831 832 833 834 835 836
			/*
			 * Packs are runtime accessed in their mtime
			 * order since newer packs are more likely to contain
			 * younger objects.  So if we are creating multiple
			 * packs then we should modify the mtime of later ones
			 * to preserve this property.
			 */
837
			if (stat(pack_tmp_name, &st) < 0) {
838
				warning("failed to stat %s: %s",
839
					pack_tmp_name, strerror(errno));
840 841 842 843 844 845
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
846
				if (utime(pack_tmp_name, &utb) < 0)
847
					warning("failed utime() on %s: %s",
848
						pack_tmp_name, strerror(errno));
849 850
			}

851
			strbuf_addf(&tmpname, "%s-", base_name);
852 853 854 855 856 857

			if (write_bitmap_index) {
				bitmap_writer_set_checksum(sha1);
				bitmap_writer_build_type_index(written_list, nr_written);
			}

858
			finish_tmp_packfile(&tmpname, pack_tmp_name,
859 860
					    written_list, nr_written,
					    &pack_idx_opts, sha1);
861 862

			if (write_bitmap_index) {
863
				strbuf_addf(&tmpname, "%s.bitmap", sha1_to_hex(sha1));
864 865 866 867 868 869 870

				stop_progress(&progress_state);

				bitmap_writer_show_progress(progress);
				bitmap_writer_reuse_bitmaps(&to_pack);
				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
				bitmap_writer_build(&to_pack);
871
				bitmap_writer_finish(written_list, nr_written,
872
						     tmpname.buf, write_bitmap_options);
873 874 875
				write_bitmap_index = 0;
			}

876
			strbuf_release(&tmpname);
877
			free(pack_tmp_name);
G
Geert Bosch 已提交
878
			puts(sha1_to_hex(sha1));
879 880 881 882
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
883
			written_list[j]->offset = (off_t)-1;
884 885
		}
		nr_remaining -= nr_written;
886
	} while (nr_remaining && i < to_pack.nr_objects);
887 888

	free(written_list);
889
	free(write_order);
N
Nicolas Pitre 已提交
890
	stop_progress(&progress_state);
891
	if (written != nr_result)
892 893
		die("wrote %"PRIu32" objects while expecting %"PRIu32,
			written, nr_result);
894 895
}

896 897 898 899 900
static void setup_delta_attr_check(struct git_attr_check *check)
{
	static struct git_attr *attr_delta;

	if (!attr_delta)
901
		attr_delta = git_attr("delta");
902 903 904 905 906 907 908 909 910

	check[0].attr = attr_delta;
}

static int no_try_delta(const char *path)
{
	struct git_attr_check check[1];

	setup_delta_attr_check(check);
911
	if (git_check_attr(path, ARRAY_SIZE(check), check))
912 913 914 915 916 917
		return 0;
	if (ATTR_FALSE(check->value))
		return 1;
	return 0;
}

J
Jeff King 已提交
918 919 920 921 922 923 924 925 926 927 928 929 930
/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to excludei t, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const unsigned char *sha1,
				int exclude,
				uint32_t *index_pos)
931 932
{
	struct object_entry *entry;
N
Nicolas Pitre 已提交
933

J
Jeff King 已提交
934 935
	entry = packlist_find(&to_pack, sha1, index_pos);
	if (!entry)
N
Nicolas Pitre 已提交
936
		return 0;
J
Jeff King 已提交
937 938 939 940 941

	if (exclude) {
		if (!entry->preferred_base)
			nr_result--;
		entry->preferred_base = 1;
N
Nicolas Pitre 已提交
942
	}
943

J
Jeff King 已提交
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
	return 1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * As a side effect of this check, we will find the packed version of this
 * object, if any. We therefore pass out the pack information to avoid having
 * to look it up again later.
 */
static int want_object_in_pack(const unsigned char *sha1,
			       int exclude,
			       struct packed_git **found_pack,
			       off_t *found_offset)
{
	struct packed_git *p;

962 963 964
	if (!exclude && local && has_loose_object_nonlocal(sha1))
		return 0;

J
Jeff King 已提交
965 966 967
	*found_pack = NULL;
	*found_offset = 0;

968 969 970
	for (p = packed_git; p; p = p->next) {
		off_t offset = find_pack_entry_one(sha1, p);
		if (offset) {
J
Jeff King 已提交
971
			if (!*found_pack) {
972
				if (!is_pack_valid(p))
973
					continue;
J
Jeff King 已提交
974 975
				*found_offset = offset;
				*found_pack = p;
L
Linus Torvalds 已提交
976
			}
977
			if (exclude)
J
Jeff King 已提交
978
				return 1;
979 980 981 982
			if (incremental)
				return 0;
			if (local && !p->pack_local)
				return 0;
983 984
			if (ignore_packed_keep && p->pack_local && p->pack_keep)
				return 0;
L
Linus Torvalds 已提交
985 986
		}
	}
987

J
Jeff King 已提交
988 989 990 991 992 993 994 995 996 997 998 999 1000
	return 1;
}

static void create_object_entry(const unsigned char *sha1,
				enum object_type type,
				uint32_t hash,
				int exclude,
				int no_try_delta,
				uint32_t index_pos,
				struct packed_git *found_pack,
				off_t found_offset)
{
	struct object_entry *entry;
N
Nicolas Pitre 已提交
1001

1002
	entry = packlist_alloc(&to_pack, sha1, index_pos);
1003
	entry->hash = hash;
1004 1005
	if (type)
		entry->type = type;
N
Nicolas Pitre 已提交
1006 1007
	if (exclude)
		entry->preferred_base = 1;
1008 1009
	else
		nr_result++;
N
Nicolas Pitre 已提交
1010 1011 1012 1013
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}
1014

J
Jeff King 已提交
1015 1016
	entry->no_try_delta = no_try_delta;
}
N
Nicolas Pitre 已提交
1017

1018 1019 1020 1021
static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

J
Jeff King 已提交
1022 1023 1024 1025 1026 1027
static int add_object_entry(const unsigned char *sha1, enum object_type type,
			    const char *name, int exclude)
{
	struct packed_git *found_pack;
	off_t found_offset;
	uint32_t index_pos;
1028

J
Jeff King 已提交
1029 1030
	if (have_duplicate_entry(sha1, exclude, &index_pos))
		return 0;
1031

1032 1033 1034 1035 1036 1037
	if (!want_object_in_pack(sha1, exclude, &found_pack, &found_offset)) {
		/* The pack is missing an object, so it will not have closure */
		if (write_bitmap_index) {
			warning(_(no_closure_warning));
			write_bitmap_index = 0;
		}
J
Jeff King 已提交
1038
		return 0;
1039
	}
N
Nicolas Pitre 已提交
1040

J
Jeff King 已提交
1041 1042 1043
	create_object_entry(sha1, type, pack_name_hash(name),
			    exclude, name && no_try_delta(name),
			    index_pos, found_pack, found_offset);
1044

1045
	display_progress(progress_state, nr_result);
N
Nicolas Pitre 已提交
1046
	return 1;
1047 1048
}

1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
static int add_object_entry_from_bitmap(const unsigned char *sha1,
					enum object_type type,
					int flags, uint32_t name_hash,
					struct packed_git *pack, off_t offset)
{
	uint32_t index_pos;

	if (have_duplicate_entry(sha1, 0, &index_pos))
		return 0;

	create_object_entry(sha1, type, name_hash, 0, 0, index_pos, pack, offset);

1061
	display_progress(progress_state, nr_result);
N
Nicolas Pitre 已提交
1062
	return 1;
1063 1064
}

1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
struct pbase_tree_cache {
	unsigned char sha1[20];
	int ref;
	int temporary;
	void *tree_data;
	unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
J
Justin Lebar 已提交
1086
	 * going to evict it or find it through _get()
1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;

static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
1098
	enum object_type type;
1099 1100 1101 1102 1103 1104 1105 1106 1107 1108
	int neigh;
	int my_ix = pbase_tree_cache_ix(sha1);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
1109
		if (ent && !hashcmp(ent->sha1, sha1)) {
1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124
			ent->ref++;
			return ent;
		}
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one.  Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
1125
	data = read_sha1_file(sha1, &type, &size);
1126 1127
	if (!data)
		return NULL;
1128
	if (type != OBJ_TREE) {
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
1151
	hashcpy(nent->sha1, sha1);
1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}

static void pbase_tree_put(struct pbase_tree_cache *cache)
{
	if (!cache->temporary) {
		cache->ref--;
		return;
	}
	free(cache->tree_data);
	free(cache);
}

static int name_cmp_len(const char *name)
{
	int i;
	for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
		;
	return i;
}

static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
1180 1181
			     int cmplen,
			     const char *fullname)
1182
{
1183
	struct name_entry entry;
1184
	int cmp;
1185 1186

	while (tree_entry(tree,&entry)) {
1187 1188
		if (S_ISGITLINK(entry.mode))
			continue;
1189
		cmp = tree_entry_len(&entry) != cmplen ? 1 :
1190 1191
		      memcmp(name, entry.path, cmplen);
		if (cmp > 0)
1192
			continue;
1193 1194
		if (cmp < 0)
			return;
1195
		if (name[cmplen] != '/') {
1196
			add_object_entry(entry.sha1,
1197
					 object_type(entry.mode),
1198
					 fullname, 1);
1199 1200
			return;
		}
1201
		if (S_ISDIR(entry.mode)) {
1202
			struct tree_desc sub;
1203 1204 1205 1206
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			int downlen = name_cmp_len(down);

1207
			tree = pbase_tree_get(entry.sha1);
1208 1209
			if (!tree)
				return;
1210
			init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1211

1212
			add_pbase_object(&sub, down, downlen, fullname);
1213 1214 1215 1216
			pbase_tree_put(tree);
		}
	}
}
1217

1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;
	while (lo < hi) {
		int mi = (hi + lo) / 2;
		if (done_pbase_paths[mi] == hash)
			return mi;
		if (done_pbase_paths[mi] < hash)
			hi = mi;
		else
			lo = mi + 1;
	}
	return -lo-1;
}

static int check_pbase_path(unsigned hash)
{
	int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash);
	if (0 <= pos)
		return 1;
	pos = -pos - 1;
1243 1244 1245
	ALLOC_GROW(done_pbase_paths,
		   done_pbase_paths_num + 1,
		   done_pbase_paths_alloc);
1246 1247 1248 1249 1250 1251 1252 1253 1254
	done_pbase_paths_num++;
	if (pos < done_pbase_paths_num)
		memmove(done_pbase_paths + pos + 1,
			done_pbase_paths + pos,
			(done_pbase_paths_num - pos - 1) * sizeof(unsigned));
	done_pbase_paths[pos] = hash;
	return 0;
}

1255
static void add_preferred_base_object(const char *name)
1256 1257
{
	struct pbase_tree *it;
1258
	int cmplen;
V
Vicent Marti 已提交
1259
	unsigned hash = pack_name_hash(name);
1260

1261
	if (!num_preferred_base || check_pbase_path(hash))
1262 1263
		return;

1264
	cmplen = name_cmp_len(name);
1265 1266
	for (it = pbase_tree; it; it = it->next) {
		if (cmplen == 0) {
1267
			add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1);
1268 1269 1270
		}
		else {
			struct tree_desc tree;
1271
			init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1272
			add_pbase_object(&tree, name, cmplen, name);
1273
		}
1274 1275 1276
	}
}

1277
static void add_preferred_base(unsigned char *sha1)
1278
{
1279 1280 1281 1282
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	unsigned char tree_sha1[20];
1283

1284 1285 1286
	if (window <= num_preferred_base++)
		return;

1287 1288
	data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
	if (!data)
1289
		return;
1290 1291

	for (it = pbase_tree; it; it = it->next) {
1292
		if (!hashcmp(it->pcache.sha1, tree_sha1)) {
1293 1294 1295 1296 1297 1298 1299 1300 1301
			free(data);
			return;
		}
	}

	it = xcalloc(1, sizeof(*it));
	it->next = pbase_tree;
	pbase_tree = it;

1302
	hashcpy(it->pcache.sha1, tree_sha1);
1303 1304
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
1305 1306
}

1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333
static void cleanup_preferred_base(void)
{
	struct pbase_tree *it;
	unsigned i;

	it = pbase_tree;
	pbase_tree = NULL;
	while (it) {
		struct pbase_tree *this = it;
		it = this->next;
		free(this->pcache.tree_data);
		free(this);
	}

	for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
		if (!pbase_tree_cache[i])
			continue;
		free(pbase_tree_cache[i]->tree_data);
		free(pbase_tree_cache[i]);
		pbase_tree_cache[i] = NULL;
	}

	free(done_pbase_paths);
	done_pbase_paths = NULL;
	done_pbase_paths_num = done_pbase_paths_alloc = 0;
}

1334 1335
static void check_object(struct object_entry *entry)
{
1336
	if (entry->in_pack) {
1337
		struct packed_git *p = entry->in_pack;
1338
		struct pack_window *w_curs = NULL;
1339 1340 1341
		const unsigned char *base_ref = NULL;
		struct object_entry *base_entry;
		unsigned long used, used_0;
1342
		unsigned long avail;
1343 1344
		off_t ofs;
		unsigned char *buf, c;
1345

1346
		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1347

1348
		/*
1349 1350
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
1351
		 */
1352
		used = unpack_object_header_buffer(buf, avail,
1353 1354
						   &entry->in_pack_type,
						   &entry->size);
1355 1356
		if (used == 0)
			goto give_up;
1357

1358 1359 1360 1361
		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
1362
		 */
1363 1364 1365 1366 1367
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			entry->type = entry->in_pack_type;
			entry->in_pack_header_size = used;
1368 1369
			if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB)
				goto give_up;
1370 1371 1372
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
1373
			if (reuse_delta && !entry->preferred_base)
1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385
				base_ref = use_pack(p, &w_curs,
						entry->in_pack_offset + used, NULL);
			entry->in_pack_header_size = used + 20;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
1386 1387 1388 1389 1390
				if (!ofs || MSB(ofs, 7)) {
					error("delta base offset overflow in pack for %s",
					      sha1_to_hex(entry->idx.sha1));
					goto give_up;
				}
1391 1392
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
1393
			}
1394
			ofs = entry->in_pack_offset - ofs;
1395 1396 1397 1398 1399
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error("delta base offset out of bound for %s",
				      sha1_to_hex(entry->idx.sha1));
				goto give_up;
			}
1400
			if (reuse_delta && !entry->preferred_base) {
1401 1402
				struct revindex_entry *revidx;
				revidx = find_pack_revindex(p, ofs);
1403 1404
				if (!revidx)
					goto give_up;
1405 1406
				base_ref = nth_packed_object_sha1(p, revidx->nr);
			}
1407 1408
			entry->in_pack_header_size = used + used_0;
			break;
1409 1410
		}

1411
		if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
1412 1413 1414 1415 1416 1417 1418 1419 1420
			/*
			 * If base_ref was set above that means we wish to
			 * reuse delta data, and we even found that base
			 * in the list of objects we want to pack. Goodie!
			 *
			 * Depth value does not matter - find_deltas() will
			 * never consider reused delta as the base object to
			 * deltify other objects against, in order to avoid
			 * circular deltas.
1421
			 */
1422
			entry->type = entry->in_pack_type;
1423
			entry->delta = base_entry;
1424
			entry->delta_size = entry->size;
1425 1426
			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;
1427 1428 1429
			unuse_pack(&w_curs);
			return;
		}
1430

1431 1432 1433 1434 1435 1436 1437 1438
		if (entry->type) {
			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			entry->size = get_size_from_delta(p, &w_curs,
					entry->in_pack_offset + entry->in_pack_header_size);
1439 1440
			if (entry->size == 0)
				goto give_up;
1441
			unuse_pack(&w_curs);
1442 1443
			return;
		}
1444 1445 1446 1447 1448 1449

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with sha1_object_info() to find about the object type
		 * at this point...
		 */
1450
		give_up:
1451
		unuse_pack(&w_curs);
1452
	}
1453

G
Geert Bosch 已提交
1454
	entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
1455 1456 1457 1458 1459 1460
	/*
	 * The error condition is checked in prepare_pack().  This is
	 * to permit a missing preferred base object to be ignored
	 * as a preferred base.  Doing so can result in a larger
	 * pack file, but the transfer will still take place.
	 */
1461 1462
}

1463 1464 1465 1466 1467 1468 1469
static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

	/* avoid filesystem trashing with loose objects */
	if (!a->in_pack && !b->in_pack)
G
Geert Bosch 已提交
1470
		return hashcmp(a->idx.sha1, b->idx.sha1);
1471 1472 1473 1474 1475 1476 1477 1478 1479

	if (a->in_pack < b->in_pack)
		return -1;
	if (a->in_pack > b->in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
			(a->in_pack_offset > b->in_pack_offset);
}

1480 1481
static void get_object_details(void)
{
1482
	uint32_t i;
1483 1484
	struct object_entry **sorted_by_offset;

1485 1486 1487 1488
	sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
	for (i = 0; i < to_pack.nr_objects; i++)
		sorted_by_offset[i] = to_pack.objects + i;
	qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
1489

1490
	for (i = 0; i < to_pack.nr_objects; i++) {
1491 1492
		struct object_entry *entry = sorted_by_offset[i];
		check_object(entry);
1493
		if (big_file_threshold < entry->size)
1494 1495
			entry->no_try_delta = 1;
	}
1496

1497
	free(sorted_by_offset);
1498 1499
}

1500 1501 1502 1503 1504 1505 1506 1507 1508
/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.  The deepest deltas are therefore the oldest objects which are
 * less susceptible to be accessed often.
 */
1509
static int type_size_sort(const void *_a, const void *_b)
1510
{
1511 1512 1513
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

1514
	if (a->type > b->type)
1515
		return -1;
1516
	if (a->type < b->type)
1517
		return 1;
1518
	if (a->hash > b->hash)
1519
		return -1;
1520
	if (a->hash < b->hash)
1521
		return 1;
1522
	if (a->preferred_base > b->preferred_base)
1523
		return -1;
1524 1525
	if (a->preferred_base < b->preferred_base)
		return 1;
1526
	if (a->size > b->size)
1527 1528
		return -1;
	if (a->size < b->size)
1529
		return 1;
1530
	return a < b ? -1 : (a > b);  /* newest first */
1531 1532 1533 1534 1535
}

struct unpacked {
	struct object_entry *entry;
	void *data;
1536
	struct delta_index *index;
1537
	unsigned depth;
1538 1539
};

1540 1541
static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
1542 1543 1544 1545
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

1546 1547 1548
	if (delta_size < cache_max_small_delta_size)
		return 1;

1549 1550 1551 1552 1553 1554 1555
	/* cache delta, if objects are large enough compared to delta size */
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

1556
#ifndef NO_PTHREADS
N
Nicolas Pitre 已提交
1557

1558
static pthread_mutex_t read_mutex;
N
Nicolas Pitre 已提交
1559 1560 1561
#define read_lock()		pthread_mutex_lock(&read_mutex)
#define read_unlock()		pthread_mutex_unlock(&read_mutex)

1562
static pthread_mutex_t cache_mutex;
1563 1564 1565
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

1566
static pthread_mutex_t progress_mutex;
N
Nicolas Pitre 已提交
1567 1568 1569 1570 1571
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

#else

1572 1573 1574 1575 1576 1577
#define read_lock()		(void)0
#define read_unlock()		(void)0
#define cache_lock()		(void)0
#define cache_unlock()		(void)0
#define progress_lock()		(void)0
#define progress_unlock()	(void)0
N
Nicolas Pitre 已提交
1578 1579 1580

#endif

1581
static int try_delta(struct unpacked *trg, struct unpacked *src,
1582
		     unsigned max_depth, unsigned long *mem_usage)
1583
{
1584 1585
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
1586
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1587
	unsigned ref_depth;
1588
	enum object_type type;
1589 1590 1591
	void *delta_buf;

	/* Don't bother doing diffs between different types */
1592
	if (trg_entry->type != src_entry->type)
1593 1594
		return -1;

1595
	/*
1596 1597 1598 1599 1600 1601
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
1602
	 */
1603
	if (reuse_delta && trg_entry->in_pack &&
1604
	    trg_entry->in_pack == src_entry->in_pack &&
1605
	    !src_entry->preferred_base &&
1606 1607
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
1608 1609
		return 0;

1610
	/* Let's not bust the allowed depth. */
1611
	if (src->depth >= max_depth)
1612
		return 0;
1613

1614
	/* Now some size filtering heuristics. */
1615
	trg_size = trg_entry->size;
1616 1617 1618 1619 1620
	if (!trg_entry->delta) {
		max_size = trg_size/2 - 20;
		ref_depth = 1;
	} else {
		max_size = trg_entry->delta_size;
1621
		ref_depth = trg->depth;
1622
	}
1623
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
1624
						(max_depth - ref_depth + 1);
1625 1626
	if (max_size == 0)
		return 0;
1627
	src_size = src_entry->size;
1628
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
1629
	if (sizediff >= max_size)
1630
		return 0;
1631 1632
	if (trg_size < src_size / 32)
		return 0;
1633

1634 1635
	/* Load data if not already done */
	if (!trg->data) {
N
Nicolas Pitre 已提交
1636
		read_lock();
G
Geert Bosch 已提交
1637
		trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);
N
Nicolas Pitre 已提交
1638
		read_unlock();
1639 1640 1641
		if (!trg->data)
			die("object %s cannot be read",
			    sha1_to_hex(trg_entry->idx.sha1));
1642 1643
		if (sz != trg_size)
			die("object %s inconsistent object length (%lu vs %lu)",
G
Geert Bosch 已提交
1644
			    sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);
1645
		*mem_usage += sz;
1646 1647
	}
	if (!src->data) {
N
Nicolas Pitre 已提交
1648
		read_lock();
G
Geert Bosch 已提交
1649
		src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);
N
Nicolas Pitre 已提交
1650
		read_unlock();
1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning("object %s cannot be read",
						sha1_to_hex(src_entry->idx.sha1));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
1665 1666
			die("object %s cannot be read",
			    sha1_to_hex(src_entry->idx.sha1));
1667
		}
1668 1669
		if (sz != src_size)
			die("object %s inconsistent object length (%lu vs %lu)",
G
Geert Bosch 已提交
1670
			    sha1_to_hex(src_entry->idx.sha1), sz, src_size);
1671
		*mem_usage += sz;
1672 1673 1674
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
1675 1676 1677 1678 1679 1680
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning("suboptimal pack - out of memory");
			return 0;
		}
1681
		*mem_usage += sizeof_delta_index(src->index);
1682 1683 1684
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1685
	if (!delta_buf)
1686
		return 0;
1687

N
Nicolas Pitre 已提交
1688
	if (trg_entry->delta) {
1689 1690
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == trg_entry->delta_size &&
1691
		    src->depth + 1 >= trg->depth) {
1692 1693 1694
			free(delta_buf);
			return 0;
		}
1695
	}
N
Nicolas Pitre 已提交
1696

1697 1698 1699
	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
1700
	 * away when NO_PTHREADS is defined.
1701
	 */
1702
	free(trg_entry->delta_data);
1703
	cache_lock();
N
Nicolas Pitre 已提交
1704 1705 1706 1707
	if (trg_entry->delta_data) {
		delta_cache_size -= trg_entry->delta_size;
		trg_entry->delta_data = NULL;
	}
1708
	if (delta_cacheable(src_size, trg_size, delta_size)) {
1709
		delta_cache_size += delta_size;
1710 1711 1712 1713
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
1714
		free(delta_buf);
1715 1716
	}

1717 1718 1719 1720
	trg_entry->delta = src_entry;
	trg_entry->delta_size = delta_size;
	trg->depth = src->depth + 1;

1721
	return 1;
1722 1723
}

1724
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1725
{
1726 1727 1728 1729 1730 1731 1732 1733 1734
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
1735 1736
}

1737
static unsigned long free_unpacked(struct unpacked *n)
1738
{
1739
	unsigned long freed_mem = sizeof_delta_index(n->index);
1740 1741 1742
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
1743
		freed_mem += n->entry->size;
1744 1745 1746 1747
		free(n->data);
		n->data = NULL;
	}
	n->entry = NULL;
1748
	n->depth = 0;
1749
	return freed_mem;
1750 1751
}

1752
static void find_deltas(struct object_entry **list, unsigned *list_size,
1753
			int window, int depth, unsigned *processed)
1754
{
1755
	uint32_t i, idx = 0, count = 0;
1756
	struct unpacked *array;
1757
	unsigned long mem_usage = 0;
1758

1759
	array = xcalloc(window, sizeof(struct unpacked));
1760

1761
	for (;;) {
1762
		struct object_entry *entry;
1763
		struct unpacked *n = array + idx;
1764
		int j, max_depth, best_base = -1;
1765

1766 1767 1768 1769 1770
		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
1771
		entry = *list++;
1772 1773 1774 1775 1776 1777 1778
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

1779
		mem_usage -= free_unpacked(n);
1780
		n->entry = entry;
1781

1782
		while (window_memory_limit &&
1783
		       mem_usage > window_memory_limit &&
1784 1785
		       count > 1) {
			uint32_t tail = (idx + window - count) % window;
1786
			mem_usage -= free_unpacked(array + tail);
1787 1788 1789
			count--;
		}

1790 1791 1792 1793 1794 1795
		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807
		/*
		 * If the current object is at pack edge, take the depth the
		 * objects that depend on the current object into account
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (entry->delta_child) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

L
Linus Torvalds 已提交
1808 1809
		j = window;
		while (--j > 0) {
1810
			int ret;
1811
			uint32_t other_idx = idx + j;
1812
			struct unpacked *m;
L
Linus Torvalds 已提交
1813 1814
			if (other_idx >= window)
				other_idx -= window;
1815 1816 1817
			m = array + other_idx;
			if (!m->entry)
				break;
1818
			ret = try_delta(n, m, max_depth, &mem_usage);
1819
			if (ret < 0)
1820
				break;
1821 1822
			else if (ret > 0)
				best_base = other_idx;
1823
		}
1824

1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847
		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			entry->z_delta_size = do_compress(&entry->delta_data,
							  entry->delta_size);
			cache_lock();
			delta_cache_size -= entry->delta_size;
			delta_cache_size += entry->z_delta_size;
			cache_unlock();
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (entry->delta && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (entry->delta) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}

#ifndef NO_PTHREADS

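/*
 * Memory-pressure callback installed while the threaded delta search is
 * running: take the read lock before releasing pack windows so memory
 * is not pulled out from under a thread that is reading pack data.
 */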
static void try_to_free_from_threads(size_t size)
{
	read_lock();
	release_pack_memory(size);
	read_unlock();
}

static try_to_free_t old_try_to_free_routine;

/*
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;
	unsigned list_size;
	unsigned remaining;
	int window;
	int depth;
	int working;
	int data_ready;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;
};

static pthread_cond_t progress_cond;

/*
 * Mutex and condition variable can't be statically-initialized on Windows.
 */
static void init_threaded_search(void)
{
	init_recursive_mutex(&read_mutex);
	pthread_mutex_init(&cache_mutex, NULL);
	pthread_mutex_init(&progress_mutex, NULL);
	pthread_cond_init(&progress_cond, NULL);
	old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads);
}

static void cleanup_threaded_search(void)
{
	set_try_to_free_routine(old_try_to_free_routine);
	pthread_cond_destroy(&progress_cond);
	pthread_mutex_destroy(&read_mutex);
	pthread_mutex_destroy(&cache_mutex);
	pthread_mutex_destroy(&progress_mutex);
}

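/*
 * Worker thread entry point: run find_deltas() on the assigned slice,
 * then signal the main thread and wait to be handed more work, following
 * the protocol described above.
 */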
static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	while (me->remaining) {
		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);
	}
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf(stderr, "Delta compression using up to %d threads.\n",
				delta_search_threads);
	p = xcalloc(delta_search_threads, sizeof(*p));

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die("unable to create thread: %s", strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

#else
#define ll_find_deltas(l, s, w, d, p)	find_deltas(l, &s, w, d, p)
#endif

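/*
 * for_each_ref() callback used for --include-tag: if a tag peels to an
 * object that is already being packed, add the tag object itself too.
 */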
static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
{
	struct object_id peeled;

	if (starts_with(path, "refs/tags/") && /* is a tag? */
	    !peel_ref(path, peeled.hash)    && /* peelable? */
	    packlist_find(&to_pack, peeled.hash, NULL))      /* object packed? */
		add_object_entry(oid->hash, OBJ_TAG, NULL, 0);
	return 0;
}

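/*
 * Decide which objects are still candidates for delta compression,
 * sort them with type_size_sort() and hand them to the (possibly
 * threaded) delta search.
 */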
static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack.  Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (entry->delta)
			/* This happens if we decided to reuse an existing
			 * delta from a pack.  "reuse_delta &&" is implied.
			 */
			continue;

		if (entry->size < 50)
			continue;

		if (entry->no_try_delta)
			continue;

		if (!entry->preferred_base) {
			nr_deltas++;
			if (entry->type < 0)
				die("unable to get type of object %s",
				    sha1_to_hex(entry->idx.sha1));
		} else {
			if (entry->type < 0) {
				/*
				 * This object is not found, but we
				 * don't have to include it anyway.
				 */
				continue;
			}
		}

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;
		if (progress)
			progress_state = start_progress(_("Compressing objects"),
							nr_deltas);
		qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die("inconsistency with delta count");
	}
	free(delta_list);
}

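/*
 * git_config() callback that applies the pack.* configuration (window,
 * depth, compression, delta cache, bitmap and threading settings)
 * before the command-line options are parsed.
 */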
static int git_pack_config(const char *k, const char *v, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.windowmemory")) {
		window_memory_limit = git_config_ulong(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.depth")) {
		depth = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.compression")) {
		int level = git_config_int(k, v);
		if (level == -1)
			level = Z_DEFAULT_COMPRESSION;
		else if (level < 0 || level > Z_BEST_COMPRESSION)
			die("bad pack compression level %d", level);
		pack_compression_level = level;
		pack_compression_seen = 1;
		return 0;
	}
	if (!strcmp(k, "pack.deltacachesize")) {
		max_delta_cache_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachelimit")) {
		cache_max_small_delta_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.writebitmaphashcache")) {
		if (git_config_bool(k, v))
			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
		else
			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
	}
	if (!strcmp(k, "pack.usebitmaps")) {
		use_bitmap_index = git_config_bool(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.threads")) {
		delta_search_threads = git_config_int(k, v);
		if (delta_search_threads < 0)
			die("invalid number of threads specified (%d)",
			    delta_search_threads);
#ifdef NO_PTHREADS
		if (delta_search_threads != 1)
			warning("no threads support, ignoring %s", k);
#endif
		return 0;
	}
	if (!strcmp(k, "pack.indexversion")) {
		pack_idx_opts.version = git_config_int(k, v);
		if (pack_idx_opts.version > 2)
			die("bad pack.indexversion=%"PRIu32,
			    pack_idx_opts.version);
		return 0;
	}
	return git_default_config(k, v, cb);
}

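/*
 * Read the object list produced by "git rev-list --objects" from stdin:
 * plain "<sha1> <path>" lines become objects to pack, while lines
 * starting with '-' name edge objects used only as preferred delta
 * bases.
 */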
static void read_object_list_from_stdin(void)
{
	char line[40 + 1 + PATH_MAX + 2];
	unsigned char sha1[20];

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
				    line);
			add_preferred_base(sha1);
			continue;
		}
		if (get_sha1_hex(line, sha1))
			die("expected sha1, got garbage:\n %s", line);

		add_preferred_base_object(line+41);
		add_object_entry(sha1, 0, line+41, 0);
	}
}

#define OBJECT_ADDED (1u<<20)

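/*
 * Callbacks for the internal rev-list traversal: every commit and
 * object seen is added to the pack and flagged OBJECT_ADDED so the
 * unreachable-object handling below can tell what was reached.
 */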
static void show_commit(struct commit *commit, void *data)
{
	add_object_entry(commit->object.oid.hash, OBJ_COMMIT, NULL, 0);
	commit->object.flags |= OBJECT_ADDED;

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);
}

static void show_object(struct object *obj, const char *name, void *data)
{
	add_preferred_base_object(name);
	add_object_entry(obj->oid.hash, obj->type, name, 0);
	obj->flags |= OBJECT_ADDED;
}

static void show_edge(struct commit *commit)
{
	add_preferred_base(commit->object.oid.hash);
}

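/*
 * Bookkeeping for --keep-unreachable: remember objects that live in
 * local packs but were not reached by the traversal, so that they can
 * be added to the new pack in their original (offset) order.
 */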
struct in_pack_object {
	off_t offset;
	struct object *object;
};

struct in_pack {
	int alloc;
	int nr;
	struct in_pack_object *array;
};

static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
{
	in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
	in_pack->array[in_pack->nr].object = object;
	in_pack->nr++;
}

/*
 * Compare the objects in the offset order, in order to emulate the
 * "git rev-list --objects" output that produced the pack originally.
 */
static int ofscmp(const void *a_, const void *b_)
{
	struct in_pack_object *a = (struct in_pack_object *)a_;
	struct in_pack_object *b = (struct in_pack_object *)b_;

	if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	else
		return oidcmp(&a->object->oid, &b->object->oid);
}

static void add_objects_in_unpacked_packs(struct rev_info *revs)
{
	struct packed_git *p;
	struct in_pack in_pack;
	uint32_t i;

	memset(&in_pack, 0, sizeof(in_pack));

	for (p = packed_git; p; p = p->next) {
		const unsigned char *sha1;
		struct object *o;

		if (!p->pack_local || p->pack_keep)
			continue;
		if (open_pack_index(p))
			die("cannot open pack index");

		ALLOC_GROW(in_pack.array,
			   in_pack.nr + p->num_objects,
			   in_pack.alloc);

		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			o = lookup_unknown_object(sha1);
			if (!(o->flags & OBJECT_ADDED))
				mark_in_pack_object(o, p, &in_pack);
			o->flags |= OBJECT_ADDED;
		}
	}

	if (in_pack.nr) {
		qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
		      ofscmp);
		for (i = 0; i < in_pack.nr; i++) {
			struct object *o = in_pack.array[i].object;
			add_object_entry(o->oid.hash, o->type, "", 0);
		}
	}
	free(in_pack.array);
}

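/*
 * Return true if the object is also available from a pack we are not
 * rewriting (a .keep pack or one borrowed from an alternate); such an
 * object does not need to be loosened.  The last matching pack is
 * cached to speed up consecutive lookups.
 */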
static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1)
{
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found : packed_git;

	while (p) {
		if ((!p->pack_local || p->pack_keep) &&
			find_pack_entry_one(sha1, p)) {
			last_found = p;
			return 1;
		}
		if (p == last_found)
			p = packed_git;
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of sha1s that should not be discarded
 * because they are either written too recently, or are
 * reachable from another object that was.
 *
 * This is filled by get_object_list.
 */
static struct sha1_array recent_objects;

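/*
 * With --unpack-unreachable=<time>, an unreachable packed object that
 * is older than the cutoff and not referenced by any recent object can
 * be dropped outright instead of being written out loose.
 */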
static int loosened_object_can_be_discarded(const unsigned char *sha1,
					    unsigned long mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (sha1_array_lookup(&recent_objects, sha1) >= 0)
		return 0;
	return 1;
}

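/*
 * Write out as loose objects everything in the local packs that did not
 * make it into the new pack and may not simply be discarded (used by
 * --unpack-unreachable).
 */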
static void loosen_unused_packed_objects(struct rev_info *revs)
{
	struct packed_git *p;
	uint32_t i;
	const unsigned char *sha1;

	for (p = packed_git; p; p = p->next) {
		if (!p->pack_local || p->pack_keep)
			continue;

		if (open_pack_index(p))
			die("cannot open pack index");

		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			if (!packlist_find(&to_pack, sha1, NULL) &&
			    !has_sha1_pack_kept_or_nonlocal(sha1) &&
			    !loosened_object_can_be_discarded(sha1, p->mtime))
				if (force_object_loose(sha1, p->mtime))
					die("unable to force loose object");
		}
	}
}

/*
 * This tracks any options which a reader of the pack might
 * not understand, and which would therefore prevent blind reuse
 * of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return allow_ofs_delta;
}

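/*
 * Enumerate the objects to pack using the reachability bitmaps, reusing
 * a prefix of an existing packfile verbatim when the pack options allow
 * it.
 */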
static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (prepare_bitmap_walk(revs) < 0)
		return -1;

	if (pack_options_allow_reuse() &&
	    !reuse_partial_packfile_from_bitmap(
			&reuse_packfile,
			&reuse_packfile_objects,
			&reuse_packfile_offset)) {
		assert(reuse_packfile_objects);
		nr_result += reuse_packfile_objects;
		display_progress(progress_state, nr_result);
	}

	traverse_bitmap_commit_list(&add_object_entry_from_bitmap);
	return 0;
}

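/*
 * Traversal callbacks that remember recently reachable objects so that
 * loosened_object_can_be_discarded() will not drop them.
 */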
static void record_recent_object(struct object *obj,
				 const char *name,
				 void *data)
{
	sha1_array_append(&recent_objects, obj->oid.hash);
}

static void record_recent_commit(struct commit *commit, void *data)
{
	sha1_array_append(&recent_objects, commit->object.oid.hash);
}

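/*
 * Drive the internal rev-list: read revision arguments from stdin, walk
 * (or bitmap-enumerate) the resulting objects into to_pack, and then
 * apply the --keep-unreachable / --unpack-unreachable policies.
 */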
static void get_object_list(int ac, const char **av)
{
	struct rev_info revs;
	char line[1000];
	int flags = 0;

	init_revisions(&revs, NULL);
	save_commit_buffer = 0;
	setup_revisions(ac, av, &revs, NULL);

	/* make sure shallows are read */
	is_repository_shallow();

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				unsigned char sha1[20];
				if (get_sha1_hex(line + 10, sha1))
					die("not an SHA-1 '%s'", line + 10);
				register_shallow(sha1);
				use_bitmap_index = 0;
				continue;
			}
			die("not a rev '%s'", line);
		}
		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
			die("bad revision '%s'", line);
	}

	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
		return;

	if (prepare_revision_walk(&revs))
		die("revision walk setup failed");
	mark_edges_uninteresting(&revs, show_edge);
	traverse_commit_list(&revs, show_commit, show_object, NULL);

	if (unpack_unreachable_expiration) {
		revs.ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(&revs,
				unpack_unreachable_expiration))
			die("unable to add recent objects");
		if (prepare_revision_walk(&revs))
			die("revision walk setup failed");
		traverse_commit_list(&revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs(&revs);
	if (unpack_unreachable)
		loosen_unused_packed_objects(&revs);

	sha1_array_clear(&recent_objects);
}

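/*
 * Parse --index-version=<version>[,<offset>]: select the .idx format to
 * write and, optionally, the offset limit above which index entries
 * switch to 64 bits.
 */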
static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	char *c;
	const char *val = arg;
	pack_idx_opts.version = strtoul(val, &c, 10);
	if (pack_idx_opts.version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
	if (*c || pack_idx_opts.off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

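/*
 * Parse --unpack-unreachable[=<time>]: loosen unreachable objects, and
 * when a <time> is given allow objects older than it to be dropped
 * instead.
 */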
static int option_parse_unpack_unreachable(const struct option *opt,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	}
	else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

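/*
 * Entry point for "git pack-objects": parse the options, collect the
 * objects to pack (from stdin or via an internal rev-list walk), run
 * the delta search and write out the resulting pack.
 */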
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	int use_internal_rev_list = 0;
	int thin = 0;
	int shallow = 0;
	int all_progress_implied = 0;
	struct argv_array rp = ARGV_ARRAY_INIT;
	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
	int rev_list_index = 0;
	struct option pack_objects_options[] = {
		OPT_SET_INT('q', "quiet", &progress,
			    N_("do not show progress meter"), 0),
		OPT_SET_INT(0, "progress", &progress,
			    N_("show progress meter"), 1),
		OPT_SET_INT(0, "all-progress", &progress,
			    N_("show progress meter during object writing phase"), 2),
		OPT_BOOL(0, "all-progress-implied",
			 &all_progress_implied,
			 N_("similar to --all-progress when progress meter is shown")),
		{ OPTION_CALLBACK, 0, "index-version", NULL, N_("version[,offset]"),
		  N_("write the pack index file in the specified idx format version"),
		  0, option_parse_index_version },
		OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
			      N_("maximum size of each output pack file")),
		OPT_BOOL(0, "local", &local,
			 N_("ignore borrowed objects from alternate object store")),
		OPT_BOOL(0, "incremental", &incremental,
			 N_("ignore packed objects")),
		OPT_INTEGER(0, "window", &window,
			    N_("limit pack window by objects")),
		OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
			      N_("limit pack window by memory in addition to object limit")),
		OPT_INTEGER(0, "depth", &depth,
			    N_("maximum length of delta chain allowed in the resulting pack")),
		OPT_BOOL(0, "reuse-delta", &reuse_delta,
			 N_("reuse existing deltas")),
		OPT_BOOL(0, "reuse-object", &reuse_object,
			 N_("reuse existing objects")),
		OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
			 N_("use OFS_DELTA objects")),
		OPT_INTEGER(0, "threads", &delta_search_threads,
			    N_("use threads when searching for best delta matches")),
		OPT_BOOL(0, "non-empty", &non_empty,
			 N_("do not create an empty pack output")),
		OPT_BOOL(0, "revs", &use_internal_rev_list,
			 N_("read revision arguments from standard input")),
		{ OPTION_SET_INT, 0, "unpacked", &rev_list_unpacked, NULL,
		  N_("limit the objects to those that are not yet packed"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "all", &rev_list_all, NULL,
		  N_("include objects reachable from any reference"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL,
		  N_("include objects referred by reflog entries"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		{ OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL,
		  N_("include objects referred to by the index"),
		  PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 },
		OPT_BOOL(0, "stdout", &pack_to_stdout,
			 N_("output pack to stdout")),
		OPT_BOOL(0, "include-tag", &include_tag,
			 N_("include tag objects that refer to objects to be packed")),
		OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
			 N_("keep unreachable objects")),
		{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
		  N_("unpack unreachable objects newer than <time>"),
		  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
		OPT_BOOL(0, "thin", &thin,
			 N_("create thin packs")),
		OPT_BOOL(0, "shallow", &shallow,
			 N_("create packs suitable for shallow fetches")),
		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep,
			 N_("ignore packs that have companion .keep file")),
		OPT_INTEGER(0, "compression", &pack_compression_level,
			    N_("pack compression level")),
		OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
			    N_("do not hide commits by grafts"), 0),
		OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
			 N_("use a bitmap index if available to speed up counting objects")),
		OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index,
			 N_("write a bitmap index together with the pack index")),
		OPT_END(),
	};

	check_replace_refs = 0;

	reset_pack_idx_option(&pack_idx_opts);
	git_config(git_pack_config, NULL);
	if (!pack_compression_seen && core_compression_seen)
		pack_compression_level = core_compression_level;

	progress = isatty(2);
	argc = parse_options(argc, argv, prefix, pack_objects_options,
			     pack_usage, 0);

	if (argc) {
		base_name = argv[0];
		argc--;
	}
	if (pack_to_stdout != !base_name || argc)
		usage_with_options(pack_usage, pack_objects_options);

	argv_array_push(&rp, "pack-objects");
	if (thin) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, shallow
				? "--objects-edge-aggressive"
				: "--objects-edge");
	} else
		argv_array_push(&rp, "--objects");

	if (rev_list_all) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--all");
	}
	if (rev_list_reflog) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--reflog");
	}
	if (rev_list_index) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--indexed-objects");
	}
	if (rev_list_unpacked) {
		use_internal_rev_list = 1;
		argv_array_push(&rp, "--unpacked");
	}

	if (!reuse_object)
		reuse_delta = 0;
	if (pack_compression_level == -1)
		pack_compression_level = Z_DEFAULT_COMPRESSION;
	else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
		die("bad pack compression level %d", pack_compression_level);

	if (!delta_search_threads)	/* --threads=0 means autodetect */
		delta_search_threads = online_cpus();

#ifdef NO_PTHREADS
	if (delta_search_threads != 1)
		warning("no threads support, ignoring --threads");
#endif
	if (!pack_to_stdout && !pack_size_limit)
		pack_size_limit = pack_size_limit_cfg;
	if (pack_to_stdout && pack_size_limit)
		die("--max-pack-size cannot be used to build a pack for transfer.");
	if (pack_size_limit && pack_size_limit < 1024*1024) {
		warning("minimum pack size limit is 1 MiB");
		pack_size_limit = 1024*1024;
	}

	if (!pack_to_stdout && thin)
		die("--thin cannot be used to build an indexable pack.");

	if (keep_unreachable && unpack_unreachable)
		die("--keep-unreachable and --unpack-unreachable are incompatible.");
	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
		unpack_unreachable_expiration = 0;

	if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow())
		use_bitmap_index = 0;

	if (pack_to_stdout || !rev_list_all)
		write_bitmap_index = 0;

	if (progress && all_progress_implied)
		progress = 2;

	prepare_packed_git();

	if (progress)
		progress_state = start_progress(_("Counting objects"), 0);
	if (!use_internal_rev_list)
		read_object_list_from_stdin();
	else {
		get_object_list(rp.argc, rp.argv);
		argv_array_clear(&rp);
	}
	cleanup_preferred_base();
	if (include_tag && nr_result)
		for_each_ref(add_ref_tag, NULL);
	stop_progress(&progress_state);

	if (non_empty && !nr_result)
		return 0;
	if (nr_result)
		prepare_pack(window, depth);
	write_pack_file();
	if (progress)
		fprintf(stderr, "Total %"PRIu32" (delta %"PRIu32"),"
			" reused %"PRIu32" (delta %"PRIu32")\n",
			written, written_delta, reused, reused_delta);
	return 0;
}