builtin-pack-objects.c 56.9 KB
Newer Older
1
#include "builtin.h"
2
#include "cache.h"
3
#include "attr.h"
4
#include "object.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
9
#include "delta.h"
10
#include "pack.h"
11
#include "csum-file.h"
J
Junio C Hamano 已提交
12
#include "tree-walk.h"
13 14 15
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
N
Nicolas Pitre 已提交
16
#include "progress.h"
17

N
Nicolas Pitre 已提交
18 19 20 21
#ifdef THREADED_DELTA_SEARCH
#include <pthread.h>
#endif

22
/* Usage string printed by usage() when the command line is malformed. */
static const char pack_usage[] = "\
git-pack-objects [{ -q | --progress | --all-progress }] \n\
	[--max-pack-size=N] [--local] [--incremental] \n\
	[--window=N] [--window-memory=N] [--depth=N] \n\
	[--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\
	[--threads=N] [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\
	[--stdout | base-name] [--keep-unreachable] [<ref-list | <object-list]";
29 30

/*
 * One object to be (or already) packed.  Entries are collected in the
 * global 'objects' array and refer to each other by pointer when one
 * object is stored as a delta against another.
 */
struct object_entry {
	struct pack_idx_entry idx;	/* sha1, crc32 and offset in the output pack */
	unsigned long size;	/* uncompressed size */
	struct packed_git *in_pack; 	/* already in pack */
	off_t in_pack_offset;	/* where it lives in that existing pack */
	struct object_entry *delta;	/* delta base object */
	struct object_entry *delta_child; /* deltified objects who bases me */
	struct object_entry *delta_sibling; /* other deltified objects who
					     * uses the same base as me
					     */
	void *delta_data;	/* cached delta (uncompressed) */
	unsigned long delta_size;	/* delta data size (uncompressed) */
	unsigned int hash;	/* name hint hash */
	enum object_type type;
	enum object_type in_pack_type;	/* could be delta */
	unsigned char in_pack_header_size;	/* bytes of header before the data in in_pack */
	unsigned char preferred_base; /* we do not pack this, but is available
				       * to be used as the base object to delta
				       * objects against.
				       */
	unsigned char no_try_delta;	/* set when gitattributes forbid deltifying this path */
};

53
/*
 * Objects we are going to pack are collected in objects array (dynamically
 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
 * in the order we see -- typically rev-list --objects order that gives us
 * nice "minimum seek" order.
 */
static struct object_entry *objects;
static struct pack_idx_entry **written_list;	/* entries in output order, for the .idx writer */
static uint32_t nr_objects, nr_alloc, nr_result, nr_written;

/* Command-line controlled behavior flags and tunables. */
static int non_empty;
static int no_reuse_delta, no_reuse_object, keep_unreachable;
static int local;
static int incremental;
static int allow_ofs_delta;	/* may emit OBJ_OFS_DELTA entries */
static const char *base_name;	/* output pack name prefix; NULL with --stdout */
static int progress = 1;
static int window = 10;
static uint32_t pack_size_limit;	/* --max-pack-size; 0 means unlimited */
static int depth = 50;
static int delta_search_threads = 1;
static int pack_to_stdout;
static int num_preferred_base;
static struct progress *progress_state;
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
static int pack_compression_seen;

/* Accounting for the in-memory cache of freshly computed deltas. */
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 0;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.
 * This hashtable is built after all the objects are seen.
 */
static int *object_ix;
static int object_ix_hashsz;

/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
 * size is easily available by examining the pack entry header).  It is
 * also rather expensive to find the sha1 for an object given its offset.
 *
 * We build a hashtable of existing packs (pack_revindex), and keep reverse
 * index here -- pack index file is sorted by object name mapping to offset;
 * this pack_revindex[].revindex array is a list of offset/index_nr pairs
 * ordered by offset, so if you know the offset of an object, next offset
 * is where its packed representation ends and the index_nr can be used to
 * get the object sha1 from the main index.
 */
struct revindex_entry {
	off_t offset;	/* where the object's data starts in the pack */
	unsigned int nr;	/* position of the object in the .idx file */
};
struct pack_revindex {
	struct packed_git *p;
	struct revindex_entry *revindex;	/* lazily built, see prepare_pack_revindex() */
};
static struct  pack_revindex *pack_revindex;
static int pack_revindex_hashsz;

/*
 * stats
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
124 125 126

/*
 * Find the slot for pack 'p' in the open-addressed pack_revindex table.
 * Returns the slot index when 'p' is present; otherwise returns
 * (-1 - free_slot) so the caller can recover the insertion point.
 */
static int pack_revindex_ix(struct packed_git *p)
{
	unsigned long ui = (unsigned long)p;
	int i;

	ui = ui ^ (ui >> 16); /* defeat structure alignment */
	i = (int)(ui % pack_revindex_hashsz);
	/* linear probing with wrap-around; an empty slot ends the chain */
	while (pack_revindex[i].p) {
		if (pack_revindex[i].p == p)
			return i;
		if (++i == pack_revindex_hashsz)
			i = 0;
	}
	return -1 - i;
}

/*
 * Build the hashtable that maps each known pack to its (lazily filled)
 * reverse index.  The table gets 11 slots per pack so probe chains in
 * pack_revindex_ix() stay short.  No-op when there are no packs.
 */
static void prepare_pack_ix(void)
{
	int num;
	struct packed_git *p;

	for (num = 0, p = packed_git; p; p = p->next)
		num++;
	if (!num)
		return;
	pack_revindex_hashsz = num * 11;
	/*
	 * xcalloc follows the calloc(nmemb, size) convention; the original
	 * call had the two arguments swapped (harmless, since only the
	 * product matters, but misleading to readers).
	 */
	pack_revindex = xcalloc(pack_revindex_hashsz, sizeof(*pack_revindex));
	for (p = packed_git; p; p = p->next) {
		num = pack_revindex_ix(p);
		num = -1 - num;	/* not found, so this is the free slot */
		pack_revindex[num].p = p;
	}
	/* revindex elements are lazily initialized */
}

/* qsort() callback: order revindex entries by ascending pack offset. */
static int cmp_offset(const void *va, const void *vb)
{
	const struct revindex_entry *ea = va;
	const struct revindex_entry *eb = vb;

	if (ea->offset < eb->offset)
		return -1;
	if (ea->offset > eb->offset)
		return 1;
	return 0;
}

/*
 * Ordered list of offsets of objects in the pack.
 */
static void prepare_pack_revindex(struct pack_revindex *rix)
{
	struct packed_git *p = rix->p;
	int num_ent = p->num_objects;
	int i;
	const char *index = p->index_data;

	/* one extra sentinel entry marks the end of the last object */
	rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1));
	index += 4 * 256;	/* skip the 256-entry fanout table */

	if (p->index_version > 1) {
		/*
		 * Index v2: after the fanout come sha1s and crc32s
		 * (20 + 4 bytes per object), then 4-byte offsets; an
		 * offset with the high bit set is an index into the
		 * trailing 8-byte large-offset table.
		 */
		const uint32_t *off_32 =
			(uint32_t *)(index + 8 + p->num_objects * (20 + 4));
		const uint32_t *off_64 = off_32 + p->num_objects;
		for (i = 0; i < num_ent; i++) {
			uint32_t off = ntohl(*off_32++);
			if (!(off & 0x80000000)) {
				rix->revindex[i].offset = off;
			} else {
				rix->revindex[i].offset =
					((uint64_t)ntohl(*off_64++)) << 32;
				rix->revindex[i].offset |=
					ntohl(*off_64++);
			}
			rix->revindex[i].nr = i;
		}
	} else {
		/* Index v1: 24-byte records of 4-byte offset + 20-byte sha1 */
		for (i = 0; i < num_ent; i++) {
			uint32_t hl = *((uint32_t *)(index + 24 * i));
			rix->revindex[i].offset = ntohl(hl);
			rix->revindex[i].nr = i;
		}
	}

	/* This knows the pack format -- the 20-byte trailer
	 * follows immediately after the last object data.
	 */
	rix->revindex[num_ent].offset = p->pack_size - 20;
	rix->revindex[num_ent].nr = -1;
	qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset);
}

211
/*
 * Binary-search the (offset-sorted) reverse index of pack 'p' for the
 * entry whose data starts exactly at 'ofs'.  Builds the revindex on
 * first use.  Dies if the pack table or the offset is bogus; never
 * returns NULL.
 */
static struct revindex_entry * find_packed_object(struct packed_git *p,
						  off_t ofs)
{
	int num;
	int lo, hi;
	struct pack_revindex *rix;
	struct revindex_entry *revindex;
	num = pack_revindex_ix(p);
	if (num < 0)
		die("internal error: pack revindex uninitialized");
	rix = &pack_revindex[num];
	if (!rix->revindex)
		prepare_pack_revindex(rix);
	revindex = rix->revindex;
	lo = 0;
	/* +1: the sentinel entry at num_objects is a valid upper bound */
	hi = p->num_objects + 1;
	do {
		int mi = (lo + hi) / 2;
		if (revindex[mi].offset == ofs) {
			return revindex + mi;
		}
		else if (ofs < revindex[mi].offset)
			hi = mi;
		else
			lo = mi + 1;
	} while (lo < hi);
	die("internal error: pack revindex corrupt");
}

240 241
/* Map a pack-relative offset back to the SHA-1 of the object stored there. */
static const unsigned char *find_packed_object_name(struct packed_git *p,
						    off_t ofs)
{
	const struct revindex_entry *found;

	found = find_packed_object(p, ofs);
	return nth_packed_object_sha1(p, found->nr);
}

247 248 249
/*
 * Recompute the delta for 'entry' against its recorded base, using the
 * already-loaded object data 'buf' (ownership of which is taken: it is
 * freed here).  Returns a freshly allocated delta buffer; dies if the
 * base cannot be read or the delta size no longer matches what the
 * earlier search recorded.
 */
static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	enum object_type type;
	void *otherbuf = read_sha1_file(entry->delta->idx.sha1, &type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->idx.sha1));
        delta_buf = diff_delta(otherbuf, othersize,
			       buf, size, &delta_size, 0);
        if (!delta_buf || delta_size != entry->delta_size)
		die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

265 266 267 268 269 270 271 272 273
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 *
 * Writes the encoded header into 'hdr' (at most 10 bytes) and returns
 * the number of bytes written.
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
	int n = 1;
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_REF_DELTA)
		die("bad type %d", type);

	c = (type << 4) | (size & 15);
	size >>= 4;
	/* emit continuation bytes while size bits remain */
	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
		n++;
	}
	*hdr = c;
	return n;
}

292 293 294 295
/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 *
 * Inflates 'len' compressed bytes at 'offset' in pack 'p' into a
 * scratch buffer (output discarded) and verifies that the stream ends
 * cleanly, consumed exactly 'len' bytes and produced 'expect' bytes.
 * Returns 0 on success, -1 on any mismatch.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	z_stream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	inflateInit(&stream);
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	inflateEnd(&stream);
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
/*
 * Verify the CRC32 of 'len' bytes at 'offset' in pack 'p' against the
 * crc recorded for object number 'nr' in the (version 2) pack index.
 * Returns non-zero when the checksums disagree.
 */
static int check_pack_crc(struct packed_git *p, struct pack_window **w_curs,
			  off_t offset, off_t len, unsigned int nr)
{
	const uint32_t *index_crc;
	uint32_t data_crc = crc32(0, Z_NULL, 0);

	do {
		unsigned int avail;
		void *data = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
			avail = len;
		data_crc = crc32(data_crc, data, avail);
		offset += avail;
		len -= avail;
	} while (len);

	/* skip index header (2 words), fanout (256 words) and sha1 table */
	index_crc = p->index_data;
	index_crc += 2 + 256 + p->num_objects * (20/4) + nr;

	return data_crc != ntohl(*index_crc);
}

344 345 346
/*
 * Stream 'len' raw bytes starting at 'offset' in pack 'p' straight into
 * the checksummed output file 'f', one mapped window at a time.
 */
static void copy_pack_data(struct sha1file *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	while (len) {
		unsigned int chunk;
		unsigned char *data = use_pack(p, w_curs, offset, &chunk);

		if (chunk > len)
			chunk = (unsigned int)len;
		sha1write(f, data, chunk);
		offset += chunk;
		len -= chunk;
	}
}

363
/*
 * Write one object into the output pack, either by recompressing it
 * (possibly as a delta) or by copying its existing compressed bytes
 * from the pack it already lives in.  'write_offset' is where the
 * object will start in the output.  Returns the number of bytes
 * written, or 0 when a --max-pack-size limit would be exceeded and the
 * object must go into the next pack instead.
 */
static unsigned long write_object(struct sha1file *f,
				  struct object_entry *entry,
				  off_t write_offset)
{
	unsigned long size;
	enum object_type type;
	void *buf;
	unsigned char header[10];
	unsigned char dheader[10];
	unsigned hdrlen;
	off_t datalen;
	enum object_type obj_type;
	int to_reuse = 0;
	/* write limit if limited packsize and not first object */
	unsigned long limit = pack_size_limit && nr_written ?
				pack_size_limit - write_offset : 0;
				/* no if no delta */
	int usable_delta =	!entry->delta ? 0 :
				/* yes if unlimited packfile */
				!pack_size_limit ? 1 :
				/* no if base written to previous pack */
				entry->delta->idx.offset == (off_t)-1 ? 0 :
				/* otherwise double-check written to this
				 * pack,  like we do below
				 */
				entry->delta->idx.offset ? 1 : 0;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* decide whether the existing packed representation can be copied */
	obj_type = entry->type;
	if (no_reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (obj_type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse) {
		z_stream stream;
		unsigned long maxsize;
		void *out;
		if (!usable_delta) {
			/* store the full object, freshly read */
			buf = read_sha1_file(entry->idx.sha1, &obj_type, &size);
			if (!buf)
				die("unable to read %s", sha1_to_hex(entry->idx.sha1));
		} else if (entry->delta_data) {
			/* delta was cached during the search phase */
			size = entry->delta_size;
			buf = entry->delta_data;
			entry->delta_data = NULL;
			obj_type = (allow_ofs_delta && entry->delta->idx.offset) ?
				OBJ_OFS_DELTA : OBJ_REF_DELTA;
		} else {
			/* recompute the delta from the base */
			buf = read_sha1_file(entry->idx.sha1, &type, &size);
			if (!buf)
				die("unable to read %s", sha1_to_hex(entry->idx.sha1));
			buf = delta_against(buf, size, entry);
			size = entry->delta_size;
			obj_type = (allow_ofs_delta && entry->delta->idx.offset) ?
				OBJ_OFS_DELTA : OBJ_REF_DELTA;
		}
		/* compress the data to store and put compressed length in datalen */
		memset(&stream, 0, sizeof(stream));
		deflateInit(&stream, pack_compression_level);
		maxsize = deflateBound(&stream, size);
		out = xmalloc(maxsize);
		/* Compress it */
		stream.next_in = buf;
		stream.avail_in = size;
		stream.next_out = out;
		stream.avail_out = maxsize;
		while (deflate(&stream, Z_FINISH) == Z_OK)
			/* nothing */;
		deflateEnd(&stream);
		datalen = stream.total_out;

		/*
		 * The object header is a byte of 'type' followed by zero or
		 * more bytes of length.
		 */
		hdrlen = encode_header(obj_type, size, header);

		if (obj_type == OBJ_OFS_DELTA) {
			/*
			 * Deltas with relative base contain an additional
			 * encoding of the relative offset for the delta
			 * base from this object's position in the pack.
			 */
			off_t ofs = entry->idx.offset - entry->delta->idx.offset;
			unsigned pos = sizeof(dheader) - 1;
			dheader[pos] = ofs & 127;
			while (ofs >>= 7)
				dheader[--pos] = 128 | (--ofs & 127);
			/* +20 leaves room for the pack trailer checksum */
			if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) {
				free(out);
				free(buf);
				return 0;
			}
			sha1write(f, header, hdrlen);
			sha1write(f, dheader + pos, sizeof(dheader) - pos);
			hdrlen += sizeof(dheader) - pos;
		} else if (obj_type == OBJ_REF_DELTA) {
			/*
			 * Deltas with a base reference contain
			 * an additional 20 bytes for the base sha1.
			 */
			if (limit && hdrlen + 20 + datalen + 20 >= limit) {
				free(out);
				free(buf);
				return 0;
			}
			sha1write(f, header, hdrlen);
			sha1write(f, entry->delta->idx.sha1, 20);
			hdrlen += 20;
		} else {
			if (limit && hdrlen + datalen + 20 >= limit) {
				free(out);
				free(buf);
				return 0;
			}
			sha1write(f, header, hdrlen);
		}
		sha1write(f, out, datalen);
		free(out);
		free(buf);
	}
	else {
		/* reuse: copy the already-compressed bytes from the old pack */
		struct packed_git *p = entry->in_pack;
		struct pack_window *w_curs = NULL;
		struct revindex_entry *revidx;
		off_t offset;

		if (entry->delta) {
			obj_type = (allow_ofs_delta && entry->delta->idx.offset) ?
				OBJ_OFS_DELTA : OBJ_REF_DELTA;
			reused_delta++;
		}
		hdrlen = encode_header(obj_type, entry->size, header);
		offset = entry->in_pack_offset;
		revidx = find_packed_object(p, offset);
		/* revidx[1].offset is where the next object starts */
		datalen = revidx[1].offset - offset;
		if (!pack_to_stdout && p->index_version > 1 &&
		    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr))
			die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1));
		/* skip the old header; we write a fresh one */
		offset += entry->in_pack_header_size;
		datalen -= entry->in_pack_header_size;
		if (obj_type == OBJ_OFS_DELTA) {
			off_t ofs = entry->idx.offset - entry->delta->idx.offset;
			unsigned pos = sizeof(dheader) - 1;
			dheader[pos] = ofs & 127;
			while (ofs >>= 7)
				dheader[--pos] = 128 | (--ofs & 127);
			if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit)
				return 0;
			sha1write(f, header, hdrlen);
			sha1write(f, dheader + pos, sizeof(dheader) - pos);
			hdrlen += sizeof(dheader) - pos;
		} else if (obj_type == OBJ_REF_DELTA) {
			if (limit && hdrlen + 20 + datalen + 20 >= limit)
				return 0;
			sha1write(f, header, hdrlen);
			sha1write(f, entry->delta->idx.sha1, 20);
			hdrlen += 20;
		} else {
			if (limit && hdrlen + datalen + 20 >= limit)
				return 0;
			sha1write(f, header, hdrlen);
		}

		if (!pack_to_stdout && p->index_version == 1 &&
		    check_pack_inflate(p, &w_curs, offset, datalen, entry->size))
			die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
		copy_pack_data(f, p, &w_curs, offset, datalen);
		unuse_pack(&w_curs);
		reused++;
	}
	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return hdrlen + datalen;
}

557
/*
 * Write entry 'e' (recursing to its delta base first) at 'offset' in
 * the output.  Returns the offset just past the written data, the
 * unchanged 'offset' when the object needed no writing, or 0 when the
 * current pack is full (pack-size limit hit) and the caller must start
 * a new pack.
 */
static off_t write_one(struct sha1file *f,
			       struct object_entry *e,
			       off_t offset)
{
	unsigned long size;

	/* offset is non zero if object is written already. */
	if (e->idx.offset || e->preferred_base)
		return offset;

	/* if we are deltified, write out base object first. */
	if (e->delta) {
		offset = write_one(f, e->delta, offset);
		if (!offset)
			return 0;
	}

	e->idx.offset = offset;
	size = write_object(f, e, offset);
	if (!size) {
		/* didn't fit in this pack; undo the tentative offset */
		e->idx.offset = 0;
		return 0;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (offset > offset + size)
		die("pack too large for current definition of off_t");
	return offset + size;
}

G
Geert Bosch 已提交
588
/* forward declaration for write_pack_file */
static int adjust_perm(const char *path, mode_t mode);

/*
 * Write all collected objects out, either to stdout or to one or more
 * pack files (splitting when --max-pack-size is in effect).  For each
 * finished on-disk pack, its index is written and both files are
 * renamed into place; the final pack name is printed on stdout.
 */
static void write_pack_file(void)
{
	uint32_t i = 0, j;
	struct sha1file *f;
	off_t offset, offset_one, last_obj_offset = 0;
	struct pack_header hdr;
	/* at most one of --progress/--all-progress shows here on stdout packs */
	int do_progress = progress >> pack_to_stdout;
	uint32_t nr_remaining = nr_result;

	if (do_progress)
		progress_state = start_progress("Writing objects", nr_result);
	written_list = xmalloc(nr_objects * sizeof(*written_list));

	do {
		unsigned char sha1[20];
		char *pack_tmp_name = NULL;

		if (pack_to_stdout) {
			f = sha1fd_throughput(1, "<stdout>", progress_state);
		} else {
			char tmpname[PATH_MAX];
			int fd;
			snprintf(tmpname, sizeof(tmpname),
				 "%s/tmp_pack_XXXXXX", get_object_directory());
			fd = xmkstemp(tmpname);
			pack_tmp_name = xstrdup(tmpname);
			f = sha1fd(fd, pack_tmp_name);
		}

		hdr.hdr_signature = htonl(PACK_SIGNATURE);
		hdr.hdr_version = htonl(PACK_VERSION);
		hdr.hdr_entries = htonl(nr_remaining);
		sha1write(f, &hdr, sizeof(hdr));
		offset = sizeof(hdr);
		nr_written = 0;
		/* 'i' persists across iterations: resume where the last pack stopped */
		for (; i < nr_objects; i++) {
			last_obj_offset = offset;
			offset_one = write_one(f, objects + i, offset);
			if (!offset_one)
				break;
			offset = offset_one;
			display_progress(progress_state, written);
		}

		/*
		 * Did we write the wrong # entries in the header?
		 * If so, rewrite it like in fast-import
		 */
		if (pack_to_stdout || nr_written == nr_remaining) {
			sha1close(f, sha1, 1);
		} else {
			int fd = sha1close(f, NULL, 0);
			fixup_pack_header_footer(fd, sha1, pack_tmp_name, nr_written);
			close(fd);
		}

		if (!pack_to_stdout) {
			mode_t mode = umask(0);
			char *idx_tmp_name, tmpname[PATH_MAX];

			umask(mode);	/* restore the umask we just read */
			mode = 0444 & ~mode;

			idx_tmp_name = write_idx_file(NULL, written_list,
						      nr_written, sha1);
			snprintf(tmpname, sizeof(tmpname), "%s-%s.pack",
				 base_name, sha1_to_hex(sha1));
			if (adjust_perm(pack_tmp_name, mode))
				die("unable to make temporary pack file readable: %s",
				    strerror(errno));
			if (rename(pack_tmp_name, tmpname))
				die("unable to rename temporary pack file: %s",
				    strerror(errno));
			snprintf(tmpname, sizeof(tmpname), "%s-%s.idx",
				 base_name, sha1_to_hex(sha1));
			if (adjust_perm(idx_tmp_name, mode))
				die("unable to make temporary index file readable: %s",
				    strerror(errno));
			if (rename(idx_tmp_name, tmpname))
				die("unable to rename temporary index file: %s",
				    strerror(errno));
			free(idx_tmp_name);
			free(pack_tmp_name);
			puts(sha1_to_hex(sha1));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < nr_objects);

	free(written_list);
	stop_progress(&progress_state);
	if (written != nr_result)
		die("wrote %u objects while expecting %u", written, nr_result);
	/*
	 * We have scanned through [0 ... i).  Since we have written
	 * the correct number of objects,  the remaining [i ... nr_objects)
	 * items must be either already written (due to out-of-order delta base)
	 * or a preferred base.  Count those which are neither and complain if any.
	 */
	for (j = 0; i < nr_objects; i++) {
		struct object_entry *e = objects + i;
		j += !e->idx.offset && !e->preferred_base;
	}
	if (j)
		die("wrote %u objects as expected but %u unwritten", written, j);
}

702 703 704 705 706 707 708
/*
 * Probe the object_ix open-addressed hashtable for 'sha1'.  The table
 * stores 1-based indices into 'objects' (0 means empty).  Returns the
 * slot index when found, or (-1 - free_slot) when not present.
 */
static int locate_object_entry_hash(const unsigned char *sha1)
{
	int i;
	unsigned int ui;
	/* first sizeof(int) bytes of the sha1 are random enough for a hash */
	memcpy(&ui, sha1, sizeof(unsigned int));
	i = ui % object_ix_hashsz;
	while (0 < object_ix[i]) {
		if (!hashcmp(sha1, objects[object_ix[i] - 1].idx.sha1))
			return i;
		if (++i == object_ix_hashsz)
			i = 0;
	}
	return -1 - i;
}

/* Look up an already-collected object by SHA-1; NULL when absent. */
static struct object_entry *locate_object_entry(const unsigned char *sha1)
{
	int slot;

	if (!object_ix_hashsz)
		return NULL;	/* hashtable not built yet */

	slot = locate_object_entry_hash(sha1);
	if (slot < 0)
		return NULL;
	return objects + (object_ix[slot] - 1);
}

/*
 * (Re)build the object_ix hashtable from scratch, sized at three slots
 * per object (minimum 1024) to keep the load factor low.
 */
static void rehash_objects(void)
{
	uint32_t i;
	struct object_entry *oe;

	object_ix_hashsz = nr_objects * 3;
	if (object_ix_hashsz < 1024)
		object_ix_hashsz = 1024;
	object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
	memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
		int ix = locate_object_entry_hash(oe->idx.sha1);
		if (0 <= ix)
			continue;	/* already inserted (duplicate sha1) */
		ix = -1 - ix;	/* recover the free slot */
		object_ix[ix] = i + 1;	/* stored 1-based; 0 marks empty */
	}
}

749
/*
 * Condense a path name into a sortable number built from its last
 * sixteen non-whitespace characters.  The final characters weigh the
 * most, so names sharing a suffix (e.g. ".c") hash close together,
 * which helps the delta searcher group similar files.  NULL hashes to 0.
 */
static unsigned name_hash(const char *name)
{
	unsigned hash = 0;

	if (!name)
		return 0;

	for (; *name; name++) {
		unsigned char c = (unsigned char)*name;

		if (isspace(c))
			continue;
		hash = (hash >> 2) + (c << 24);
	}
	return hash;
}

770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791
/* Fill 'check' with the (lazily interned) "delta" gitattribute. */
static void setup_delta_attr_check(struct git_attr_check *check)
{
	static struct git_attr *attr_delta;

	if (!attr_delta)
		attr_delta = git_attr("delta", 5);

	check[0].attr = attr_delta;
}

/*
 * Returns 1 when the "delta" attribute is explicitly set to false for
 * 'path' (i.e. the user asked us not to deltify it), 0 otherwise or
 * when the attribute lookup fails.
 */
static int no_try_delta(const char *path)
{
	struct git_attr_check check[1];

	setup_delta_attr_check(check);
	if (git_checkattr(path, ARRAY_SIZE(check), check))
		return 0;
	if (ATTR_FALSE(check->value))
		return 1;
	return 0;
}

792
/*
 * Add an object to the packing list.  'type' may be 0 when unknown at
 * this point; 'name' is the path hint used for delta grouping; with
 * 'exclude' set the object becomes a preferred base (available as a
 * delta base but not itself packed).  Returns 1 when a new entry was
 * added, 0 when the object was already known or was filtered out by
 * --incremental/--local.
 */
static int add_object_entry(const unsigned char *sha1, enum object_type type,
			    const char *name, int exclude)
{
	struct object_entry *entry;
	struct packed_git *p, *found_pack = NULL;
	off_t found_offset = 0;
	int ix;
	unsigned hash = name_hash(name);

	ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
	if (ix >= 0) {
		if (exclude) {
			/* demote an already-listed object to preferred base */
			entry = objects + object_ix[ix] - 1;
			if (!entry->preferred_base)
				nr_result--;
			entry->preferred_base = 1;
		}
		return 0;
	}

	/* see whether some existing pack already has this object */
	for (p = packed_git; p; p = p->next) {
		off_t offset = find_pack_entry_one(sha1, p);
		if (offset) {
			if (!found_pack) {
				found_offset = offset;
				found_pack = p;
			}
			if (exclude)
				break;
			if (incremental)
				return 0;	/* already packed; skip */
			if (local && !p->pack_local)
				return 0;	/* only repack local objects */
		}
	}

	if (nr_objects >= nr_alloc) {
		nr_alloc = (nr_alloc  + 1024) * 3 / 2;
		objects = xrealloc(objects, nr_alloc * sizeof(*entry));
	}

	entry = objects + nr_objects++;
	memset(entry, 0, sizeof(*entry));
	hashcpy(entry->idx.sha1, sha1);
	entry->hash = hash;
	if (type)
		entry->type = type;
	if (exclude)
		entry->preferred_base = 1;
	else
		nr_result++;
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}

	/* grow the hashtable past 75% load, otherwise insert into the free slot */
	if (object_ix_hashsz * 3 <= nr_objects * 4)
		rehash_objects();
	else
		object_ix[-1 - ix] = nr_objects;

	display_progress(progress_state, nr_objects);

	if (name && no_try_delta(name))
		entry->no_try_delta = 1;

	return 1;
}

861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
/*
 * Small cache of tree objects read while walking preferred-base trees,
 * so repeatedly visited subtrees are not re-read from the object store.
 */
struct pbase_tree_cache {
	unsigned char sha1[20];	/* tree object name */
	int ref;	/* reference count of cached entries */
	int temporary;	/* throwaway copy, not held in the table */
	void *tree_data;	/* raw tree object contents */
	unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
/* hash a tree name to its home slot in the 256-entry table */
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	return sha1[0] % ARRAY_SIZE(pbase_tree_cache);
}
/* next slot for linear probing, with wrap-around */
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
	 * going to evict it nor find it through _get()
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;

/*
 * Return a ref-counted handle on the tree object named 'sha1', reading
 * it from the object store and caching it when possible.  Returns NULL
 * when the object is missing or not a tree.  Release with
 * pbase_tree_put().
 */
static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
	enum object_type type;
	int neigh;
	int my_ix = pbase_tree_cache_ix(sha1);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
		if (ent && !hashcmp(ent->sha1, sha1)) {
			ent->ref++;
			return ent;
		}
		/* remember the first slot that is empty or evictable (ref == 0) */
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one.  Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
	data = read_sha1_file(sha1, &type, &size);
	if (!data)
		return NULL;
	if (type != OBJ_TREE) {
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
	hashcpy(nent->sha1, sha1);
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}

/* Release a handle obtained from pbase_tree_get(). */
static void pbase_tree_put(struct pbase_tree_cache *cache)
{
	if (cache->temporary) {
		/* throwaway copy: never entered the cache table, destroy now */
		free(cache->tree_data);
		free(cache);
		return;
	}
	cache->ref--;
}

/* Length of the leading path component: stop at NUL, '\n' or '/'. */
static int name_cmp_len(const char *name)
{
	const char *p = name;

	while (*p && *p != '\n' && *p != '/')
		p++;
	return (int)(p - name);
}

/*
 * Walk 'tree' looking for the entry matching the first 'cmplen' bytes
 * of path 'name'; register the matching blob/tree (under its full path
 * 'fullname') as a preferred-base object, recursing into subtrees for
 * deeper path components.  Relies on tree entries being sorted, so the
 * scan can stop as soon as it walks past the sought name.
 */
static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
			     int cmplen,
			     const char *fullname)
{
	struct name_entry entry;
	int cmp;

	while (tree_entry(tree,&entry)) {
		if (S_ISGITLINK(entry.mode))
			continue;	/* submodule commits are not packable here */
		cmp = tree_entry_len(entry.path, entry.sha1) != cmplen ? 1 :
		      memcmp(name, entry.path, cmplen);
		if (cmp > 0)
			continue;	/* not there yet */
		if (cmp < 0)
			return;	/* sorted order: walked past it */
		if (name[cmplen] != '/') {
			/* exact leaf match */
			add_object_entry(entry.sha1,
					 object_type(entry.mode),
					 fullname, 1);
			return;
		}
		if (S_ISDIR(entry.mode)) {
			/* descend into the matching subtree */
			struct tree_desc sub;
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			int downlen = name_cmp_len(down);

			tree = pbase_tree_get(entry.sha1);
			if (!tree)
				return;
			init_tree_desc(&sub, tree->tree_data, tree->tree_size);

			add_pbase_object(&sub, down, downlen, fullname);
			pbase_tree_put(tree);
		}
	}
}
1013

1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053
/* Hashes of path names already processed as preferred bases. */
static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;

/*
 * Binary search in done_pbase_paths[], which is kept sorted in
 * DESCENDING order.  Returns the index of "hash" when found,
 * otherwise -(insertion point)-1.
 */
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;

	while (lo < hi) {
		int mid = lo + (hi - lo) / 2;
		unsigned cur = done_pbase_paths[mid];

		if (cur == hash)
			return mid;
		if (cur < hash)
			hi = mid;	/* descending order: go left */
		else
			lo = mid + 1;
	}
	return -lo - 1;
}

/*
 * Return 1 if a path with this name hash was already handled as a
 * preferred base; otherwise record the hash (keeping the array's
 * descending sort order) and return 0.
 */
static int check_pbase_path(unsigned hash)
{
	int pos;

	if (!done_pbase_paths)
		pos = -1;
	else
		pos = done_pbase_path_pos(hash);
	if (pos >= 0)
		return 1;	/* seen before */
	pos = -pos - 1;		/* insertion point */
	if (done_pbase_paths_num >= done_pbase_paths_alloc) {
		done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc);
		done_pbase_paths = xrealloc(done_pbase_paths,
					    done_pbase_paths_alloc *
					    sizeof(unsigned));
	}
	done_pbase_paths_num++;
	if (pos < done_pbase_paths_num)
		memmove(done_pbase_paths + pos + 1,
			done_pbase_paths + pos,
			(done_pbase_paths_num - pos - 1) * sizeof(unsigned));
	done_pbase_paths[pos] = hash;
	return 0;
}

1054
static void add_preferred_base_object(const char *name)
1055 1056
{
	struct pbase_tree *it;
1057
	int cmplen;
1058
	unsigned hash = name_hash(name);
1059

1060
	if (!num_preferred_base || check_pbase_path(hash))
1061 1062
		return;

1063
	cmplen = name_cmp_len(name);
1064 1065
	for (it = pbase_tree; it; it = it->next) {
		if (cmplen == 0) {
1066
			add_object_entry(it->pcache.sha1, OBJ_TREE, NULL, 1);
1067 1068 1069
		}
		else {
			struct tree_desc tree;
1070
			init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1071
			add_pbase_object(&tree, name, cmplen, name);
1072
		}
1073 1074 1075
	}
}

1076
static void add_preferred_base(unsigned char *sha1)
1077
{
1078 1079 1080 1081
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	unsigned char tree_sha1[20];
1082

1083 1084 1085
	if (window <= num_preferred_base++)
		return;

1086 1087
	data = read_object_with_reference(sha1, tree_type, &size, tree_sha1);
	if (!data)
1088
		return;
1089 1090

	for (it = pbase_tree; it; it = it->next) {
1091
		if (!hashcmp(it->pcache.sha1, tree_sha1)) {
1092 1093 1094 1095 1096 1097 1098 1099 1100
			free(data);
			return;
		}
	}

	it = xcalloc(1, sizeof(*it));
	it->next = pbase_tree;
	pbase_tree = it;

1101
	hashcpy(it->pcache.sha1, tree_sha1);
1102 1103
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
1104 1105
}

1106 1107
static void check_object(struct object_entry *entry)
{
1108
	if (entry->in_pack) {
1109
		struct packed_git *p = entry->in_pack;
1110
		struct pack_window *w_curs = NULL;
1111 1112 1113
		const unsigned char *base_ref = NULL;
		struct object_entry *base_entry;
		unsigned long used, used_0;
1114
		unsigned int avail;
1115 1116
		off_t ofs;
		unsigned char *buf, c;
1117

1118
		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1119

1120
		/*
1121 1122
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
1123
		 */
1124
		used = unpack_object_header_gently(buf, avail,
1125 1126
						   &entry->in_pack_type,
						   &entry->size);
1127

1128 1129 1130 1131
		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
1132
		 */
1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			entry->type = entry->in_pack_type;
			entry->in_pack_header_size = used;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			if (!no_reuse_delta && !entry->preferred_base)
				base_ref = use_pack(p, &w_curs,
						entry->in_pack_offset + used, NULL);
			entry->in_pack_header_size = used + 20;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
			used_0 = 0;
			c = buf[used_0++];
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7))
					die("delta base offset overflow in pack for %s",
G
Geert Bosch 已提交
1156
					    sha1_to_hex(entry->idx.sha1));
1157 1158
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
1159
			}
1160 1161
			if (ofs >= entry->in_pack_offset)
				die("delta base offset out of bound for %s",
G
Geert Bosch 已提交
1162
				    sha1_to_hex(entry->idx.sha1));
1163 1164 1165 1166 1167
			ofs = entry->in_pack_offset - ofs;
			if (!no_reuse_delta && !entry->preferred_base)
				base_ref = find_packed_object_name(p, ofs);
			entry->in_pack_header_size = used + used_0;
			break;
1168 1169
		}

1170 1171 1172 1173 1174 1175 1176 1177 1178 1179
		if (base_ref && (base_entry = locate_object_entry(base_ref))) {
			/*
			 * If base_ref was set above that means we wish to
			 * reuse delta data, and we even found that base
			 * in the list of objects we want to pack. Goodie!
			 *
			 * Depth value does not matter - find_deltas() will
			 * never consider reused delta as the base object to
			 * deltify other objects against, in order to avoid
			 * circular deltas.
1180
			 */
1181
			entry->type = entry->in_pack_type;
1182
			entry->delta = base_entry;
1183 1184
			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;
1185 1186 1187
			unuse_pack(&w_curs);
			return;
		}
1188

1189 1190 1191 1192 1193 1194 1195 1196 1197
		if (entry->type) {
			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			entry->size = get_size_from_delta(p, &w_curs,
					entry->in_pack_offset + entry->in_pack_header_size);
			unuse_pack(&w_curs);
1198 1199
			return;
		}
1200 1201 1202 1203 1204 1205 1206

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with sha1_object_info() to find about the object type
		 * at this point...
		 */
		unuse_pack(&w_curs);
1207
	}
1208

G
Geert Bosch 已提交
1209
	entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
1210
	if (entry->type < 0)
1211
		die("unable to get type of object %s",
G
Geert Bosch 已提交
1212
		    sha1_to_hex(entry->idx.sha1));
1213 1214
}

1215 1216 1217 1218 1219 1220 1221
static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

	/* avoid filesystem trashing with loose objects */
	if (!a->in_pack && !b->in_pack)
G
Geert Bosch 已提交
1222
		return hashcmp(a->idx.sha1, b->idx.sha1);
1223 1224 1225 1226 1227 1228 1229 1230 1231

	if (a->in_pack < b->in_pack)
		return -1;
	if (a->in_pack > b->in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
			(a->in_pack_offset > b->in_pack_offset);
}

1232 1233
static void get_object_details(void)
{
1234
	uint32_t i;
1235 1236 1237 1238 1239 1240
	struct object_entry **sorted_by_offset;

	sorted_by_offset = xcalloc(nr_objects, sizeof(struct object_entry *));
	for (i = 0; i < nr_objects; i++)
		sorted_by_offset[i] = objects + i;
	qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort);
1241

1242
	prepare_pack_ix();
1243 1244 1245
	for (i = 0; i < nr_objects; i++)
		check_object(sorted_by_offset[i]);
	free(sorted_by_offset);
1246 1247
}

1248 1249 1250 1251 1252 1253 1254 1255 1256
/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.  The deepest deltas are therefore the oldest objects which are
 * less susceptible to be accessed often.
 */
1257
static int type_size_sort(const void *_a, const void *_b)
1258
{
1259 1260 1261
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;

1262
	if (a->type > b->type)
1263
		return -1;
1264
	if (a->type < b->type)
1265
		return 1;
1266
	if (a->hash > b->hash)
1267
		return -1;
1268
	if (a->hash < b->hash)
1269
		return 1;
1270
	if (a->preferred_base > b->preferred_base)
1271
		return -1;
1272 1273
	if (a->preferred_base < b->preferred_base)
		return 1;
1274
	if (a->size > b->size)
1275 1276
		return -1;
	if (a->size < b->size)
1277
		return 1;
1278
	return a < b ? -1 : (a > b);  /* newest first */
1279 1280 1281 1282 1283
}

/*
 * One slot of the sliding delta-search window.
 * Reconstructed: stripped interleaved web-viewer artifacts.
 */
struct unpacked {
	struct object_entry *entry;
	void *data;		/* inflated object contents */
	struct delta_index *index; /* delta index over "data" */
	unsigned depth;		/* current delta-chain depth */
};

1288 1289
static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
1290 1291 1292 1293
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

1294 1295 1296
	if (delta_size < cache_max_small_delta_size)
		return 1;

1297 1298 1299 1300 1301 1302 1303
	/* cache delta, if objects are large enough compared to delta size */
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

#ifdef THREADED_DELTA_SEARCH

/*
 * Locks serializing the three shared resources of the threaded delta
 * search: object reading, the delta cache accounting, and progress /
 * work-stealing state.  Without thread support they compile away.
 */
static pthread_mutex_t read_mutex = PTHREAD_MUTEX_INITIALIZER;
#define read_lock()		pthread_mutex_lock(&read_mutex)
#define read_unlock()		pthread_mutex_unlock(&read_mutex)

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

static pthread_mutex_t progress_mutex = PTHREAD_MUTEX_INITIALIZER;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

#else

#define read_lock()		(void)0
#define read_unlock()		(void)0
#define cache_lock()		(void)0
#define cache_unlock()		(void)0
#define progress_lock()		(void)0
#define progress_unlock()	(void)0

#endif

1329
static int try_delta(struct unpacked *trg, struct unpacked *src,
1330
		     unsigned max_depth, unsigned long *mem_usage)
1331
{
1332 1333
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
1334
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
1335
	unsigned ref_depth;
1336
	enum object_type type;
1337 1338 1339
	void *delta_buf;

	/* Don't bother doing diffs between different types */
1340
	if (trg_entry->type != src_entry->type)
1341 1342
		return -1;

1343 1344
	/*
	 * We do not bother to try a delta that we discarded
1345
	 * on an earlier try, but only when reusing delta data.
1346
	 */
1347
	if (!no_reuse_delta && trg_entry->in_pack &&
1348 1349 1350
	    trg_entry->in_pack == src_entry->in_pack &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
1351 1352
		return 0;

1353
	/* Let's not bust the allowed depth. */
1354
	if (src->depth >= max_depth)
1355
		return 0;
1356

1357
	/* Now some size filtering heuristics. */
1358
	trg_size = trg_entry->size;
1359 1360 1361 1362 1363
	if (!trg_entry->delta) {
		max_size = trg_size/2 - 20;
		ref_depth = 1;
	} else {
		max_size = trg_entry->delta_size;
1364
		ref_depth = trg->depth;
1365
	}
1366
	max_size = max_size * (max_depth - src->depth) /
1367
						(max_depth - ref_depth + 1);
1368 1369
	if (max_size == 0)
		return 0;
1370
	src_size = src_entry->size;
1371
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
1372
	if (sizediff >= max_size)
1373
		return 0;
1374 1375
	if (trg_size < src_size / 32)
		return 0;
1376

1377 1378
	/* Load data if not already done */
	if (!trg->data) {
N
Nicolas Pitre 已提交
1379
		read_lock();
G
Geert Bosch 已提交
1380
		trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);
N
Nicolas Pitre 已提交
1381
		read_unlock();
1382 1383 1384
		if (!trg->data)
			die("object %s cannot be read",
			    sha1_to_hex(trg_entry->idx.sha1));
1385 1386
		if (sz != trg_size)
			die("object %s inconsistent object length (%lu vs %lu)",
G
Geert Bosch 已提交
1387
			    sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);
1388
		*mem_usage += sz;
1389 1390
	}
	if (!src->data) {
N
Nicolas Pitre 已提交
1391
		read_lock();
G
Geert Bosch 已提交
1392
		src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);
N
Nicolas Pitre 已提交
1393
		read_unlock();
1394 1395 1396
		if (!src->data)
			die("object %s cannot be read",
			    sha1_to_hex(src_entry->idx.sha1));
1397 1398
		if (sz != src_size)
			die("object %s inconsistent object length (%lu vs %lu)",
G
Geert Bosch 已提交
1399
			    sha1_to_hex(src_entry->idx.sha1), sz, src_size);
1400
		*mem_usage += sz;
1401 1402 1403
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
1404 1405 1406 1407 1408 1409
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning("suboptimal pack - out of memory");
			return 0;
		}
1410
		*mem_usage += sizeof_delta_index(src->index);
1411 1412 1413
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
1414
	if (!delta_buf)
1415
		return 0;
1416

N
Nicolas Pitre 已提交
1417
	if (trg_entry->delta) {
1418 1419
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == trg_entry->delta_size &&
1420
		    src->depth + 1 >= trg->depth) {
1421 1422 1423
			free(delta_buf);
			return 0;
		}
1424
	}
N
Nicolas Pitre 已提交
1425

1426 1427 1428 1429 1430 1431 1432 1433
	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when THREADED_DELTA_SEARCH is not defined.
	 */
	if (trg_entry->delta_data)
		free(trg_entry->delta_data);
	cache_lock();
N
Nicolas Pitre 已提交
1434 1435 1436 1437
	if (trg_entry->delta_data) {
		delta_cache_size -= trg_entry->delta_size;
		trg_entry->delta_data = NULL;
	}
1438
	if (delta_cacheable(src_size, trg_size, delta_size)) {
1439
		delta_cache_size += delta_size;
1440 1441 1442 1443
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
1444
		free(delta_buf);
1445 1446
	}

1447 1448 1449 1450
	trg_entry->delta = src_entry;
	trg_entry->delta_size = delta_size;
	trg->depth = src->depth + 1;

1451
	return 1;
1452 1453
}

1454
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
1455
{
1456 1457 1458 1459 1460 1461 1462 1463 1464
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
1465 1466
}

1467
static unsigned long free_unpacked(struct unpacked *n)
1468
{
1469
	unsigned long freed_mem = sizeof_delta_index(n->index);
1470 1471 1472
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
1473
		freed_mem += n->entry->size;
1474 1475 1476 1477
		free(n->data);
		n->data = NULL;
	}
	n->entry = NULL;
1478
	n->depth = 0;
1479
	return freed_mem;
1480 1481
}

1482
static void find_deltas(struct object_entry **list, unsigned *list_size,
1483
			int window, int depth, unsigned *processed)
1484
{
1485
	uint32_t i, idx = 0, count = 0;
1486
	unsigned int array_size = window * sizeof(struct unpacked);
1487
	struct unpacked *array;
1488
	unsigned long mem_usage = 0;
1489

1490
	array = xmalloc(array_size);
1491
	memset(array, 0, array_size);
1492

1493 1494
	for (;;) {
		struct object_entry *entry = *list++;
1495
		struct unpacked *n = array + idx;
1496
		int j, max_depth, best_base = -1;
1497

1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509
		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

1510
		mem_usage -= free_unpacked(n);
1511
		n->entry = entry;
1512

1513
		while (window_memory_limit &&
1514
		       mem_usage > window_memory_limit &&
1515 1516
		       count > 1) {
			uint32_t tail = (idx + window - count) % window;
1517
			mem_usage -= free_unpacked(array + tail);
1518 1519 1520
			count--;
		}

1521 1522 1523 1524 1525 1526
		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538
		/*
		 * If the current object is at pack edge, take the depth the
		 * objects that depend on the current object into account
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (entry->delta_child) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

L
Linus Torvalds 已提交
1539 1540
		j = window;
		while (--j > 0) {
1541
			int ret;
1542
			uint32_t other_idx = idx + j;
1543
			struct unpacked *m;
L
Linus Torvalds 已提交
1544 1545
			if (other_idx >= window)
				other_idx -= window;
1546 1547 1548
			m = array + other_idx;
			if (!m->entry)
				break;
1549
			ret = try_delta(n, m, max_depth, &mem_usage);
1550
			if (ret < 0)
1551
				break;
1552 1553
			else if (ret > 0)
				best_base = other_idx;
1554
		}
1555

1556 1557 1558 1559
		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
1560
		if (entry->delta && depth <= n->depth)
1561
			continue;
1562

1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579
		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (entry->delta) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

1580
		next:
1581
		idx++;
1582 1583
		if (count + 1 < window)
			count++;
1584 1585
		if (idx >= window)
			idx = 0;
1586
	}
1587

1588
	for (i = 0; i < window; ++i) {
1589
		free_delta_index(array[i].index);
1590
		free(array[i].data);
1591
	}
1592
	free(array);
1593 1594
}

#ifdef THREADED_DELTA_SEARCH

/*
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;	/* this worker's segment */
	unsigned list_size;		/* segment length as assigned */
	unsigned remaining;		/* entries not yet processed */
	int window;
	int depth;
	int working;		/* 0 once the worker runs out of work */
	int data_ready;		/* set by main thread when new work is assigned */
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;
};

/* signalled by workers when they finish their segment */
static pthread_cond_t progress_cond = PTHREAD_COND_INITIALIZER;

N
Nicolas Pitre 已提交
1622 1623
static void *threaded_find_deltas(void *arg)
{
1624 1625
	struct thread_params *me = arg;

1626
	while (me->remaining) {
1627
		find_deltas(me->list, &me->remaining,
1628
			    me->window, me->depth, me->processed);
1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);
1648
	}
1649 1650
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
N
Nicolas Pitre 已提交
1651 1652 1653 1654 1655
}

static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
1656
	struct thread_params p[delta_search_threads];
1657
	int i, ret, active_threads = 0;
1658

1659
	if (delta_search_threads <= 1) {
1660
		find_deltas(list, &list_size, window, depth, processed);
1661 1662 1663
		return;
	}

1664
	/* Partition the work amongst work threads. */
1665
	for (i = 0; i < delta_search_threads; i++) {
1666 1667
		unsigned sub_size = list_size / (delta_search_threads - i);

N
Nicolas Pitre 已提交
1668 1669 1670
		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
1671 1672
		p[i].working = 1;
		p[i].data_ready = 0;
1673

1674
		/* try to split chunks on "path" boundaries */
1675 1676
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
1677 1678 1679
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

1680 1681 1682
		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;
1683

1684 1685 1686 1687
		list += sub_size;
		list_size -= sub_size;
	}

1688 1689 1690 1691
	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
1692 1693
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
1694 1695 1696 1697 1698 1699 1700
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die("unable to create thread: %s", strerror(ret));
		active_threads++;
	}

1701 1702 1703 1704 1705 1706 1707 1708
	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensure good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
1709 1710
	while (active_threads) {
		struct thread_params *target = NULL;
1711 1712 1713 1714
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
1715 1716 1717 1718 1719 1720 1721 1722 1723
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735
		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
1736 1737 1738 1739 1740 1741 1742 1743 1744 1745
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
1746 1747 1748 1749 1750 1751
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
1752 1753 1754 1755 1756 1757 1758
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);
1759

1760
		if (!sub_size) {
1761
			pthread_join(target->thread, NULL);
1762 1763
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
1764
			active_threads--;
1765
		}
1766
	}
N
Nicolas Pitre 已提交
1767 1768 1769
}

#else

/* Without thread support, fall back to the single-threaded search. */
#define ll_find_deltas(l, s, w, d, p)	find_deltas(l, &s, w, d, p)

#endif

1773 1774
static void prepare_pack(int window, int depth)
{
1775
	struct object_entry **delta_list;
1776
	uint32_t i, n, nr_deltas;
1777

1778
	get_object_details();
1779

1780
	if (!nr_objects || !window || !depth)
1781 1782 1783
		return;

	delta_list = xmalloc(nr_objects * sizeof(*delta_list));
1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806
	nr_deltas = n = 0;

	for (i = 0; i < nr_objects; i++) {
		struct object_entry *entry = objects + i;

		if (entry->delta)
			/* This happens if we decided to reuse existing
			 * delta from a pack.  "!no_reuse_delta &&" is implied.
			 */
			continue;

		if (entry->size < 50)
			continue;

		if (entry->no_try_delta)
			continue;

		if (!entry->preferred_base)
			nr_deltas++;

		delta_list[n++] = entry;
	}

1807
	if (nr_deltas && n > 1) {
1808 1809
		unsigned nr_done = 0;
		if (progress)
1810 1811
			progress_state = start_progress("Compressing objects",
							nr_deltas);
1812
		qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
N
Nicolas Pitre 已提交
1813
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
N
Nicolas Pitre 已提交
1814
		stop_progress(&progress_state);
1815 1816
		if (nr_done != nr_deltas)
			die("inconsistency with delta count");
1817
	}
1818
	free(delta_list);
1819 1820
}

1821 1822 1823 1824 1825 1826
static int git_pack_config(const char *k, const char *v)
{
	if(!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
1827 1828 1829 1830 1831
	if (!strcmp(k, "pack.windowmemory")) {
		window_memory_limit = git_config_ulong(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.depth")) {
1832 1833 1834
		depth = git_config_int(k, v);
		return 0;
	}
1835 1836 1837 1838 1839 1840 1841 1842 1843 1844
	if (!strcmp(k, "pack.compression")) {
		int level = git_config_int(k, v);
		if (level == -1)
			level = Z_DEFAULT_COMPRESSION;
		else if (level < 0 || level > Z_BEST_COMPRESSION)
			die("bad pack compression level %d", level);
		pack_compression_level = level;
		pack_compression_seen = 1;
		return 0;
	}
1845 1846 1847 1848
	if (!strcmp(k, "pack.deltacachesize")) {
		max_delta_cache_size = git_config_int(k, v);
		return 0;
	}
1849 1850 1851 1852
	if (!strcmp(k, "pack.deltacachelimit")) {
		cache_max_small_delta_size = git_config_int(k, v);
		return 0;
	}
1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863
	if (!strcmp(k, "pack.threads")) {
		delta_search_threads = git_config_int(k, v);
		if (delta_search_threads < 1)
			die("invalid number of threads specified (%d)",
			    delta_search_threads);
#ifndef THREADED_DELTA_SEARCH
		if (delta_search_threads > 1)
			warning("no threads support, ignoring %s", k);
#endif
		return 0;
	}
1864 1865 1866 1867 1868 1869
	if (!strcmp(k, "pack.indexversion")) {
		pack_idx_default_version = git_config_int(k, v);
		if (pack_idx_default_version > 2)
			die("bad pack.indexversion=%d", pack_idx_default_version);
		return 0;
	}
1870 1871 1872
	return git_default_config(k, v);
}

1873
static void read_object_list_from_stdin(void)
1874
{
1875 1876
	char line[40 + 1 + PATH_MAX + 2];
	unsigned char sha1[20];
1877

1878 1879 1880 1881 1882 1883
	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("fgets returned NULL, not EOF, not error!");
1884 1885 1886 1887
			if (errno != EINTR)
				die("fgets: %s", strerror(errno));
			clearerr(stdin);
			continue;
1888
		}
1889 1890 1891
		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
1892
				    line);
1893
			add_preferred_base(sha1);
1894
			continue;
1895
		}
1896
		if (get_sha1_hex(line, sha1))
1897
			die("expected sha1, got garbage:\n %s", line);
1898

1899 1900
		add_preferred_base_object(line+41);
		add_object_entry(sha1, 0, line+41, 0);
1901
	}
1902 1903
}

J
Junio C Hamano 已提交
1904 1905
#define OBJECT_ADDED (1u<<20)

1906 1907
static void show_commit(struct commit *commit)
{
1908
	add_object_entry(commit->object.sha1, OBJ_COMMIT, NULL, 0);
J
Junio C Hamano 已提交
1909
	commit->object.flags |= OBJECT_ADDED;
1910 1911 1912 1913
}

static void show_object(struct object_array_entry *p)
{
1914 1915
	add_preferred_base_object(p->name);
	add_object_entry(p->item->sha1, p->item->type, p->name, 0);
J
Junio C Hamano 已提交
1916
	p->item->flags |= OBJECT_ADDED;
1917 1918
}

1919 1920 1921 1922 1923
static void show_edge(struct commit *commit)
{
	add_preferred_base(commit->object.sha1);
}

J
Junio C Hamano 已提交
1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
/* An unreachable object found in an existing pack, with its offset there. */
struct in_pack_object {
	off_t offset;
	struct object *object;
};

/* Growable array collecting unreachable in-pack objects to keep. */
struct in_pack {
	int alloc;
	int nr;
	struct in_pack_object *array;
};

static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
{
	in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->sha1, p);
	in_pack->array[in_pack->nr].object = object;
	in_pack->nr++;
}

/*
 * Compare the objects in the offset order, in order to emulate the
 * "git-rev-list --objects" output that produced the pack originally.
 */
static int ofscmp(const void *a_, const void *b_)
{
	struct in_pack_object *a = (struct in_pack_object *)a_;
	struct in_pack_object *b = (struct in_pack_object *)b_;

	if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	else
		return hashcmp(a->object->sha1, b->object->sha1);
}

/*
 * For --keep-unreachable: scan every pack the rev walk was told to
 * ignore, collect all of its objects that were not already added by
 * the walk, and add them to the output pack in offset order.
 */
static void add_objects_in_unpacked_packs(struct rev_info *revs)
{
	struct packed_git *p;
	struct in_pack in_pack;
	uint32_t i;

	memset(&in_pack, 0, sizeof(in_pack));

	for (p = packed_git; p; p = p->next) {
		const unsigned char *sha1;
		struct object *o;

		/* only look at packs listed in revs->ignore_packed */
		for (i = 0; i < revs->num_ignore_packed; i++) {
			if (matches_pack_name(p, revs->ignore_packed[i]))
				break;
		}
		if (revs->num_ignore_packed <= i)
			continue;
		if (open_pack_index(p))
			die("cannot open pack index");

		ALLOC_GROW(in_pack.array,
			   in_pack.nr + p->num_objects,
			   in_pack.alloc);

		/* collect objects the walk did not already add */
		for (i = 0; i < p->num_objects; i++) {
			sha1 = nth_packed_object_sha1(p, i);
			o = lookup_unknown_object(sha1);
			if (!(o->flags & OBJECT_ADDED))
				mark_in_pack_object(o, p, &in_pack);
			o->flags |= OBJECT_ADDED;
		}
	}

	if (in_pack.nr) {
		/* add in pack-offset order to mimic the original layout */
		qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]),
		      ofscmp);
		for (i = 0; i < in_pack.nr; i++) {
			struct object *o = in_pack.array[i].object;
			add_object_entry(o->sha1, o->type, "", 0);
		}
	}
	free(in_pack.array);
}

2004
static void get_object_list(int ac, const char **av)
2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
{
	struct rev_info revs;
	char line[1000];
	int flags = 0;

	init_revisions(&revs, NULL);
	save_commit_buffer = 0;
	track_object_refs = 0;
	setup_revisions(ac, av, &revs, NULL);

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
2017
		if (len && line[len - 1] == '\n')
2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				continue;
			}
			die("not a rev '%s'", line);
		}
		if (handle_revision_arg(line, &revs, flags, 1))
			die("bad revision '%s'", line);
	}

	prepare_revision_walk(&revs);
2033
	mark_edges_uninteresting(revs.commits, &revs, show_edge);
2034
	traverse_commit_list(&revs, show_commit, show_object);
J
Junio C Hamano 已提交
2035 2036 2037

	if (keep_unreachable)
		add_objects_in_unpacked_packs(&revs);
2038 2039
}

J
Junio C Hamano 已提交
2040 2041 2042 2043 2044 2045 2046
static int adjust_perm(const char *path, mode_t mode)
{
	if (chmod(path, mode))
		return -1;
	return adjust_shared_perm(path);
}

2047 2048 2049
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	int use_internal_rev_list = 0;
2050
	int thin = 0;
2051
	uint32_t i;
2052 2053
	const char **rp_av;
	int rp_ac_alloc = 64;
2054 2055
	int rp_ac;

2056 2057
	rp_av = xcalloc(rp_ac_alloc, sizeof(*rp_av));

2058 2059 2060
	rp_av[0] = "pack-objects";
	rp_av[1] = "--objects"; /* --thin will make it --objects-edge */
	rp_ac = 2;
2061 2062

	git_config(git_pack_config);
2063 2064
	if (!pack_compression_seen && core_compression_seen)
		pack_compression_level = core_compression_level;
2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084

	progress = isatty(2);
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg != '-')
			break;

		if (!strcmp("--non-empty", arg)) {
			non_empty = 1;
			continue;
		}
		if (!strcmp("--local", arg)) {
			local = 1;
			continue;
		}
		if (!strcmp("--incremental", arg)) {
			incremental = 1;
			continue;
		}
2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096
		if (!prefixcmp(arg, "--compression=")) {
			char *end;
			int level = strtoul(arg+14, &end, 0);
			if (!arg[14] || *end)
				usage(pack_usage);
			if (level == -1)
				level = Z_DEFAULT_COMPRESSION;
			else if (level < 0 || level > Z_BEST_COMPRESSION)
				die("bad pack compression level %d", level);
			pack_compression_level = level;
			continue;
		}
2097 2098 2099 2100 2101 2102 2103
		if (!prefixcmp(arg, "--max-pack-size=")) {
			char *end;
			pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024;
			if (!arg[16] || *end)
				usage(pack_usage);
			continue;
		}
2104
		if (!prefixcmp(arg, "--window=")) {
2105 2106 2107 2108 2109 2110
			char *end;
			window = strtoul(arg+9, &end, 0);
			if (!arg[9] || *end)
				usage(pack_usage);
			continue;
		}
2111 2112 2113 2114 2115
		if (!prefixcmp(arg, "--window-memory=")) {
			if (!git_parse_ulong(arg+16, &window_memory_limit))
				usage(pack_usage);
			continue;
		}
2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127
		if (!prefixcmp(arg, "--threads=")) {
			char *end;
			delta_search_threads = strtoul(arg+10, &end, 0);
			if (!arg[10] || *end || delta_search_threads < 1)
				usage(pack_usage);
#ifndef THREADED_DELTA_SEARCH
			if (delta_search_threads > 1)
				warning("no threads support, "
					"ignoring %s", arg);
#endif
			continue;
		}
2128
		if (!prefixcmp(arg, "--depth=")) {
2129 2130 2131 2132 2133 2134 2135 2136 2137 2138
			char *end;
			depth = strtoul(arg+8, &end, 0);
			if (!arg[8] || *end)
				usage(pack_usage);
			continue;
		}
		if (!strcmp("--progress", arg)) {
			progress = 1;
			continue;
		}
2139 2140 2141 2142
		if (!strcmp("--all-progress", arg)) {
			progress = 2;
			continue;
		}
2143 2144 2145 2146 2147 2148 2149 2150
		if (!strcmp("-q", arg)) {
			progress = 0;
			continue;
		}
		if (!strcmp("--no-reuse-delta", arg)) {
			no_reuse_delta = 1;
			continue;
		}
2151 2152 2153 2154
		if (!strcmp("--no-reuse-object", arg)) {
			no_reuse_object = no_reuse_delta = 1;
			continue;
		}
2155
		if (!strcmp("--delta-base-offset", arg)) {
2156
			allow_ofs_delta = 1;
2157 2158
			continue;
		}
2159 2160 2161 2162 2163 2164 2165 2166
		if (!strcmp("--stdout", arg)) {
			pack_to_stdout = 1;
			continue;
		}
		if (!strcmp("--revs", arg)) {
			use_internal_rev_list = 1;
			continue;
		}
J
Junio C Hamano 已提交
2167 2168 2169 2170
		if (!strcmp("--keep-unreachable", arg)) {
			keep_unreachable = 1;
			continue;
		}
2171
		if (!strcmp("--unpacked", arg) ||
2172
		    !prefixcmp(arg, "--unpacked=") ||
2173
		    !strcmp("--reflog", arg) ||
2174 2175
		    !strcmp("--all", arg)) {
			use_internal_rev_list = 1;
2176 2177 2178 2179 2180
			if (rp_ac >= rp_ac_alloc - 1) {
				rp_ac_alloc = alloc_nr(rp_ac_alloc);
				rp_av = xrealloc(rp_av,
						 rp_ac_alloc * sizeof(*rp_av));
			}
2181
			rp_av[rp_ac++] = arg;
2182 2183
			continue;
		}
2184 2185 2186 2187
		if (!strcmp("--thin", arg)) {
			use_internal_rev_list = 1;
			thin = 1;
			rp_av[1] = "--objects-edge";
2188
			continue;
2189 2190 2191
		}
		if (!prefixcmp(arg, "--index-version=")) {
			char *c;
G
Geert Bosch 已提交
2192 2193
			pack_idx_default_version = strtoul(arg + 16, &c, 10);
			if (pack_idx_default_version > 2)
2194 2195
				die("bad %s", arg);
			if (*c == ',')
G
Geert Bosch 已提交
2196 2197
				pack_idx_off32_limit = strtoul(c+1, &c, 0);
			if (*c || pack_idx_off32_limit & 0x80000000)
2198 2199
				die("bad %s", arg);
			continue;
2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222
		}
		usage(pack_usage);
	}

	/* Traditionally "pack-objects [options] base extra" failed;
	 * we would however want to take refs parameter that would
	 * have been given to upstream rev-list ourselves, which means
	 * we somehow want to say what the base name is.  So the
	 * syntax would be:
	 *
	 * pack-objects [options] base <refs...>
	 *
	 * in other words, we would treat the first non-option as the
	 * base_name and send everything else to the internal revision
	 * walker.
	 */

	if (!pack_to_stdout)
		base_name = argv[i++];

	if (pack_to_stdout != !base_name)
		usage(pack_usage);

2223 2224 2225
	if (pack_to_stdout && pack_size_limit)
		die("--max-pack-size cannot be used to build a pack for transfer.");

2226 2227
	if (!pack_to_stdout && thin)
		die("--thin cannot be used to build an indexable pack.");
2228 2229 2230

	prepare_packed_git();

2231
	if (progress)
2232
		progress_state = start_progress("Counting objects", 0);
2233 2234
	if (!use_internal_rev_list)
		read_object_list_from_stdin();
2235 2236 2237 2238
	else {
		rp_av[rp_ac] = NULL;
		get_object_list(rp_ac, rp_av);
	}
N
Nicolas Pitre 已提交
2239
	stop_progress(&progress_state);
N
Nicolas Pitre 已提交
2240

2241
	if (non_empty && !nr_result)
2242
		return 0;
2243 2244
	if (nr_result)
		prepare_pack(window, depth);
2245
	write_pack_file();
2246
	if (progress)
2247
		fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n",
2248
			written, written_delta, reused, reused_delta);
2249 2250
	return 0;
}