pack-objects.c 14.3 KB
Newer Older
1 2 3
#include "cache.h"
#include "object.h"
#include "delta.h"
4
#include "pack.h"
5
#include "csum-file.h"
6
#include <sys/time.h>
7

J
Junio C Hamano 已提交
8
static const char pack_usage[] = "git-pack-objects [-q] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
9 10 11 12 13 14

struct object_entry {
	unsigned char sha1[20];
	unsigned long size;
	unsigned long offset;
	unsigned int depth;
15
	unsigned int hash;
16
	enum object_type type;
17 18 19 20
	unsigned long delta_size;
	struct object_entry *delta;
};

21
static unsigned char object_list_sha1[20];
22
static int non_empty = 0;
L
Linus Torvalds 已提交
23
static int local = 0;
24
static int incremental = 0;
25 26 27 28
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects = NULL;
static int nr_objects = 0, nr_alloc = 0;
static const char *base_name;
29
static unsigned char pack_file_sha1[20];
J
Junio C Hamano 已提交
30
static int progress = 1;
31 32 33 34 35 36 37 38 39 40

static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	char type[10];
	void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
41
        delta_buf = diff_delta(otherbuf, othersize,
42
			       buf, size, &delta_size, 0);
43
        if (!delta_buf || delta_size != entry->delta_size)
44 45 46 47 48 49
        	die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

50 51 52 53 54 55 56 57 58
/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
59
	int n = 1;
60 61 62 63 64
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

65 66 67
	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
68
		*hdr++ = c | 0x80;
69 70 71
		c = size & 0x7f;
		size >>= 7;
		n++;
72 73 74 75 76
	}
	*hdr = c;
	return n;
}

77
static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
78 79 80 81
{
	unsigned long size;
	char type[10];
	void *buf = read_sha1_file(entry->sha1, type, &size);
82
	unsigned char header[10];
83
	unsigned hdrlen, datalen;
84
	enum object_type obj_type;
85 86 87 88 89 90 91

	if (!buf)
		die("unable to read %s", sha1_to_hex(entry->sha1));
	if (size != entry->size)
		die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);

	/*
92 93 94
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.  For deltas, the 20 bytes of delta sha1
	 * follows that.
95
	 */
96
	obj_type = entry->type;
97 98 99
	if (entry->delta) {
		buf = delta_against(buf, size, entry);
		size = entry->delta_size;
100
		obj_type = OBJ_DELTA;
101
	}
102
	hdrlen = encode_header(obj_type, size, header);
103
	sha1write(f, header, hdrlen);
104 105 106 107
	if (entry->delta) {
		sha1write(f, entry->delta, 20);
		hdrlen += 20;
	}
108
	datalen = sha1write_compressed(f, buf, size);
109 110 111 112
	free(buf);
	return hdrlen + datalen;
}

113 114 115 116 117 118 119 120 121 122 123
static unsigned long write_one(struct sha1file *f,
			       struct object_entry *e,
			       unsigned long offset)
{
	if (e->offset)
		/* offset starts from header size and cannot be zero
		 * if it is written already.
		 */
		return offset;
	e->offset = offset;
	offset += write_object(f, e);
J
Junio C Hamano 已提交
124
	/* if we are deltified, write out its base object. */
125 126 127 128 129
	if (e->delta)
		offset = write_one(f, e->delta, offset);
	return offset;
}

130 131 132
static void write_pack_file(void)
{
	int i;
133
	struct sha1file *f;
134
	unsigned long offset;
135
	unsigned long mb;
136
	struct pack_header hdr;
137

138 139 140
	if (!base_name)
		f = sha1fd(1, "<stdout>");
	else
141
		f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack");
142
	hdr.hdr_signature = htonl(PACK_SIGNATURE);
143
	hdr.hdr_version = htonl(PACK_VERSION);
144 145 146
	hdr.hdr_entries = htonl(nr_objects);
	sha1write(f, &hdr, sizeof(hdr));
	offset = sizeof(hdr);
147 148 149
	for (i = 0; i < nr_objects; i++)
		offset = write_one(f, objects + i, offset);

150
	sha1close(f, pack_file_sha1, 1);
151 152 153 154 155 156 157
	mb = offset >> 20;
	offset &= 0xfffff;
}

static void write_index_file(void)
{
	int i;
158
	struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
159 160 161 162 163 164 165
	struct object_entry **list = sorted_by_sha;
	struct object_entry **last = list + nr_objects;
	unsigned int array[256];

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
166
	 * having to do eight extra binary search iterations).
167 168 169 170 171 172 173 174 175 176 177 178
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *entry = *next;
			if (entry->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
179
	sha1write(f, array, 256 * sizeof(int));
180 181 182 183 184

	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
L
Linus Torvalds 已提交
185
	for (i = 0; i < nr_objects; i++) {
186 187
		struct object_entry *entry = *list++;
		unsigned int offset = htonl(entry->offset);
188 189
		sha1write(f, &offset, 4);
		sha1write(f, entry->sha1, 20);
190
	}
191 192
	sha1write(f, pack_file_sha1, 20);
	sha1close(f, NULL, 1);
193 194
}

195
static int add_object_entry(unsigned char *sha1, unsigned int hash)
196 197 198 199
{
	unsigned int idx = nr_objects;
	struct object_entry *entry;

L
Linus Torvalds 已提交
200 201 202 203 204 205 206 207 208 209 210 211 212 213
	if (incremental || local) {
		struct packed_git *p;

		for (p = packed_git; p; p = p->next) {
			struct pack_entry e;

			if (find_pack_entry_one(sha1, &e, p)) {
				if (incremental)
					return 0;
				if (local && !p->pack_local)
					return 0;
			}
		}
	}
214

215 216 217 218 219 220 221 222
	if (idx >= nr_alloc) {
		unsigned int needed = (idx + 1024) * 3 / 2;
		objects = xrealloc(objects, needed * sizeof(*entry));
		nr_alloc = needed;
	}
	entry = objects + idx;
	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);
223
	entry->hash = hash;
224
	nr_objects = idx+1;
225
	return 1;
226 227 228 229
}

static void check_object(struct object_entry *entry)
{
230 231 232 233
	char type[20];

	if (!sha1_object_info(entry->sha1, type, &entry->size)) {
		if (!strcmp(type, "commit")) {
234
			entry->type = OBJ_COMMIT;
235
		} else if (!strcmp(type, "tree")) {
236
			entry->type = OBJ_TREE;
237
		} else if (!strcmp(type, "blob")) {
238
			entry->type = OBJ_BLOB;
239
		} else if (!strcmp(type, "tag")) {
240
			entry->type = OBJ_TAG;
241 242 243 244 245 246 247
		} else
			die("unable to pack object %s of type %s",
			    sha1_to_hex(entry->sha1), type);
	}
	else
		die("unable to get type of object %s",
		    sha1_to_hex(entry->sha1));
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
}

static void get_object_details(void)
{
	int i;
	struct object_entry *entry = objects;

	for (i = 0; i < nr_objects; i++)
		check_object(entry++);
}

typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

static entry_sort_t current_sort;

static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
	return current_sort(a,b);
}

static struct object_entry **create_sorted_list(entry_sort_t sort)
{
	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
	int i;

	for (i = 0; i < nr_objects; i++)
		list[i] = objects + i;
	current_sort = sort;
	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
{
	return memcmp(a->sha1, b->sha1, 20);
}

static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
{
	if (a->type < b->type)
		return -1;
	if (a->type > b->type)
		return 1;
293 294 295 296
	if (a->hash < b->hash)
		return -1;
	if (a->hash > b->hash)
		return 1;
297 298 299 300 301 302 303 304 305 306 307 308 309
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return a < b ? -1 : (a > b);
}

struct unpacked {
	struct object_entry *entry;
	void *data;
};

/*
310 311 312 313 314 315
 * We search for deltas _backwards_ in a list sorted by type and
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller - deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.
316
 */
317
static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
318 319 320
{
	struct object_entry *cur_entry = cur->entry;
	struct object_entry *old_entry = old->entry;
321
	unsigned long size, oldsize, delta_size, sizediff;
322
	long max_size;
323 324 325 326 327 328 329
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (cur_entry->type != old_entry->type)
		return -1;

	size = cur_entry->size;
330 331
	if (size < 50)
		return -1;
332
	oldsize = old_entry->size;
333 334
	sizediff = oldsize > size ? oldsize - size : size - oldsize;
	if (sizediff > size / 8)
335
		return -1;
336 337
	if (old_entry->depth >= max_depth)
		return 0;
338 339 340 341 342 343 344 345

	/*
	 * NOTE!
	 *
	 * We always delta from the bigger to the smaller, since that's
	 * more space-efficient (deletes don't have to say _what_ they
	 * delete).
	 */
346 347 348
	max_size = size / 2 - 20;
	if (cur_entry->delta)
		max_size = cur_entry->delta_size-1;
349 350
	if (sizediff >= max_size)
		return -1;
351 352
	delta_buf = diff_delta(old->data, oldsize,
			       cur->data, size, &delta_size, max_size);
353
	if (!delta_buf)
354 355 356
		return 0;
	cur_entry->delta = old_entry;
	cur_entry->delta_size = delta_size;
357
	cur_entry->depth = old_entry->depth + 1;
358
	free(delta_buf);
359
	return 0;
360 361
}

362
static void find_deltas(struct object_entry **list, int window, int depth)
363
{
364
	int i, idx;
365 366
	unsigned int array_size = window * sizeof(struct unpacked);
	struct unpacked *array = xmalloc(array_size);
367
	int eye_candy;
368 369

	memset(array, 0, array_size);
370 371
	i = nr_objects;
	idx = 0;
372 373
	eye_candy = i - (nr_objects / 20);

374
	while (--i >= 0) {
375 376 377 378 379 380
		struct object_entry *entry = list[i];
		struct unpacked *n = array + idx;
		unsigned long size;
		char type[10];
		int j;

381 382 383 384
		if (progress && i <= eye_candy) {
			eye_candy -= nr_objects / 20;
			fputc('.', stderr);
		}
385 386 387 388 389
		free(n->data);
		n->entry = entry;
		n->data = read_sha1_file(entry->sha1, type, &size);
		if (size != entry->size)
			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
L
Linus Torvalds 已提交
390 391 392
		j = window;
		while (--j > 0) {
			unsigned int other_idx = idx + j;
393
			struct unpacked *m;
L
Linus Torvalds 已提交
394 395
			if (other_idx >= window)
				other_idx -= window;
396 397 398
			m = array + other_idx;
			if (!m->entry)
				break;
399
			if (try_delta(n, m, depth) < 0)
400 401
				break;
		}
402 403 404
		idx++;
		if (idx >= window)
			idx = 0;
405
	}
406 407 408 409

	for (i = 0; i < window; ++i)
		free(array[i].data);
	free(array);
410 411
}

412 413 414 415
static void prepare_pack(int window, int depth)
{
	get_object_details();

416 417
	if (progress)
		fprintf(stderr, "Packing %d objects", nr_objects);
418 419 420
	sorted_by_type = create_sorted_list(type_size_sort);
	if (window && depth)
		find_deltas(sorted_by_type, window+1, depth);
421 422
	if (progress)
		fputc('\n', stderr);
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
	write_pack_file();
}

static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
{
	static const char cache[] = "pack-cache/pack-%s.%s";
	char *cached_pack, *cached_idx;
	int ifd, ofd, ifd_ix = -1;

	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
	ifd = open(cached_pack, O_RDONLY);
	if (ifd < 0)
		return 0;

	if (!pack_to_stdout) {
		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
		ifd_ix = open(cached_idx, O_RDONLY);
		if (ifd_ix < 0) {
			close(ifd);
			return 0;
		}
	}

	fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
		sha1_to_hex(sha1));

	if (pack_to_stdout) {
		if (copy_fd(ifd, 1))
			exit(1);
		close(ifd);
	}
	else {
		char name[PATH_MAX];
		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd, ofd))
			exit(1);
		close(ifd);

		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd_ix, ofd))
			exit(1);
		close(ifd_ix);
		puts(sha1_to_hex(sha1));
	}

	return 1;
}

479 480
int main(int argc, char **argv)
{
481
	SHA_CTX ctx;
482
	char line[PATH_MAX + 20];
483
	int window = 10, depth = 10, pack_to_stdout = 0;
J
Junio C Hamano 已提交
484
	struct object_entry **list;
485
	int i;
486 487 488 489
	struct timeval prev_tv;
	int eye_candy = 0;
	int eye_candy_incr = 500;

490

491 492
	setup_git_directory();

493 494 495 496
	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
497 498 499 500
			if (!strcmp("--non-empty", arg)) {
				non_empty = 1;
				continue;
			}
L
Linus Torvalds 已提交
501 502 503 504
			if (!strcmp("--local", arg)) {
				local = 1;
				continue;
			}
505 506 507 508
			if (!strcmp("--incremental", arg)) {
				incremental = 1;
				continue;
			}
509 510
			if (!strncmp("--window=", arg, 9)) {
				char *end;
511
				window = strtoul(arg+9, &end, 0);
512 513 514 515
				if (!arg[9] || *end)
					usage(pack_usage);
				continue;
			}
516 517 518 519 520 521 522
			if (!strncmp("--depth=", arg, 8)) {
				char *end;
				depth = strtoul(arg+8, &end, 0);
				if (!arg[8] || *end)
					usage(pack_usage);
				continue;
			}
J
Junio C Hamano 已提交
523 524 525 526
			if (!strcmp("-q", arg)) {
				progress = 0;
				continue;
			}
527 528 529 530
			if (!strcmp("--stdout", arg)) {
				pack_to_stdout = 1;
				continue;
			}
531 532 533 534 535 536 537
			usage(pack_usage);
		}
		if (base_name)
			usage(pack_usage);
		base_name = arg;
	}

538
	if (pack_to_stdout != !base_name)
539 540
		usage(pack_usage);

L
Linus Torvalds 已提交
541
	prepare_packed_git();
542 543 544 545
	if (progress) {
		fprintf(stderr, "Generating pack...\n");
		gettimeofday(&prev_tv, NULL);
	}
546
	while (fgets(line, sizeof(line), stdin) != NULL) {
547 548
		unsigned int hash;
		char *p;
549
		unsigned char sha1[20];
550

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
		if (progress && (eye_candy <= nr_objects)) {
			fprintf(stderr, "Counting objects...%d\r", nr_objects);
			if (eye_candy && (50 <= eye_candy_incr)) {
				struct timeval tv;
				int time_diff;
				gettimeofday(&tv, NULL);
				time_diff = (tv.tv_sec - prev_tv.tv_sec);
				time_diff <<= 10;
				time_diff += (tv.tv_usec - prev_tv.tv_usec);
				if ((1 << 9) < time_diff)
					eye_candy_incr += 50;
				else if (50 < eye_candy_incr)
					eye_candy_incr -= 50;
			}
			eye_candy += eye_candy_incr;
		}
567
		if (get_sha1_hex(line, sha1))
568
			die("expected sha1, got garbage:\n %s", line);
569 570 571 572 573 574 575 576
		hash = 0;
		p = line+40;
		while (*p) {
			unsigned char c = *p++;
			if (isspace(c))
				continue;
			hash = hash * 11 + c;
		}
J
Junio C Hamano 已提交
577
		add_object_entry(sha1, hash);
578
	}
579 580
	if (progress)
		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
581 582
	if (non_empty && !nr_objects)
		return 0;
583 584

	sorted_by_sha = create_sorted_list(sha1_sort);
J
Junio C Hamano 已提交
585 586 587 588 589 590 591 592
	SHA1_Init(&ctx);
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *entry = *list++;
		SHA1_Update(&ctx, entry->sha1, 20);
	}
	SHA1_Final(object_list_sha1, &ctx);

593 594 595 596 597 598 599 600
	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
		;
	else {
		prepare_pack(window, depth);
		if (!pack_to_stdout) {
			write_index_file();
			puts(sha1_to_hex(object_list_sha1));
		}
601
	}
602 603
	return 0;
}