index-pack.c 24.9 KB
Newer Older
S
Sergey Vlasov 已提交
1 2 3 4
#include "cache.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
N
Nicolas Pitre 已提交
9 10
#include <sys/time.h>
#include <signal.h>
S
Sergey Vlasov 已提交
11 12

static const char index_pack_usage[] =
13
"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
S
Sergey Vlasov 已提交
14 15 16 17

struct object_entry
{
	unsigned long offset;
18 19
	unsigned long size;
	unsigned int hdr_size;
S
Sergey Vlasov 已提交
20 21 22 23 24
	enum object_type type;
	enum object_type real_type;
	unsigned char sha1[20];
};

25 26 27 28 29
union delta_base {
	unsigned char sha1[20];
	unsigned long offset;
};

30 31 32 33 34 35
/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ	20

S
Sergey Vlasov 已提交
36 37
struct delta_entry
{
38
	union delta_base base;
39
	int obj_no;
S
Sergey Vlasov 已提交
40 41 42 43 44 45
};

static struct object_entry *objects;
static struct delta_entry *deltas;
static int nr_objects;
static int nr_deltas;
46
static int nr_resolved_deltas;
S
Sergey Vlasov 已提交
47

48
static int from_stdin;
N
Nicolas Pitre 已提交
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
static int verbose;

static volatile sig_atomic_t progress_update;

static void progress_interval(int signum)
{
	progress_update = 1;
}

static void setup_progress_signal(void)
{
	struct sigaction sa;
	struct itimerval v;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = progress_interval;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_RESTART;
	sigaction(SIGALRM, &sa, NULL);

	v.it_interval.tv_sec = 1;
	v.it_interval.tv_usec = 0;
	v.it_value = v.it_interval;
	setitimer(ITIMER_REAL, &v, NULL);

}

static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
{
	unsigned percent = n * 100 / total;
	if (percent != last_pc || progress_update) {
		fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
		progress_update = 0;
	}
	return percent;
}
85

86 87 88 89
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
static unsigned long input_offset, input_len, consumed_bytes;
static SHA_CTX input_ctx;
90
static int input_fd, output_fd, mmap_fd;
91

92
/* Discard current buffer used content. */
93
static void flush(void)
94 95 96 97 98
{
	if (input_offset) {
		if (output_fd >= 0)
			write_or_die(output_fd, input_buffer, input_offset);
		SHA1_Update(&input_ctx, input_buffer, input_offset);
99
		memmove(input_buffer, input_buffer + input_offset, input_len);
100 101 102 103
		input_offset = 0;
	}
}

104 105 106 107
/*
 * Make sure at least "min" bytes are available in the buffer, and
 * return the pointer to the buffer.
 */
108
static void *fill(int min)
S
Sergey Vlasov 已提交
109
{
110 111 112 113
	if (min <= input_len)
		return input_buffer + input_offset;
	if (min > sizeof(input_buffer))
		die("cannot fill %d bytes", min);
114
	flush();
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
	do {
		int ret = xread(input_fd, input_buffer + input_len,
				sizeof(input_buffer) - input_len);
		if (ret <= 0) {
			if (!ret)
				die("early EOF");
			die("read error on input: %s", strerror(errno));
		}
		input_len += ret;
	} while (input_len < min);
	return input_buffer;
}

static void use(int bytes)
{
	if (bytes > input_len)
		die("used more bytes than were available");
	input_len -= bytes;
	input_offset += bytes;
	consumed_bytes += bytes;
}
S
Sergey Vlasov 已提交
136

137
static const char *open_pack_file(const char *pack_name)
138
{
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
	if (from_stdin) {
		input_fd = 0;
		if (!pack_name) {
			static char tmpfile[PATH_MAX];
			snprintf(tmpfile, sizeof(tmpfile),
				 "%s/pack_XXXXXX", get_object_directory());
			output_fd = mkstemp(tmpfile);
			pack_name = xstrdup(tmpfile);
		} else
			output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
		if (output_fd < 0)
			die("unable to create %s: %s\n", pack_name, strerror(errno));
		mmap_fd = output_fd;
	} else {
		input_fd = open(pack_name, O_RDONLY);
		if (input_fd < 0)
			die("cannot open packfile '%s': %s",
			    pack_name, strerror(errno));
		output_fd = -1;
		mmap_fd = input_fd;
	}
160
	SHA1_Init(&input_ctx);
161
	return pack_name;
S
Sergey Vlasov 已提交
162 163 164 165
}

static void parse_pack_header(void)
{
166
	struct pack_header *hdr = fill(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
167 168 169

	/* Header consistency check */
	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
170
		die("pack signature mismatch");
N
Nicolas Pitre 已提交
171
	if (!pack_version_ok(hdr->hdr_version))
172
		die("pack version %d unsupported", ntohl(hdr->hdr_version));
S
Sergey Vlasov 已提交
173 174

	nr_objects = ntohl(hdr->hdr_entries);
175
	use(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
176 177 178 179 180 181 182 183 184 185 186 187 188
}

static void bad_object(unsigned long offset, const char *format,
		       ...) NORETURN __attribute__((format (printf, 2, 3)));

static void bad_object(unsigned long offset, const char *format, ...)
{
	va_list params;
	char buf[1024];

	va_start(params, format);
	vsnprintf(buf, sizeof(buf), format, params);
	va_end(params);
189
	die("pack has bad object at offset %lu: %s", offset, buf);
S
Sergey Vlasov 已提交
190 191
}

192
static void *unpack_entry_data(unsigned long offset, unsigned long size)
S
Sergey Vlasov 已提交
193 194 195 196 197 198 199
{
	z_stream stream;
	void *buf = xmalloc(size);

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buf;
	stream.avail_out = size;
200 201
	stream.next_in = fill(1);
	stream.avail_in = input_len;
S
Sergey Vlasov 已提交
202 203 204 205
	inflateInit(&stream);

	for (;;) {
		int ret = inflate(&stream, 0);
206 207
		use(input_len - stream.avail_in);
		if (stream.total_out == size && ret == Z_STREAM_END)
S
Sergey Vlasov 已提交
208 209 210
			break;
		if (ret != Z_OK)
			bad_object(offset, "inflate returned %d", ret);
211 212
		stream.next_in = fill(1);
		stream.avail_in = input_len;
S
Sergey Vlasov 已提交
213 214 215 216 217
	}
	inflateEnd(&stream);
	return buf;
}

218
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
S
Sergey Vlasov 已提交
219
{
220
	unsigned char *p, c;
221
	unsigned long size, base_offset;
S
Sergey Vlasov 已提交
222 223
	unsigned shift;

224 225 226 227 228 229
	obj->offset = consumed_bytes;

	p = fill(1);
	c = *p;
	use(1);
	obj->type = (c >> 4) & 7;
S
Sergey Vlasov 已提交
230 231 232
	size = (c & 15);
	shift = 4;
	while (c & 0x80) {
233 234 235
		p = fill(1);
		c = *p;
		use(1);
S
Sergey Vlasov 已提交
236 237 238
		size += (c & 0x7fUL) << shift;
		shift += 7;
	}
239
	obj->size = size;
S
Sergey Vlasov 已提交
240

241
	switch (obj->type) {
242
	case OBJ_REF_DELTA:
243 244
		hashcpy(delta_base->sha1, fill(20));
		use(20);
245 246 247
		break;
	case OBJ_OFS_DELTA:
		memset(delta_base, 0, sizeof(*delta_base));
248 249 250
		p = fill(1);
		c = *p;
		use(1);
251 252 253 254
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
			if (!base_offset || base_offset & ~(~0UL >> 7))
255 256 257 258
				bad_object(obj->offset, "offset value overflow for delta base object");
			p = fill(1);
			c = *p;
			use(1);
259 260
			base_offset = (base_offset << 7) + (c & 127);
		}
261 262 263
		delta_base->offset = obj->offset - base_offset;
		if (delta_base->offset >= obj->offset)
			bad_object(obj->offset, "delta base offset is out of bound");
264
		break;
S
Sergey Vlasov 已提交
265 266 267 268 269 270
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
		break;
	default:
271
		bad_object(obj->offset, "bad object type %d", obj->type);
S
Sergey Vlasov 已提交
272
	}
273 274 275 276 277
	obj->hdr_size = consumed_bytes - obj->offset;

	return unpack_entry_data(obj->offset, obj->size);
}

278
static void *get_data_from_pack(struct object_entry *obj)
279 280 281 282 283 284 285
{
	unsigned long from = obj[0].offset + obj[0].hdr_size;
	unsigned long len = obj[1].offset - from;
	unsigned pg_offset = from % getpagesize();
	unsigned char *map, *data;
	z_stream stream;
	int st;
S
Sergey Vlasov 已提交
286

287
	map = mmap(NULL, len + pg_offset, PROT_READ, MAP_PRIVATE,
288
		   mmap_fd, from - pg_offset);
289
	if (map == MAP_FAILED)
290
		die("cannot mmap pack file: %s", strerror(errno));
291 292 293 294 295 296 297 298 299 300 301 302
	data = xmalloc(obj->size);
	memset(&stream, 0, sizeof(stream));
	stream.next_out = data;
	stream.avail_out = obj->size;
	stream.next_in = map + pg_offset;
	stream.avail_in = len;
	inflateInit(&stream);
	while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
	inflateEnd(&stream);
	if (st != Z_STREAM_END || stream.total_out != obj->size)
		die("serious inflate inconsistency");
	munmap(map, len + pg_offset);
S
Sergey Vlasov 已提交
303 304 305
	return data;
}

306
static int find_delta(const union delta_base *base)
S
Sergey Vlasov 已提交
307 308 309 310 311 312 313 314
{
	int first = 0, last = nr_deltas;

        while (first < last) {
                int next = (first + last) / 2;
                struct delta_entry *delta = &deltas[next];
                int cmp;

315
                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
316 317 318 319 320 321 322 323 324 325 326
                if (!cmp)
                        return next;
                if (cmp < 0) {
                        last = next;
                        continue;
                }
                first = next+1;
        }
        return -first-1;
}

327 328
static int find_delta_children(const union delta_base *base,
			       int *first_index, int *last_index)
S
Sergey Vlasov 已提交
329
{
330
	int first = find_delta(base);
S
Sergey Vlasov 已提交
331 332 333 334 335
	int last = first;
	int end = nr_deltas - 1;

	if (first < 0)
		return -1;
336
	while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
337
		--first;
338
	while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
		++last;
	*first_index = first;
	*last_index = last;
	return 0;
}

static void sha1_object(const void *data, unsigned long size,
			enum object_type type, unsigned char *sha1)
{
	SHA_CTX ctx;
	char header[50];
	int header_size;
	const char *type_str;

	switch (type) {
354 355 356 357
	case OBJ_COMMIT: type_str = commit_type; break;
	case OBJ_TREE:   type_str = tree_type; break;
	case OBJ_BLOB:   type_str = blob_type; break;
	case OBJ_TAG:    type_str = tag_type; break;
S
Sergey Vlasov 已提交
358 359 360 361 362 363 364 365 366 367 368 369
	default:
		die("bad type %d", type);
	}

	header_size = sprintf(header, "%s %lu", type_str, size) + 1;

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, header, header_size);
	SHA1_Update(&ctx, data, size);
	SHA1_Final(sha1, &ctx);
}

370
static void resolve_delta(struct object_entry *delta_obj, void *base_data,
S
Sergey Vlasov 已提交
371 372 373 374 375 376
			  unsigned long base_size, enum object_type type)
{
	void *delta_data;
	unsigned long delta_size;
	void *result;
	unsigned long result_size;
377
	union delta_base delta_base;
S
Sergey Vlasov 已提交
378 379
	int j, first, last;

380 381 382
	delta_obj->real_type = type;
	delta_data = get_data_from_pack(delta_obj);
	delta_size = delta_obj->size;
S
Sergey Vlasov 已提交
383 384 385 386
	result = patch_delta(base_data, base_size, delta_data, delta_size,
			     &result_size);
	free(delta_data);
	if (!result)
387 388 389
		bad_object(delta_obj->offset, "failed to apply delta");
	sha1_object(result, result_size, type, delta_obj->sha1);
	nr_resolved_deltas++;
390

391
	hashcpy(delta_base.sha1, delta_obj->sha1);
392
	if (!find_delta_children(&delta_base, &first, &last)) {
393 394 395 396 397
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, result, result_size, type);
		}
398 399 400
	}

	memset(&delta_base, 0, sizeof(delta_base));
401
	delta_base.offset = delta_obj->offset;
402
	if (!find_delta_children(&delta_base, &first, &last)) {
403 404 405 406 407
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_OFS_DELTA)
				resolve_delta(child, result, result_size, type);
		}
S
Sergey Vlasov 已提交
408
	}
409

S
Sergey Vlasov 已提交
410 411 412 413 414 415 416
	free(result);
}

static int compare_delta_entry(const void *a, const void *b)
{
	const struct delta_entry *delta_a = a;
	const struct delta_entry *delta_b = b;
417
	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
418 419
}

420 421
/* Parse all objects and return the pack content SHA1 hash */
static void parse_pack_objects(unsigned char *sha1)
S
Sergey Vlasov 已提交
422
{
N
Nicolas Pitre 已提交
423
	int i, percent = -1;
424
	struct delta_entry *delta = deltas;
S
Sergey Vlasov 已提交
425
	void *data;
426
	struct stat st;
S
Sergey Vlasov 已提交
427 428 429 430 431

	/*
	 * First pass:
	 * - find locations of all objects;
	 * - calculate SHA1 of all non-delta objects;
432
	 * - remember base (SHA1 or offset) for all deltas.
S
Sergey Vlasov 已提交
433
	 */
N
Nicolas Pitre 已提交
434 435
	if (verbose)
		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
S
Sergey Vlasov 已提交
436 437
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
438
		data = unpack_raw_entry(obj, &delta->base);
S
Sergey Vlasov 已提交
439
		obj->real_type = obj->type;
440 441
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
			nr_deltas++;
442
			delta->obj_no = i;
443
			delta++;
S
Sergey Vlasov 已提交
444
		} else
445
			sha1_object(data, obj->size, obj->type, obj->sha1);
S
Sergey Vlasov 已提交
446
		free(data);
N
Nicolas Pitre 已提交
447 448
		if (verbose)
			percent = display_progress(i+1, nr_objects, percent);
S
Sergey Vlasov 已提交
449
	}
450
	objects[i].offset = consumed_bytes;
N
Nicolas Pitre 已提交
451 452
	if (verbose)
		fputc('\n', stderr);
453 454

	/* Check pack integrity */
455
	flush();
456 457
	SHA1_Final(sha1, &input_ctx);
	if (hashcmp(fill(20), sha1))
458
		die("pack is corrupted (SHA1 mismatch)");
459
	use(20);
460 461 462

	/* If input_fd is a file, we should have reached its end now. */
	if (fstat(input_fd, &st))
463
		die("cannot fstat packfile: %s", strerror(errno));
464
	if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
465
		die("pack has junk at the end");
S
Sergey Vlasov 已提交
466

N
Nicolas Pitre 已提交
467 468 469
	if (!nr_deltas)
		return;

470
	/* Sort deltas by base SHA1/offset for fast searching */
S
Sergey Vlasov 已提交
471 472 473 474 475 476 477 478 479 480 481
	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
	      compare_delta_entry);

	/*
	 * Second pass:
	 * - for all non-delta objects, look if it is used as a base for
	 *   deltas;
	 * - if used as a base, uncompress the object and apply all deltas,
	 *   recursively checking if the resulting object is used as a base
	 *   for some more deltas.
	 */
N
Nicolas Pitre 已提交
482 483
	if (verbose)
		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
S
Sergey Vlasov 已提交
484 485
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
486 487
		union delta_base base;
		int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
S
Sergey Vlasov 已提交
488

489
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
S
Sergey Vlasov 已提交
490
			continue;
491
		hashcpy(base.sha1, obj->sha1);
492
		ref = !find_delta_children(&base, &ref_first, &ref_last);
493 494
		memset(&base, 0, sizeof(base));
		base.offset = obj->offset;
495
		ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
496
		if (!ref && !ofs)
S
Sergey Vlasov 已提交
497
			continue;
498
		data = get_data_from_pack(obj);
499
		if (ref)
500 501 502 503
			for (j = ref_first; j <= ref_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_REF_DELTA)
					resolve_delta(child, data,
504
						      obj->size, obj->type);
505
			}
506
		if (ofs)
507 508 509 510
			for (j = ofs_first; j <= ofs_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_OFS_DELTA)
					resolve_delta(child, data,
511
						      obj->size, obj->type);
512
			}
S
Sergey Vlasov 已提交
513
		free(data);
N
Nicolas Pitre 已提交
514 515 516
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
S
Sergey Vlasov 已提交
517
	}
N
Nicolas Pitre 已提交
518 519
	if (verbose && nr_resolved_deltas == nr_deltas)
		fputc('\n', stderr);
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
}

static int write_compressed(int fd, void *in, unsigned int size)
{
	z_stream stream;
	unsigned long maxsize;
	void *out;

	memset(&stream, 0, sizeof(stream));
	deflateInit(&stream, zlib_compression_level);
	maxsize = deflateBound(&stream, size);
	out = xmalloc(maxsize);

	/* Compress it */
	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (deflate(&stream, Z_FINISH) == Z_OK);
	deflateEnd(&stream);

	size = stream.total_out;
	write_or_die(fd, out, size);
	free(out);
	return size;
}

static void append_obj_to_pack(void *buf,
			       unsigned long size, enum object_type type)
{
	struct object_entry *obj = &objects[nr_objects++];
	unsigned char header[10];
	unsigned long s = size;
	int n = 0;
	unsigned char c = (type << 4) | (s & 15);
	s >>= 4;
	while (s) {
		header[n++] = c | 0x80;
		c = s & 0x7f;
		s >>= 7;
	}
	header[n++] = c;
	write_or_die(output_fd, header, n);
	obj[1].offset = obj[0].offset + n;
	obj[1].offset += write_compressed(output_fd, buf, size);
	sha1_object(buf, size, type, obj->sha1);
}

static int delta_pos_compare(const void *_a, const void *_b)
{
	struct delta_entry *a = *(struct delta_entry **)_a;
	struct delta_entry *b = *(struct delta_entry **)_b;
	return a->obj_no - b->obj_no;
}
S
Sergey Vlasov 已提交
574

575 576 577
static void fix_unresolved_deltas(int nr_unresolved)
{
	struct delta_entry **sorted_by_pos;
N
Nicolas Pitre 已提交
578
	int i, n = 0, percent = -1;
579 580 581 582 583 584 585 586 587 588 589 590

	/*
	 * Since many unresolved deltas may well be themselves base objects
	 * for more unresolved deltas, we really want to include the
	 * smallest number of base objects that would cover as much delta
	 * as possible by picking the
	 * trunc deltas first, allowing for other deltas to resolve without
	 * additional base objects.  Since most base objects are to be found
	 * before deltas depending on them, a good heuristic is to start
	 * resolving deltas in the same order as their position in the pack.
	 */
	sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
S
Sergey Vlasov 已提交
591
	for (i = 0; i < nr_deltas; i++) {
592 593 594
		if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
			continue;
		sorted_by_pos[n++] = &deltas[i];
S
Sergey Vlasov 已提交
595
	}
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617
	qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

	for (i = 0; i < n; i++) {
		struct delta_entry *d = sorted_by_pos[i];
		void *data;
		unsigned long size;
		char type[10];
		enum object_type obj_type;
		int j, first, last;

		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
			continue;
		data = read_sha1_file(d->base.sha1, type, &size);
		if (!data)
			continue;
		if      (!strcmp(type, blob_type))   obj_type = OBJ_BLOB;
		else if (!strcmp(type, tree_type))   obj_type = OBJ_TREE;
		else if (!strcmp(type, commit_type)) obj_type = OBJ_COMMIT;
		else if (!strcmp(type, tag_type))    obj_type = OBJ_TAG;
		else die("base object %s is of type '%s'",
			 sha1_to_hex(d->base.sha1), type);

618
		find_delta_children(&d->base, &first, &last);
619 620 621 622 623 624 625 626
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, data, size, obj_type);
		}

		append_obj_to_pack(data, size, obj_type);
		free(data);
N
Nicolas Pitre 已提交
627 628 629
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
630 631
	}
	free(sorted_by_pos);
N
Nicolas Pitre 已提交
632 633
	if (verbose)
		fputc('\n', stderr);
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
}

static void readjust_pack_header_and_sha1(unsigned char *sha1)
{
	struct pack_header hdr;
	SHA_CTX ctx;
	int size;

	/* Rewrite pack header with updated object number */
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
	if (xread(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
		die("cannot read pack header back: %s", strerror(errno));
	hdr.hdr_entries = htonl(nr_objects);
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
	write_or_die(output_fd, &hdr, sizeof(hdr));
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));

	/* Recompute and store the new pack's SHA1 */
	SHA1_Init(&ctx);
	do {
		unsigned char *buf[4096];
		size = xread(output_fd, buf, sizeof(buf));
		if (size < 0)
			die("cannot read pack data back: %s", strerror(errno));
		SHA1_Update(&ctx, buf, size);
	} while (size > 0);
	SHA1_Final(sha1, &ctx);
	write_or_die(output_fd, sha1, 20);
S
Sergey Vlasov 已提交
665 666 667 668 669 670
}

static int sha1_compare(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
671
	return hashcmp(a->sha1, b->sha1);
S
Sergey Vlasov 已提交
672 673
}

674 675 676 677
/*
 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 * the SHA1 hash of sorted object names.
 */
678
static const char *write_index_file(const char *index_name, unsigned char *sha1)
S
Sergey Vlasov 已提交
679 680
{
	struct sha1file *f;
681
	struct object_entry **sorted_by_sha, **list, **last;
S
Sergey Vlasov 已提交
682
	unsigned int array[256];
683
	int i, fd;
J
Junio C Hamano 已提交
684
	SHA_CTX ctx;
S
Sergey Vlasov 已提交
685

686 687 688 689 690 691 692 693 694 695 696 697 698
	if (nr_objects) {
		sorted_by_sha =
			xcalloc(nr_objects, sizeof(struct object_entry *));
		list = sorted_by_sha;
		last = sorted_by_sha + nr_objects;
		for (i = 0; i < nr_objects; ++i)
			sorted_by_sha[i] = &objects[i];
		qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
		      sha1_compare);

	}
	else
		sorted_by_sha = list = last = NULL;
S
Sergey Vlasov 已提交
699

700 701 702 703 704 705 706 707 708 709 710 711 712
	if (!index_name) {
		static char tmpfile[PATH_MAX];
		snprintf(tmpfile, sizeof(tmpfile),
			 "%s/index_XXXXXX", get_object_directory());
		fd = mkstemp(tmpfile);
		index_name = xstrdup(tmpfile);
	} else {
		unlink(index_name);
		fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
	}
	if (fd < 0)
		die("unable to create %s: %s", index_name, strerror(errno));
	f = sha1fd(fd, index_name);
S
Sergey Vlasov 已提交
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *obj = *next;
			if (obj->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
	sha1write(f, array, 256 * sizeof(int));

J
Junio C Hamano 已提交
732 733 734 735 736
	/* recompute the SHA1 hash of sorted object names.
	 * currently pack-objects does not do this, but that
	 * can be fixed.
	 */
	SHA1_Init(&ctx);
S
Sergey Vlasov 已提交
737 738 739 740 741 742 743 744 745
	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = *list++;
		unsigned int offset = htonl(obj->offset);
		sha1write(f, &offset, 4);
		sha1write(f, obj->sha1, 20);
J
Junio C Hamano 已提交
746
		SHA1_Update(&ctx, obj->sha1, 20);
S
Sergey Vlasov 已提交
747
	}
748
	sha1write(f, sha1, 20);
S
Sergey Vlasov 已提交
749 750
	sha1close(f, NULL, 1);
	free(sorted_by_sha);
J
Junio C Hamano 已提交
751
	SHA1_Final(sha1, &ctx);
752 753 754 755 756
	return index_name;
}

static void final(const char *final_pack_name, const char *curr_pack_name,
		  const char *final_index_name, const char *curr_index_name,
757
		  const char *keep_name, const char *keep_msg,
758 759
		  unsigned char *sha1)
{
760
	char *report = "pack";
761 762 763 764 765 766 767 768 769 770 771 772
	char name[PATH_MAX];
	int err;

	if (!from_stdin) {
		close(input_fd);
	} else {
		err = close(output_fd);
		if (err)
			die("error while closing pack file: %s", strerror(errno));
		chmod(curr_pack_name, 0444);
	}

773 774 775 776 777 778 779
	if (keep_msg) {
		int keep_fd, keep_msg_len = strlen(keep_msg);
		if (!keep_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
				 get_object_directory(), sha1_to_hex(sha1));
			keep_name = name;
		}
780 781 782 783 784 785 786 787 788 789
		keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (keep_fd < 0) {
			if (errno != EEXIST)
				die("cannot write keep file");
		} else {
			if (keep_msg_len > 0) {
				write_or_die(keep_fd, keep_msg, keep_msg_len);
				write_or_die(keep_fd, "\n", 1);
			}
			close(keep_fd);
790
			report = "keep";
791 792 793
		}
	}

794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
	if (final_pack_name != curr_pack_name) {
		if (!final_pack_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
				 get_object_directory(), sha1_to_hex(sha1));
			final_pack_name = name;
		}
		if (move_temp_to_file(curr_pack_name, final_pack_name))
			die("cannot store pack file");
	}

	chmod(curr_index_name, 0444);
	if (final_index_name != curr_index_name) {
		if (!final_index_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
				 get_object_directory(), sha1_to_hex(sha1));
			final_index_name = name;
		}
		if (move_temp_to_file(curr_index_name, final_index_name))
			die("cannot store index file");
	}
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834

	if (!from_stdin) {
		printf("%s\n", sha1_to_hex(sha1));
	} else {
		char buf[48];
		int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
				   report, sha1_to_hex(sha1));
		xwrite(1, buf, len);

		/*
		 * Let's just mimic git-unpack-objects here and write
		 * the last part of the input buffer to stdout.
		 */
		while (input_len) {
			err = xwrite(1, input_buffer + input_offset, input_len);
			if (err <= 0)
				break;
			input_len -= err;
			input_offset += err;
		}
	}
S
Sergey Vlasov 已提交
835 836 837 838
}

int main(int argc, char **argv)
{
839
	int i, fix_thin_pack = 0;
840 841
	const char *curr_pack, *pack_name = NULL;
	const char *curr_index, *index_name = NULL;
842 843
	const char *keep_name = NULL, *keep_msg = NULL;
	char *index_name_buf = NULL, *keep_name_buf = NULL;
J
Junio C Hamano 已提交
844
	unsigned char sha1[20];
S
Sergey Vlasov 已提交
845 846 847 848 849

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
850 851
			if (!strcmp(arg, "--stdin")) {
				from_stdin = 1;
852 853
			} else if (!strcmp(arg, "--fix-thin")) {
				fix_thin_pack = 1;
854 855 856 857
			} else if (!strcmp(arg, "--keep")) {
				keep_msg = "";
			} else if (!strncmp(arg, "--keep=", 7)) {
				keep_msg = arg + 7;
858 859 860 861 862 863 864 865 866 867 868 869 870
			} else if (!strncmp(arg, "--pack_header=", 14)) {
				struct pack_header *hdr;
				char *c;

				hdr = (struct pack_header *)input_buffer;
				hdr->hdr_signature = htonl(PACK_SIGNATURE);
				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
				if (*c != ',')
					die("bad %s", arg);
				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
				if (*c)
					die("bad %s", arg);
				input_len = sizeof(*hdr);
N
Nicolas Pitre 已提交
871 872
			} else if (!strcmp(arg, "-v")) {
				verbose = 1;
873
			} else if (!strcmp(arg, "-o")) {
S
Sergey Vlasov 已提交
874 875 876 877 878 879 880 881 882 883 884 885 886
				if (index_name || (i+1) >= argc)
					usage(index_pack_usage);
				index_name = argv[++i];
			} else
				usage(index_pack_usage);
			continue;
		}

		if (pack_name)
			usage(index_pack_usage);
		pack_name = arg;
	}

887
	if (!pack_name && !from_stdin)
S
Sergey Vlasov 已提交
888
		usage(index_pack_usage);
889 890
	if (fix_thin_pack && !from_stdin)
		die("--fix-thin cannot be used without --stdin");
891
	if (!index_name && pack_name) {
S
Sergey Vlasov 已提交
892
		int len = strlen(pack_name);
893
		if (!has_extension(pack_name, ".pack"))
S
Sergey Vlasov 已提交
894 895
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
P
Pavel Roskin 已提交
896
		index_name_buf = xmalloc(len);
S
Sergey Vlasov 已提交
897 898 899 900
		memcpy(index_name_buf, pack_name, len - 5);
		strcpy(index_name_buf + len - 5, ".idx");
		index_name = index_name_buf;
	}
901 902 903 904 905 906 907 908 909 910
	if (keep_msg && !keep_name && pack_name) {
		int len = strlen(pack_name);
		if (!has_extension(pack_name, ".pack"))
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
		keep_name_buf = xmalloc(len);
		memcpy(keep_name_buf, pack_name, len - 5);
		strcpy(keep_name_buf + len - 5, ".keep");
		keep_name = keep_name_buf;
	}
S
Sergey Vlasov 已提交
911

912
	curr_pack = open_pack_file(pack_name);
S
Sergey Vlasov 已提交
913
	parse_pack_header();
914 915
	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
	deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
N
Nicolas Pitre 已提交
916 917
	if (verbose)
		setup_progress_signal();
918
	parse_pack_objects(sha1);
919 920 921
	if (nr_deltas != nr_resolved_deltas) {
		if (fix_thin_pack) {
			int nr_unresolved = nr_deltas - nr_resolved_deltas;
N
Nicolas Pitre 已提交
922
			int nr_objects_initial = nr_objects;
923 924 925 926 927 928
			if (nr_unresolved <= 0)
				die("confusion beyond insanity");
			objects = xrealloc(objects,
					   (nr_objects + nr_unresolved + 1)
					   * sizeof(*objects));
			fix_unresolved_deltas(nr_unresolved);
N
Nicolas Pitre 已提交
929 930 931
			if (verbose)
				fprintf(stderr, "%d objects were added to complete this thin pack.\n",
					nr_objects - nr_objects_initial);
932 933 934 935 936 937 938 939 940
			readjust_pack_header_and_sha1(sha1);
		}
		if (nr_deltas != nr_resolved_deltas)
			die("pack has %d unresolved deltas",
			    nr_deltas - nr_resolved_deltas);
	} else {
		/* Flush remaining pack final 20-byte SHA1. */
		flush();
	}
S
Sergey Vlasov 已提交
941
	free(deltas);
942
	curr_index = write_index_file(index_name, sha1);
943 944 945 946
	final(pack_name, curr_pack,
		index_name, curr_index,
		keep_name, keep_msg,
		sha1);
S
Sergey Vlasov 已提交
947 948
	free(objects);
	free(index_name_buf);
949
	free(keep_name_buf);
S
Sergey Vlasov 已提交
950 951 952

	return 0;
}