index-pack.c 26.4 KB
Newer Older
S
Sergey Vlasov 已提交
1 2 3 4
#include "cache.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
S
Sergey Vlasov 已提交
9 10

static const char index_pack_usage[] =
11
"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
S
Sergey Vlasov 已提交
12 13 14

struct object_entry
{
15
	off_t offset;
16 17
	unsigned long size;
	unsigned int hdr_size;
18
	uint32_t crc32;
S
Sergey Vlasov 已提交
19 20 21 22 23
	enum object_type type;
	enum object_type real_type;
	unsigned char sha1[20];
};

24 25
union delta_base {
	unsigned char sha1[20];
26
	off_t offset;
27 28
};

29 30 31 32 33 34
/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ	20

S
Sergey Vlasov 已提交
35 36
struct delta_entry
{
37
	union delta_base base;
38
	int obj_no;
S
Sergey Vlasov 已提交
39 40 41 42 43 44
};

static struct object_entry *objects;
static struct delta_entry *deltas;
static int nr_objects;
static int nr_deltas;
45
static int nr_resolved_deltas;
S
Sergey Vlasov 已提交
46

47
static int from_stdin;
N
Nicolas Pitre 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
static int verbose;

static volatile sig_atomic_t progress_update;

static void progress_interval(int signum)
{
	progress_update = 1;
}

static void setup_progress_signal(void)
{
	struct sigaction sa;
	struct itimerval v;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = progress_interval;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_RESTART;
	sigaction(SIGALRM, &sa, NULL);

	v.it_interval.tv_sec = 1;
	v.it_interval.tv_usec = 0;
	v.it_value = v.it_interval;
	setitimer(ITIMER_REAL, &v, NULL);

}

static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
{
	unsigned percent = n * 100 / total;
	if (percent != last_pc || progress_update) {
		fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
		progress_update = 0;
	}
	return percent;
}
84

85 86
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
87 88
static unsigned int input_offset, input_len;
static off_t consumed_bytes;
89
static SHA_CTX input_ctx;
90
static uint32_t input_crc32;
91
static int input_fd, output_fd, pack_fd;
92

93
/* Discard current buffer used content. */
94
static void flush(void)
95 96 97 98 99
{
	if (input_offset) {
		if (output_fd >= 0)
			write_or_die(output_fd, input_buffer, input_offset);
		SHA1_Update(&input_ctx, input_buffer, input_offset);
100
		memmove(input_buffer, input_buffer + input_offset, input_len);
101 102 103 104
		input_offset = 0;
	}
}

105 106 107 108
/*
 * Make sure at least "min" bytes are available in the buffer, and
 * return the pointer to the buffer.
 */
109
static void *fill(int min)
S
Sergey Vlasov 已提交
110
{
111 112 113 114
	if (min <= input_len)
		return input_buffer + input_offset;
	if (min > sizeof(input_buffer))
		die("cannot fill %d bytes", min);
115
	flush();
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
	do {
		int ret = xread(input_fd, input_buffer + input_len,
				sizeof(input_buffer) - input_len);
		if (ret <= 0) {
			if (!ret)
				die("early EOF");
			die("read error on input: %s", strerror(errno));
		}
		input_len += ret;
	} while (input_len < min);
	return input_buffer;
}

static void use(int bytes)
{
	if (bytes > input_len)
		die("used more bytes than were available");
133
	input_crc32 = crc32(input_crc32, input_buffer + input_offset, bytes);
134 135
	input_len -= bytes;
	input_offset += bytes;
136 137 138 139

	/* make sure off_t is sufficiently large not to wrap */
	if (consumed_bytes > consumed_bytes + bytes)
		die("pack too large for current definition of off_t");
140 141
	consumed_bytes += bytes;
}
S
Sergey Vlasov 已提交
142

143
static const char *open_pack_file(const char *pack_name)
144
{
145 146 147 148 149
	if (from_stdin) {
		input_fd = 0;
		if (!pack_name) {
			static char tmpfile[PATH_MAX];
			snprintf(tmpfile, sizeof(tmpfile),
150
				 "%s/tmp_pack_XXXXXX", get_object_directory());
151 152 153 154 155 156
			output_fd = mkstemp(tmpfile);
			pack_name = xstrdup(tmpfile);
		} else
			output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
		if (output_fd < 0)
			die("unable to create %s: %s\n", pack_name, strerror(errno));
157
		pack_fd = output_fd;
158 159 160 161 162 163
	} else {
		input_fd = open(pack_name, O_RDONLY);
		if (input_fd < 0)
			die("cannot open packfile '%s': %s",
			    pack_name, strerror(errno));
		output_fd = -1;
164
		pack_fd = input_fd;
165
	}
166
	SHA1_Init(&input_ctx);
167
	return pack_name;
S
Sergey Vlasov 已提交
168 169 170 171
}

static void parse_pack_header(void)
{
172
	struct pack_header *hdr = fill(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
173 174 175

	/* Header consistency check */
	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
176
		die("pack signature mismatch");
N
Nicolas Pitre 已提交
177
	if (!pack_version_ok(hdr->hdr_version))
178
		die("pack version %d unsupported", ntohl(hdr->hdr_version));
S
Sergey Vlasov 已提交
179 180

	nr_objects = ntohl(hdr->hdr_entries);
181
	use(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
182 183 184 185 186 187 188 189 190 191 192 193 194
}

static void bad_object(unsigned long offset, const char *format,
		       ...) NORETURN __attribute__((format (printf, 2, 3)));

static void bad_object(unsigned long offset, const char *format, ...)
{
	va_list params;
	char buf[1024];

	va_start(params, format);
	vsnprintf(buf, sizeof(buf), format, params);
	va_end(params);
195
	die("pack has bad object at offset %lu: %s", offset, buf);
S
Sergey Vlasov 已提交
196 197
}

198
static void *unpack_entry_data(unsigned long offset, unsigned long size)
S
Sergey Vlasov 已提交
199 200 201 202 203 204 205
{
	z_stream stream;
	void *buf = xmalloc(size);

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buf;
	stream.avail_out = size;
206 207
	stream.next_in = fill(1);
	stream.avail_in = input_len;
S
Sergey Vlasov 已提交
208 209 210 211
	inflateInit(&stream);

	for (;;) {
		int ret = inflate(&stream, 0);
212 213
		use(input_len - stream.avail_in);
		if (stream.total_out == size && ret == Z_STREAM_END)
S
Sergey Vlasov 已提交
214 215 216
			break;
		if (ret != Z_OK)
			bad_object(offset, "inflate returned %d", ret);
217 218
		stream.next_in = fill(1);
		stream.avail_in = input_len;
S
Sergey Vlasov 已提交
219 220 221 222 223
	}
	inflateEnd(&stream);
	return buf;
}

224
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
S
Sergey Vlasov 已提交
225
{
226
	unsigned char *p, c;
227 228
	unsigned long size;
	off_t base_offset;
S
Sergey Vlasov 已提交
229
	unsigned shift;
230
	void *data;
S
Sergey Vlasov 已提交
231

232
	obj->offset = consumed_bytes;
233
	input_crc32 = crc32(0, Z_NULL, 0);
234 235 236 237 238

	p = fill(1);
	c = *p;
	use(1);
	obj->type = (c >> 4) & 7;
S
Sergey Vlasov 已提交
239 240 241
	size = (c & 15);
	shift = 4;
	while (c & 0x80) {
242 243 244
		p = fill(1);
		c = *p;
		use(1);
S
Sergey Vlasov 已提交
245 246 247
		size += (c & 0x7fUL) << shift;
		shift += 7;
	}
248
	obj->size = size;
S
Sergey Vlasov 已提交
249

250
	switch (obj->type) {
251
	case OBJ_REF_DELTA:
252 253
		hashcpy(delta_base->sha1, fill(20));
		use(20);
254 255 256
		break;
	case OBJ_OFS_DELTA:
		memset(delta_base, 0, sizeof(*delta_base));
257 258 259
		p = fill(1);
		c = *p;
		use(1);
260 261 262
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
263
			if (!base_offset || MSB(base_offset, 7))
264 265 266 267
				bad_object(obj->offset, "offset value overflow for delta base object");
			p = fill(1);
			c = *p;
			use(1);
268 269
			base_offset = (base_offset << 7) + (c & 127);
		}
270 271 272
		delta_base->offset = obj->offset - base_offset;
		if (delta_base->offset >= obj->offset)
			bad_object(obj->offset, "delta base offset is out of bound");
273
		break;
S
Sergey Vlasov 已提交
274 275 276 277 278 279
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
		break;
	default:
280
		bad_object(obj->offset, "unknown object type %d", obj->type);
S
Sergey Vlasov 已提交
281
	}
282 283
	obj->hdr_size = consumed_bytes - obj->offset;

284 285 286
	data = unpack_entry_data(obj->offset, obj->size);
	obj->crc32 = input_crc32;
	return data;
287 288
}

289
static void *get_data_from_pack(struct object_entry *obj)
290 291 292
{
	unsigned long from = obj[0].offset + obj[0].hdr_size;
	unsigned long len = obj[1].offset - from;
293
	unsigned long rdy = 0;
294
	unsigned char *src, *data;
295 296
	z_stream stream;
	int st;
S
Sergey Vlasov 已提交
297

298
	src = xmalloc(len);
299 300 301 302 303 304 305
	data = src;
	do {
		ssize_t n = pread(pack_fd, data + rdy, len - rdy, from + rdy);
		if (n <= 0)
			die("cannot pread pack file: %s", strerror(errno));
		rdy += n;
	} while (rdy < len);
306 307 308 309
	data = xmalloc(obj->size);
	memset(&stream, 0, sizeof(stream));
	stream.next_out = data;
	stream.avail_out = obj->size;
310
	stream.next_in = src;
311 312 313 314 315 316
	stream.avail_in = len;
	inflateInit(&stream);
	while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
	inflateEnd(&stream);
	if (st != Z_STREAM_END || stream.total_out != obj->size)
		die("serious inflate inconsistency");
317
	free(src);
S
Sergey Vlasov 已提交
318 319 320
	return data;
}

321
static int find_delta(const union delta_base *base)
S
Sergey Vlasov 已提交
322 323 324 325 326 327 328 329
{
	int first = 0, last = nr_deltas;

        while (first < last) {
                int next = (first + last) / 2;
                struct delta_entry *delta = &deltas[next];
                int cmp;

330
                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
331 332 333 334 335 336 337 338 339 340 341
                if (!cmp)
                        return next;
                if (cmp < 0) {
                        last = next;
                        continue;
                }
                first = next+1;
        }
        return -first-1;
}

342 343
static int find_delta_children(const union delta_base *base,
			       int *first_index, int *last_index)
S
Sergey Vlasov 已提交
344
{
345
	int first = find_delta(base);
S
Sergey Vlasov 已提交
346 347 348 349 350
	int last = first;
	int end = nr_deltas - 1;

	if (first < 0)
		return -1;
351
	while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
352
		--first;
353
	while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
354 355 356 357 358 359 360
		++last;
	*first_index = first;
	*last_index = last;
	return 0;
}

static void sha1_object(const void *data, unsigned long size,
361
			enum object_type type, unsigned char *sha1)
S
Sergey Vlasov 已提交
362
{
N
Nicolas Pitre 已提交
363
	hash_sha1_file(data, size, typename(type), sha1);
364
	if (has_sha1_file(sha1)) {
365 366 367 368 369 370 371 372 373
		void *has_data;
		enum object_type has_type;
		unsigned long has_size;
		has_data = read_sha1_file(sha1, &has_type, &has_size);
		if (!has_data)
			die("cannot read existing object %s", sha1_to_hex(sha1));
		if (size != has_size || type != has_type ||
		    memcmp(data, has_data, size) != 0)
			die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
374
		free(has_data);
375
	}
S
Sergey Vlasov 已提交
376 377
}

378
static void resolve_delta(struct object_entry *delta_obj, void *base_data,
S
Sergey Vlasov 已提交
379 380 381 382 383 384
			  unsigned long base_size, enum object_type type)
{
	void *delta_data;
	unsigned long delta_size;
	void *result;
	unsigned long result_size;
385
	union delta_base delta_base;
S
Sergey Vlasov 已提交
386 387
	int j, first, last;

388 389 390
	delta_obj->real_type = type;
	delta_data = get_data_from_pack(delta_obj);
	delta_size = delta_obj->size;
S
Sergey Vlasov 已提交
391 392 393 394
	result = patch_delta(base_data, base_size, delta_data, delta_size,
			     &result_size);
	free(delta_data);
	if (!result)
395
		bad_object(delta_obj->offset, "failed to apply delta");
396
	sha1_object(result, result_size, type, delta_obj->sha1);
397
	nr_resolved_deltas++;
398

399
	hashcpy(delta_base.sha1, delta_obj->sha1);
400
	if (!find_delta_children(&delta_base, &first, &last)) {
401 402 403 404 405
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, result, result_size, type);
		}
406 407 408
	}

	memset(&delta_base, 0, sizeof(delta_base));
409
	delta_base.offset = delta_obj->offset;
410
	if (!find_delta_children(&delta_base, &first, &last)) {
411 412 413 414 415
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_OFS_DELTA)
				resolve_delta(child, result, result_size, type);
		}
S
Sergey Vlasov 已提交
416
	}
417

S
Sergey Vlasov 已提交
418 419 420 421 422 423 424
	free(result);
}

static int compare_delta_entry(const void *a, const void *b)
{
	const struct delta_entry *delta_a = a;
	const struct delta_entry *delta_b = b;
425
	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
426 427
}

428 429
/* Parse all objects and return the pack content SHA1 hash */
static void parse_pack_objects(unsigned char *sha1)
S
Sergey Vlasov 已提交
430
{
N
Nicolas Pitre 已提交
431
	int i, percent = -1;
432
	struct delta_entry *delta = deltas;
S
Sergey Vlasov 已提交
433
	void *data;
434
	struct stat st;
S
Sergey Vlasov 已提交
435 436 437 438 439

	/*
	 * First pass:
	 * - find locations of all objects;
	 * - calculate SHA1 of all non-delta objects;
440
	 * - remember base (SHA1 or offset) for all deltas.
S
Sergey Vlasov 已提交
441
	 */
N
Nicolas Pitre 已提交
442 443
	if (verbose)
		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
S
Sergey Vlasov 已提交
444 445
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
446
		data = unpack_raw_entry(obj, &delta->base);
S
Sergey Vlasov 已提交
447
		obj->real_type = obj->type;
448 449
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
			nr_deltas++;
450
			delta->obj_no = i;
451
			delta++;
S
Sergey Vlasov 已提交
452
		} else
453
			sha1_object(data, obj->size, obj->type, obj->sha1);
S
Sergey Vlasov 已提交
454
		free(data);
N
Nicolas Pitre 已提交
455 456
		if (verbose)
			percent = display_progress(i+1, nr_objects, percent);
S
Sergey Vlasov 已提交
457
	}
458
	objects[i].offset = consumed_bytes;
N
Nicolas Pitre 已提交
459 460
	if (verbose)
		fputc('\n', stderr);
461 462

	/* Check pack integrity */
463
	flush();
464 465
	SHA1_Final(sha1, &input_ctx);
	if (hashcmp(fill(20), sha1))
466
		die("pack is corrupted (SHA1 mismatch)");
467
	use(20);
468 469 470

	/* If input_fd is a file, we should have reached its end now. */
	if (fstat(input_fd, &st))
471
		die("cannot fstat packfile: %s", strerror(errno));
J
Johannes Schindelin 已提交
472 473
	if (S_ISREG(st.st_mode) &&
			lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
474
		die("pack has junk at the end");
S
Sergey Vlasov 已提交
475

N
Nicolas Pitre 已提交
476 477 478
	if (!nr_deltas)
		return;

479
	/* Sort deltas by base SHA1/offset for fast searching */
S
Sergey Vlasov 已提交
480 481 482 483 484 485 486 487 488 489 490
	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
	      compare_delta_entry);

	/*
	 * Second pass:
	 * - for all non-delta objects, look if it is used as a base for
	 *   deltas;
	 * - if used as a base, uncompress the object and apply all deltas,
	 *   recursively checking if the resulting object is used as a base
	 *   for some more deltas.
	 */
N
Nicolas Pitre 已提交
491 492
	if (verbose)
		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
S
Sergey Vlasov 已提交
493 494
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
495 496
		union delta_base base;
		int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
S
Sergey Vlasov 已提交
497

498
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
S
Sergey Vlasov 已提交
499
			continue;
500
		hashcpy(base.sha1, obj->sha1);
501
		ref = !find_delta_children(&base, &ref_first, &ref_last);
502 503
		memset(&base, 0, sizeof(base));
		base.offset = obj->offset;
504
		ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
505
		if (!ref && !ofs)
S
Sergey Vlasov 已提交
506
			continue;
507
		data = get_data_from_pack(obj);
508
		if (ref)
509 510 511 512
			for (j = ref_first; j <= ref_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_REF_DELTA)
					resolve_delta(child, data,
513
						      obj->size, obj->type);
514
			}
515
		if (ofs)
516 517 518 519
			for (j = ofs_first; j <= ofs_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_OFS_DELTA)
					resolve_delta(child, data,
520
						      obj->size, obj->type);
521
			}
S
Sergey Vlasov 已提交
522
		free(data);
N
Nicolas Pitre 已提交
523 524 525
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
S
Sergey Vlasov 已提交
526
	}
N
Nicolas Pitre 已提交
527 528
	if (verbose && nr_resolved_deltas == nr_deltas)
		fputc('\n', stderr);
529 530
}

531
static int write_compressed(int fd, void *in, unsigned int size, uint32_t *obj_crc)
532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
{
	z_stream stream;
	unsigned long maxsize;
	void *out;

	memset(&stream, 0, sizeof(stream));
	deflateInit(&stream, zlib_compression_level);
	maxsize = deflateBound(&stream, size);
	out = xmalloc(maxsize);

	/* Compress it */
	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (deflate(&stream, Z_FINISH) == Z_OK);
	deflateEnd(&stream);

	size = stream.total_out;
	write_or_die(fd, out, size);
552
	*obj_crc = crc32(*obj_crc, out, size);
553 554 555 556
	free(out);
	return size;
}

557
static void append_obj_to_pack(const unsigned char *sha1, void *buf,
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
			       unsigned long size, enum object_type type)
{
	struct object_entry *obj = &objects[nr_objects++];
	unsigned char header[10];
	unsigned long s = size;
	int n = 0;
	unsigned char c = (type << 4) | (s & 15);
	s >>= 4;
	while (s) {
		header[n++] = c | 0x80;
		c = s & 0x7f;
		s >>= 7;
	}
	header[n++] = c;
	write_or_die(output_fd, header, n);
573 574
	obj[0].crc32 = crc32(0, Z_NULL, 0);
	obj[0].crc32 = crc32(obj[0].crc32, header, n);
575
	obj[1].offset = obj[0].offset + n;
576
	obj[1].offset += write_compressed(output_fd, buf, size, &obj[0].crc32);
577
	hashcpy(obj->sha1, sha1);
578 579 580 581 582 583 584 585
}

static int delta_pos_compare(const void *_a, const void *_b)
{
	struct delta_entry *a = *(struct delta_entry **)_a;
	struct delta_entry *b = *(struct delta_entry **)_b;
	return a->obj_no - b->obj_no;
}
S
Sergey Vlasov 已提交
586

587 588 589
static void fix_unresolved_deltas(int nr_unresolved)
{
	struct delta_entry **sorted_by_pos;
N
Nicolas Pitre 已提交
590
	int i, n = 0, percent = -1;
591 592 593 594 595 596 597 598 599 600 601 602

	/*
	 * Since many unresolved deltas may well be themselves base objects
	 * for more unresolved deltas, we really want to include the
	 * smallest number of base objects that would cover as much delta
	 * as possible by picking the
	 * trunc deltas first, allowing for other deltas to resolve without
	 * additional base objects.  Since most base objects are to be found
	 * before deltas depending on them, a good heuristic is to start
	 * resolving deltas in the same order as their position in the pack.
	 */
	sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
S
Sergey Vlasov 已提交
603
	for (i = 0; i < nr_deltas; i++) {
604 605 606
		if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
			continue;
		sorted_by_pos[n++] = &deltas[i];
S
Sergey Vlasov 已提交
607
	}
608 609 610 611 612 613
	qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

	for (i = 0; i < n; i++) {
		struct delta_entry *d = sorted_by_pos[i];
		void *data;
		unsigned long size;
614
		enum object_type type;
615 616 617 618
		int j, first, last;

		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
			continue;
619
		data = read_sha1_file(d->base.sha1, &type, &size);
620 621 622
		if (!data)
			continue;

623
		find_delta_children(&d->base, &first, &last);
624 625 626
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
627
				resolve_delta(child, data, size, type);
628 629
		}

630 631 632
		if (check_sha1_signature(d->base.sha1, data, size, typename(type)))
			die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
		append_obj_to_pack(d->base.sha1, data, size, type);
633
		free(data);
N
Nicolas Pitre 已提交
634 635 636
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
637 638
	}
	free(sorted_by_pos);
N
Nicolas Pitre 已提交
639 640
	if (verbose)
		fputc('\n', stderr);
641 642 643 644 645 646 647 648 649 650 651
}

static void readjust_pack_header_and_sha1(unsigned char *sha1)
{
	struct pack_header hdr;
	SHA_CTX ctx;
	int size;

	/* Rewrite pack header with updated object number */
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
652
	if (read_in_full(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671
		die("cannot read pack header back: %s", strerror(errno));
	hdr.hdr_entries = htonl(nr_objects);
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
	write_or_die(output_fd, &hdr, sizeof(hdr));
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));

	/* Recompute and store the new pack's SHA1 */
	SHA1_Init(&ctx);
	do {
		unsigned char *buf[4096];
		size = xread(output_fd, buf, sizeof(buf));
		if (size < 0)
			die("cannot read pack data back: %s", strerror(errno));
		SHA1_Update(&ctx, buf, size);
	} while (size > 0);
	SHA1_Final(sha1, &ctx);
	write_or_die(output_fd, sha1, 20);
S
Sergey Vlasov 已提交
672 673 674 675 676 677
}

static int sha1_compare(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
678
	return hashcmp(a->sha1, b->sha1);
S
Sergey Vlasov 已提交
679 680
}

681 682 683 684
/*
 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 * the SHA1 hash of sorted object names.
 */
685
static const char *write_index_file(const char *index_name, unsigned char *sha1)
S
Sergey Vlasov 已提交
686 687
{
	struct sha1file *f;
688
	struct object_entry **sorted_by_sha, **list, **last;
689
	uint32_t array[256];
690
	int i, fd;
J
Junio C Hamano 已提交
691
	SHA_CTX ctx;
692
	uint32_t index_version;
S
Sergey Vlasov 已提交
693

694 695 696 697 698 699 700 701 702 703 704 705
	if (nr_objects) {
		sorted_by_sha =
			xcalloc(nr_objects, sizeof(struct object_entry *));
		list = sorted_by_sha;
		last = sorted_by_sha + nr_objects;
		for (i = 0; i < nr_objects; ++i)
			sorted_by_sha[i] = &objects[i];
		qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
		      sha1_compare);
	}
	else
		sorted_by_sha = list = last = NULL;
S
Sergey Vlasov 已提交
706

707 708 709
	if (!index_name) {
		static char tmpfile[PATH_MAX];
		snprintf(tmpfile, sizeof(tmpfile),
710
			 "%s/tmp_idx_XXXXXX", get_object_directory());
711 712 713 714 715 716 717 718 719
		fd = mkstemp(tmpfile);
		index_name = xstrdup(tmpfile);
	} else {
		unlink(index_name);
		fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
	}
	if (fd < 0)
		die("unable to create %s: %s", index_name, strerror(errno));
	f = sha1fd(fd, index_name);
S
Sergey Vlasov 已提交
720

721 722 723 724 725 726 727 728 729 730 731
	/* if last object's offset is >= 2^31 we should use index V2 */
	index_version = (objects[nr_objects-1].offset >> 31) ? 2 : 1;

	/* index versions 2 and above need a header */
	if (index_version >= 2) {
		struct pack_idx_header hdr;
		hdr.idx_signature = htonl(PACK_IDX_SIGNATURE);
		hdr.idx_version = htonl(index_version);
		sha1write(f, &hdr, sizeof(hdr));
	}

S
Sergey Vlasov 已提交
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *obj = *next;
			if (obj->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
748
	sha1write(f, array, 256 * 4);
S
Sergey Vlasov 已提交
749

750
	/* compute the SHA1 hash of sorted object names. */
J
Junio C Hamano 已提交
751
	SHA1_Init(&ctx);
752

S
Sergey Vlasov 已提交
753 754 755 756 757 758
	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = *list++;
759 760 761 762
		if (index_version < 2) {
			uint32_t offset = htonl(obj->offset);
			sha1write(f, &offset, 4);
		}
S
Sergey Vlasov 已提交
763
		sha1write(f, obj->sha1, 20);
J
Junio C Hamano 已提交
764
		SHA1_Update(&ctx, obj->sha1, 20);
S
Sergey Vlasov 已提交
765
	}
766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802

	if (index_version >= 2) {
		unsigned int nr_large_offset = 0;

		/* write the crc32 table */
		list = sorted_by_sha;
		for (i = 0; i < nr_objects; i++) {
			struct object_entry *obj = *list++;
			uint32_t crc32_val = htonl(obj->crc32);
			sha1write(f, &crc32_val, 4);
		}

		/* write the 32-bit offset table */
		list = sorted_by_sha;
		for (i = 0; i < nr_objects; i++) {
			struct object_entry *obj = *list++;
			uint32_t offset = (obj->offset <= 0x7fffffff) ?
				obj->offset : (0x80000000 | nr_large_offset++);
			offset = htonl(offset);
			sha1write(f, &offset, 4);
		}

		/* write the large offset table */
		list = sorted_by_sha;
		while (nr_large_offset) {
			struct object_entry *obj = *list++;
			uint64_t offset = obj->offset;
			if (offset > 0x7fffffff) {
				uint32_t split[2];
				split[0]	= htonl(offset >> 32);
				split[1] = htonl(offset & 0xffffffff);
				sha1write(f, split, 8);
				nr_large_offset--;
			}
		}
	}

803
	sha1write(f, sha1, 20);
S
Sergey Vlasov 已提交
804 805
	sha1close(f, NULL, 1);
	free(sorted_by_sha);
J
Junio C Hamano 已提交
806
	SHA1_Final(sha1, &ctx);
807 808 809 810 811
	return index_name;
}

static void final(const char *final_pack_name, const char *curr_pack_name,
		  const char *final_index_name, const char *curr_index_name,
812
		  const char *keep_name, const char *keep_msg,
813 814
		  unsigned char *sha1)
{
815
	const char *report = "pack";
816 817 818 819 820 821 822 823 824 825 826 827
	char name[PATH_MAX];
	int err;

	if (!from_stdin) {
		close(input_fd);
	} else {
		err = close(output_fd);
		if (err)
			die("error while closing pack file: %s", strerror(errno));
		chmod(curr_pack_name, 0444);
	}

828 829 830 831 832 833 834
	if (keep_msg) {
		int keep_fd, keep_msg_len = strlen(keep_msg);
		if (!keep_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
				 get_object_directory(), sha1_to_hex(sha1));
			keep_name = name;
		}
835 836 837 838 839 840 841 842 843 844
		keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (keep_fd < 0) {
			if (errno != EEXIST)
				die("cannot write keep file");
		} else {
			if (keep_msg_len > 0) {
				write_or_die(keep_fd, keep_msg, keep_msg_len);
				write_or_die(keep_fd, "\n", 1);
			}
			close(keep_fd);
845
			report = "keep";
846 847 848
		}
	}

849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
	if (final_pack_name != curr_pack_name) {
		if (!final_pack_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
				 get_object_directory(), sha1_to_hex(sha1));
			final_pack_name = name;
		}
		if (move_temp_to_file(curr_pack_name, final_pack_name))
			die("cannot store pack file");
	}

	chmod(curr_index_name, 0444);
	if (final_index_name != curr_index_name) {
		if (!final_index_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
				 get_object_directory(), sha1_to_hex(sha1));
			final_index_name = name;
		}
		if (move_temp_to_file(curr_index_name, final_index_name))
			die("cannot store index file");
	}
869 870 871 872 873 874 875

	if (!from_stdin) {
		printf("%s\n", sha1_to_hex(sha1));
	} else {
		char buf[48];
		int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
				   report, sha1_to_hex(sha1));
876
		write_or_die(1, buf, len);
877 878 879 880 881 882 883 884 885 886 887 888 889

		/*
		 * Let's just mimic git-unpack-objects here and write
		 * the last part of the input buffer to stdout.
		 */
		while (input_len) {
			err = xwrite(1, input_buffer + input_offset, input_len);
			if (err <= 0)
				break;
			input_len -= err;
			input_offset += err;
		}
	}
S
Sergey Vlasov 已提交
890 891 892 893
}

int main(int argc, char **argv)
{
894
	int i, fix_thin_pack = 0;
895 896
	const char *curr_pack, *pack_name = NULL;
	const char *curr_index, *index_name = NULL;
897 898
	const char *keep_name = NULL, *keep_msg = NULL;
	char *index_name_buf = NULL, *keep_name_buf = NULL;
J
Junio C Hamano 已提交
899
	unsigned char sha1[20];
S
Sergey Vlasov 已提交
900 901 902 903 904

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
905 906
			if (!strcmp(arg, "--stdin")) {
				from_stdin = 1;
907 908
			} else if (!strcmp(arg, "--fix-thin")) {
				fix_thin_pack = 1;
909 910
			} else if (!strcmp(arg, "--keep")) {
				keep_msg = "";
911
			} else if (!prefixcmp(arg, "--keep=")) {
912
				keep_msg = arg + 7;
913
			} else if (!prefixcmp(arg, "--pack_header=")) {
914 915 916 917 918 919 920 921 922 923 924 925
				struct pack_header *hdr;
				char *c;

				hdr = (struct pack_header *)input_buffer;
				hdr->hdr_signature = htonl(PACK_SIGNATURE);
				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
				if (*c != ',')
					die("bad %s", arg);
				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
				if (*c)
					die("bad %s", arg);
				input_len = sizeof(*hdr);
N
Nicolas Pitre 已提交
926 927
			} else if (!strcmp(arg, "-v")) {
				verbose = 1;
928
			} else if (!strcmp(arg, "-o")) {
S
Sergey Vlasov 已提交
929 930 931 932 933 934 935 936 937 938 939 940 941
				if (index_name || (i+1) >= argc)
					usage(index_pack_usage);
				index_name = argv[++i];
			} else
				usage(index_pack_usage);
			continue;
		}

		if (pack_name)
			usage(index_pack_usage);
		pack_name = arg;
	}

942
	if (!pack_name && !from_stdin)
S
Sergey Vlasov 已提交
943
		usage(index_pack_usage);
944 945
	if (fix_thin_pack && !from_stdin)
		die("--fix-thin cannot be used without --stdin");
946
	if (!index_name && pack_name) {
S
Sergey Vlasov 已提交
947
		int len = strlen(pack_name);
948
		if (!has_extension(pack_name, ".pack"))
S
Sergey Vlasov 已提交
949 950
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
P
Pavel Roskin 已提交
951
		index_name_buf = xmalloc(len);
S
Sergey Vlasov 已提交
952 953 954 955
		memcpy(index_name_buf, pack_name, len - 5);
		strcpy(index_name_buf + len - 5, ".idx");
		index_name = index_name_buf;
	}
956 957 958 959 960 961 962 963 964 965
	if (keep_msg && !keep_name && pack_name) {
		int len = strlen(pack_name);
		if (!has_extension(pack_name, ".pack"))
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
		keep_name_buf = xmalloc(len);
		memcpy(keep_name_buf, pack_name, len - 5);
		strcpy(keep_name_buf + len - 5, ".keep");
		keep_name = keep_name_buf;
	}
S
Sergey Vlasov 已提交
966

967
	curr_pack = open_pack_file(pack_name);
S
Sergey Vlasov 已提交
968
	parse_pack_header();
969 970
	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
	deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
N
Nicolas Pitre 已提交
971 972
	if (verbose)
		setup_progress_signal();
973
	parse_pack_objects(sha1);
974 975 976
	if (nr_deltas != nr_resolved_deltas) {
		if (fix_thin_pack) {
			int nr_unresolved = nr_deltas - nr_resolved_deltas;
N
Nicolas Pitre 已提交
977
			int nr_objects_initial = nr_objects;
978 979 980 981 982 983
			if (nr_unresolved <= 0)
				die("confusion beyond insanity");
			objects = xrealloc(objects,
					   (nr_objects + nr_unresolved + 1)
					   * sizeof(*objects));
			fix_unresolved_deltas(nr_unresolved);
N
Nicolas Pitre 已提交
984 985 986
			if (verbose)
				fprintf(stderr, "%d objects were added to complete this thin pack.\n",
					nr_objects - nr_objects_initial);
987 988 989 990 991 992 993 994 995
			readjust_pack_header_and_sha1(sha1);
		}
		if (nr_deltas != nr_resolved_deltas)
			die("pack has %d unresolved deltas",
			    nr_deltas - nr_resolved_deltas);
	} else {
		/* Flush remaining pack final 20-byte SHA1. */
		flush();
	}
S
Sergey Vlasov 已提交
996
	free(deltas);
997
	curr_index = write_index_file(index_name, sha1);
998 999 1000 1001
	final(pack_name, curr_pack,
		index_name, curr_index,
		keep_name, keep_msg,
		sha1);
S
Sergey Vlasov 已提交
1002 1003
	free(objects);
	free(index_name_buf);
1004
	free(keep_name_buf);
S
Sergey Vlasov 已提交
1005 1006 1007

	return 0;
}