index-pack.c 24.7 KB
Newer Older
S
Sergey Vlasov 已提交
1 2 3 4
#include "cache.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
S
Sergey Vlasov 已提交
9 10

static const char index_pack_usage[] =
11
"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
S
Sergey Vlasov 已提交
12 13 14 15

struct object_entry
{
	unsigned long offset;
16 17
	unsigned long size;
	unsigned int hdr_size;
S
Sergey Vlasov 已提交
18 19 20 21 22
	enum object_type type;
	enum object_type real_type;
	unsigned char sha1[20];
};

23 24 25 26 27
union delta_base {
	unsigned char sha1[20];
	unsigned long offset;
};

28 29 30 31 32 33
/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ	20

S
Sergey Vlasov 已提交
34 35
struct delta_entry
{
36
	union delta_base base;
37
	int obj_no;
S
Sergey Vlasov 已提交
38 39 40 41 42 43
};

static struct object_entry *objects;
static struct delta_entry *deltas;
static int nr_objects;
static int nr_deltas;
44
static int nr_resolved_deltas;
S
Sergey Vlasov 已提交
45

46
static int from_stdin;
N
Nicolas Pitre 已提交
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
static int verbose;

static volatile sig_atomic_t progress_update;

static void progress_interval(int signum)
{
	progress_update = 1;
}

static void setup_progress_signal(void)
{
	struct sigaction sa;
	struct itimerval v;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = progress_interval;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_RESTART;
	sigaction(SIGALRM, &sa, NULL);

	v.it_interval.tv_sec = 1;
	v.it_interval.tv_usec = 0;
	v.it_value = v.it_interval;
	setitimer(ITIMER_REAL, &v, NULL);

}

static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
{
	unsigned percent = n * 100 / total;
	if (percent != last_pc || progress_update) {
		fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
		progress_update = 0;
	}
	return percent;
}
83

84 85 86 87
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
static unsigned long input_offset, input_len, consumed_bytes;
static SHA_CTX input_ctx;
88
static int input_fd, output_fd, pack_fd;
89

90
/* Discard current buffer used content. */
91
static void flush(void)
92 93 94 95 96
{
	if (input_offset) {
		if (output_fd >= 0)
			write_or_die(output_fd, input_buffer, input_offset);
		SHA1_Update(&input_ctx, input_buffer, input_offset);
97
		memmove(input_buffer, input_buffer + input_offset, input_len);
98 99 100 101
		input_offset = 0;
	}
}

102 103 104 105
/*
 * Make sure at least "min" bytes are available in the buffer, and
 * return the pointer to the buffer.
 */
106
static void *fill(int min)
S
Sergey Vlasov 已提交
107
{
108 109 110 111
	if (min <= input_len)
		return input_buffer + input_offset;
	if (min > sizeof(input_buffer))
		die("cannot fill %d bytes", min);
112
	flush();
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
	do {
		int ret = xread(input_fd, input_buffer + input_len,
				sizeof(input_buffer) - input_len);
		if (ret <= 0) {
			if (!ret)
				die("early EOF");
			die("read error on input: %s", strerror(errno));
		}
		input_len += ret;
	} while (input_len < min);
	return input_buffer;
}

static void use(int bytes)
{
	if (bytes > input_len)
		die("used more bytes than were available");
	input_len -= bytes;
	input_offset += bytes;
	consumed_bytes += bytes;
}
S
Sergey Vlasov 已提交
134

135
static const char *open_pack_file(const char *pack_name)
136
{
137 138 139 140 141 142 143 144 145 146 147 148
	if (from_stdin) {
		input_fd = 0;
		if (!pack_name) {
			static char tmpfile[PATH_MAX];
			snprintf(tmpfile, sizeof(tmpfile),
				 "%s/pack_XXXXXX", get_object_directory());
			output_fd = mkstemp(tmpfile);
			pack_name = xstrdup(tmpfile);
		} else
			output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
		if (output_fd < 0)
			die("unable to create %s: %s\n", pack_name, strerror(errno));
149
		pack_fd = output_fd;
150 151 152 153 154 155
	} else {
		input_fd = open(pack_name, O_RDONLY);
		if (input_fd < 0)
			die("cannot open packfile '%s': %s",
			    pack_name, strerror(errno));
		output_fd = -1;
156
		pack_fd = input_fd;
157
	}
158
	SHA1_Init(&input_ctx);
159
	return pack_name;
S
Sergey Vlasov 已提交
160 161 162 163
}

static void parse_pack_header(void)
{
164
	struct pack_header *hdr = fill(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
165 166 167

	/* Header consistency check */
	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
168
		die("pack signature mismatch");
N
Nicolas Pitre 已提交
169
	if (!pack_version_ok(hdr->hdr_version))
170
		die("pack version %d unsupported", ntohl(hdr->hdr_version));
S
Sergey Vlasov 已提交
171 172

	nr_objects = ntohl(hdr->hdr_entries);
173
	use(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
174 175 176 177 178 179 180 181 182 183 184 185 186
}

static void bad_object(unsigned long offset, const char *format,
		       ...) NORETURN __attribute__((format (printf, 2, 3)));

static void bad_object(unsigned long offset, const char *format, ...)
{
	va_list params;
	char buf[1024];

	va_start(params, format);
	vsnprintf(buf, sizeof(buf), format, params);
	va_end(params);
187
	die("pack has bad object at offset %lu: %s", offset, buf);
S
Sergey Vlasov 已提交
188 189
}

190
static void *unpack_entry_data(unsigned long offset, unsigned long size)
S
Sergey Vlasov 已提交
191 192 193 194 195 196 197
{
	z_stream stream;
	void *buf = xmalloc(size);

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buf;
	stream.avail_out = size;
198 199
	stream.next_in = fill(1);
	stream.avail_in = input_len;
S
Sergey Vlasov 已提交
200 201 202 203
	inflateInit(&stream);

	for (;;) {
		int ret = inflate(&stream, 0);
204 205
		use(input_len - stream.avail_in);
		if (stream.total_out == size && ret == Z_STREAM_END)
S
Sergey Vlasov 已提交
206 207 208
			break;
		if (ret != Z_OK)
			bad_object(offset, "inflate returned %d", ret);
209 210
		stream.next_in = fill(1);
		stream.avail_in = input_len;
S
Sergey Vlasov 已提交
211 212 213 214 215
	}
	inflateEnd(&stream);
	return buf;
}

216
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
S
Sergey Vlasov 已提交
217
{
218
	unsigned char *p, c;
219
	unsigned long size, base_offset;
S
Sergey Vlasov 已提交
220 221
	unsigned shift;

222 223 224 225 226 227
	obj->offset = consumed_bytes;

	p = fill(1);
	c = *p;
	use(1);
	obj->type = (c >> 4) & 7;
S
Sergey Vlasov 已提交
228 229 230
	size = (c & 15);
	shift = 4;
	while (c & 0x80) {
231 232 233
		p = fill(1);
		c = *p;
		use(1);
S
Sergey Vlasov 已提交
234 235 236
		size += (c & 0x7fUL) << shift;
		shift += 7;
	}
237
	obj->size = size;
S
Sergey Vlasov 已提交
238

239
	switch (obj->type) {
240
	case OBJ_REF_DELTA:
241 242
		hashcpy(delta_base->sha1, fill(20));
		use(20);
243 244 245
		break;
	case OBJ_OFS_DELTA:
		memset(delta_base, 0, sizeof(*delta_base));
246 247 248
		p = fill(1);
		c = *p;
		use(1);
249 250 251 252
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
			if (!base_offset || base_offset & ~(~0UL >> 7))
253 254 255 256
				bad_object(obj->offset, "offset value overflow for delta base object");
			p = fill(1);
			c = *p;
			use(1);
257 258
			base_offset = (base_offset << 7) + (c & 127);
		}
259 260 261
		delta_base->offset = obj->offset - base_offset;
		if (delta_base->offset >= obj->offset)
			bad_object(obj->offset, "delta base offset is out of bound");
262
		break;
S
Sergey Vlasov 已提交
263 264 265 266 267 268
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
		break;
	default:
269
		bad_object(obj->offset, "unknown object type %d", obj->type);
S
Sergey Vlasov 已提交
270
	}
271 272 273 274 275
	obj->hdr_size = consumed_bytes - obj->offset;

	return unpack_entry_data(obj->offset, obj->size);
}

276
static void *get_data_from_pack(struct object_entry *obj)
277 278 279
{
	unsigned long from = obj[0].offset + obj[0].hdr_size;
	unsigned long len = obj[1].offset - from;
280
	unsigned char *src, *data;
281 282
	z_stream stream;
	int st;
S
Sergey Vlasov 已提交
283

284 285 286
	src = xmalloc(len);
	if (pread(pack_fd, src, len, from) != len)
		die("cannot pread pack file: %s", strerror(errno));
287 288 289 290
	data = xmalloc(obj->size);
	memset(&stream, 0, sizeof(stream));
	stream.next_out = data;
	stream.avail_out = obj->size;
291
	stream.next_in = src;
292 293 294 295 296 297
	stream.avail_in = len;
	inflateInit(&stream);
	while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
	inflateEnd(&stream);
	if (st != Z_STREAM_END || stream.total_out != obj->size)
		die("serious inflate inconsistency");
298
	free(src);
S
Sergey Vlasov 已提交
299 300 301
	return data;
}

302
static int find_delta(const union delta_base *base)
S
Sergey Vlasov 已提交
303 304 305 306 307 308 309 310
{
	int first = 0, last = nr_deltas;

        while (first < last) {
                int next = (first + last) / 2;
                struct delta_entry *delta = &deltas[next];
                int cmp;

311
                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
312 313 314 315 316 317 318 319 320 321 322
                if (!cmp)
                        return next;
                if (cmp < 0) {
                        last = next;
                        continue;
                }
                first = next+1;
        }
        return -first-1;
}

323 324
static int find_delta_children(const union delta_base *base,
			       int *first_index, int *last_index)
S
Sergey Vlasov 已提交
325
{
326
	int first = find_delta(base);
S
Sergey Vlasov 已提交
327 328 329 330 331
	int last = first;
	int end = nr_deltas - 1;

	if (first < 0)
		return -1;
332
	while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
333
		--first;
334
	while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
		++last;
	*first_index = first;
	*last_index = last;
	return 0;
}

static void sha1_object(const void *data, unsigned long size,
			enum object_type type, unsigned char *sha1)
{
	SHA_CTX ctx;
	char header[50];
	int header_size;
	const char *type_str;

	switch (type) {
350 351 352 353
	case OBJ_COMMIT: type_str = commit_type; break;
	case OBJ_TREE:   type_str = tree_type; break;
	case OBJ_BLOB:   type_str = blob_type; break;
	case OBJ_TAG:    type_str = tag_type; break;
S
Sergey Vlasov 已提交
354 355 356 357 358 359 360 361 362 363 364 365
	default:
		die("bad type %d", type);
	}

	header_size = sprintf(header, "%s %lu", type_str, size) + 1;

	SHA1_Init(&ctx);
	SHA1_Update(&ctx, header, header_size);
	SHA1_Update(&ctx, data, size);
	SHA1_Final(sha1, &ctx);
}

366
static void resolve_delta(struct object_entry *delta_obj, void *base_data,
S
Sergey Vlasov 已提交
367 368 369 370 371 372
			  unsigned long base_size, enum object_type type)
{
	void *delta_data;
	unsigned long delta_size;
	void *result;
	unsigned long result_size;
373
	union delta_base delta_base;
S
Sergey Vlasov 已提交
374 375
	int j, first, last;

376 377 378
	delta_obj->real_type = type;
	delta_data = get_data_from_pack(delta_obj);
	delta_size = delta_obj->size;
S
Sergey Vlasov 已提交
379 380 381 382
	result = patch_delta(base_data, base_size, delta_data, delta_size,
			     &result_size);
	free(delta_data);
	if (!result)
383 384 385
		bad_object(delta_obj->offset, "failed to apply delta");
	sha1_object(result, result_size, type, delta_obj->sha1);
	nr_resolved_deltas++;
386

387
	hashcpy(delta_base.sha1, delta_obj->sha1);
388
	if (!find_delta_children(&delta_base, &first, &last)) {
389 390 391 392 393
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, result, result_size, type);
		}
394 395 396
	}

	memset(&delta_base, 0, sizeof(delta_base));
397
	delta_base.offset = delta_obj->offset;
398
	if (!find_delta_children(&delta_base, &first, &last)) {
399 400 401 402 403
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_OFS_DELTA)
				resolve_delta(child, result, result_size, type);
		}
S
Sergey Vlasov 已提交
404
	}
405

S
Sergey Vlasov 已提交
406 407 408 409 410 411 412
	free(result);
}

static int compare_delta_entry(const void *a, const void *b)
{
	const struct delta_entry *delta_a = a;
	const struct delta_entry *delta_b = b;
413
	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
414 415
}

416 417
/* Parse all objects and return the pack content SHA1 hash */
static void parse_pack_objects(unsigned char *sha1)
S
Sergey Vlasov 已提交
418
{
N
Nicolas Pitre 已提交
419
	int i, percent = -1;
420
	struct delta_entry *delta = deltas;
S
Sergey Vlasov 已提交
421
	void *data;
422
	struct stat st;
S
Sergey Vlasov 已提交
423 424 425 426 427

	/*
	 * First pass:
	 * - find locations of all objects;
	 * - calculate SHA1 of all non-delta objects;
428
	 * - remember base (SHA1 or offset) for all deltas.
S
Sergey Vlasov 已提交
429
	 */
N
Nicolas Pitre 已提交
430 431
	if (verbose)
		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
S
Sergey Vlasov 已提交
432 433
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
434
		data = unpack_raw_entry(obj, &delta->base);
S
Sergey Vlasov 已提交
435
		obj->real_type = obj->type;
436 437
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
			nr_deltas++;
438
			delta->obj_no = i;
439
			delta++;
S
Sergey Vlasov 已提交
440
		} else
441
			sha1_object(data, obj->size, obj->type, obj->sha1);
S
Sergey Vlasov 已提交
442
		free(data);
N
Nicolas Pitre 已提交
443 444
		if (verbose)
			percent = display_progress(i+1, nr_objects, percent);
S
Sergey Vlasov 已提交
445
	}
446
	objects[i].offset = consumed_bytes;
N
Nicolas Pitre 已提交
447 448
	if (verbose)
		fputc('\n', stderr);
449 450

	/* Check pack integrity */
451
	flush();
452 453
	SHA1_Final(sha1, &input_ctx);
	if (hashcmp(fill(20), sha1))
454
		die("pack is corrupted (SHA1 mismatch)");
455
	use(20);
456 457 458

	/* If input_fd is a file, we should have reached its end now. */
	if (fstat(input_fd, &st))
459
		die("cannot fstat packfile: %s", strerror(errno));
460
	if (S_ISREG(st.st_mode) && st.st_size != consumed_bytes)
461
		die("pack has junk at the end");
S
Sergey Vlasov 已提交
462

N
Nicolas Pitre 已提交
463 464 465
	if (!nr_deltas)
		return;

466
	/* Sort deltas by base SHA1/offset for fast searching */
S
Sergey Vlasov 已提交
467 468 469 470 471 472 473 474 475 476 477
	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
	      compare_delta_entry);

	/*
	 * Second pass:
	 * - for all non-delta objects, look if it is used as a base for
	 *   deltas;
	 * - if used as a base, uncompress the object and apply all deltas,
	 *   recursively checking if the resulting object is used as a base
	 *   for some more deltas.
	 */
N
Nicolas Pitre 已提交
478 479
	if (verbose)
		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
S
Sergey Vlasov 已提交
480 481
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
482 483
		union delta_base base;
		int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
S
Sergey Vlasov 已提交
484

485
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
S
Sergey Vlasov 已提交
486
			continue;
487
		hashcpy(base.sha1, obj->sha1);
488
		ref = !find_delta_children(&base, &ref_first, &ref_last);
489 490
		memset(&base, 0, sizeof(base));
		base.offset = obj->offset;
491
		ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
492
		if (!ref && !ofs)
S
Sergey Vlasov 已提交
493
			continue;
494
		data = get_data_from_pack(obj);
495
		if (ref)
496 497 498 499
			for (j = ref_first; j <= ref_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_REF_DELTA)
					resolve_delta(child, data,
500
						      obj->size, obj->type);
501
			}
502
		if (ofs)
503 504 505 506
			for (j = ofs_first; j <= ofs_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_OFS_DELTA)
					resolve_delta(child, data,
507
						      obj->size, obj->type);
508
			}
S
Sergey Vlasov 已提交
509
		free(data);
N
Nicolas Pitre 已提交
510 511 512
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
S
Sergey Vlasov 已提交
513
	}
N
Nicolas Pitre 已提交
514 515
	if (verbose && nr_resolved_deltas == nr_deltas)
		fputc('\n', stderr);
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
}

static int write_compressed(int fd, void *in, unsigned int size)
{
	z_stream stream;
	unsigned long maxsize;
	void *out;

	memset(&stream, 0, sizeof(stream));
	deflateInit(&stream, zlib_compression_level);
	maxsize = deflateBound(&stream, size);
	out = xmalloc(maxsize);

	/* Compress it */
	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (deflate(&stream, Z_FINISH) == Z_OK);
	deflateEnd(&stream);

	size = stream.total_out;
	write_or_die(fd, out, size);
	free(out);
	return size;
}

static void append_obj_to_pack(void *buf,
			       unsigned long size, enum object_type type)
{
	struct object_entry *obj = &objects[nr_objects++];
	unsigned char header[10];
	unsigned long s = size;
	int n = 0;
	unsigned char c = (type << 4) | (s & 15);
	s >>= 4;
	while (s) {
		header[n++] = c | 0x80;
		c = s & 0x7f;
		s >>= 7;
	}
	header[n++] = c;
	write_or_die(output_fd, header, n);
	obj[1].offset = obj[0].offset + n;
	obj[1].offset += write_compressed(output_fd, buf, size);
	sha1_object(buf, size, type, obj->sha1);
}

static int delta_pos_compare(const void *_a, const void *_b)
{
	struct delta_entry *a = *(struct delta_entry **)_a;
	struct delta_entry *b = *(struct delta_entry **)_b;
	return a->obj_no - b->obj_no;
}
S
Sergey Vlasov 已提交
570

571 572 573
static void fix_unresolved_deltas(int nr_unresolved)
{
	struct delta_entry **sorted_by_pos;
N
Nicolas Pitre 已提交
574
	int i, n = 0, percent = -1;
575 576 577 578 579 580 581 582 583 584 585 586

	/*
	 * Since many unresolved deltas may well be themselves base objects
	 * for more unresolved deltas, we really want to include the
	 * smallest number of base objects that would cover as much delta
	 * as possible by picking the
	 * trunc deltas first, allowing for other deltas to resolve without
	 * additional base objects.  Since most base objects are to be found
	 * before deltas depending on them, a good heuristic is to start
	 * resolving deltas in the same order as their position in the pack.
	 */
	sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
S
Sergey Vlasov 已提交
587
	for (i = 0; i < nr_deltas; i++) {
588 589 590
		if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
			continue;
		sorted_by_pos[n++] = &deltas[i];
S
Sergey Vlasov 已提交
591
	}
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
	qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

	for (i = 0; i < n; i++) {
		struct delta_entry *d = sorted_by_pos[i];
		void *data;
		unsigned long size;
		char type[10];
		enum object_type obj_type;
		int j, first, last;

		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
			continue;
		data = read_sha1_file(d->base.sha1, type, &size);
		if (!data)
			continue;
		if      (!strcmp(type, blob_type))   obj_type = OBJ_BLOB;
		else if (!strcmp(type, tree_type))   obj_type = OBJ_TREE;
		else if (!strcmp(type, commit_type)) obj_type = OBJ_COMMIT;
		else if (!strcmp(type, tag_type))    obj_type = OBJ_TAG;
		else die("base object %s is of type '%s'",
			 sha1_to_hex(d->base.sha1), type);

614
		find_delta_children(&d->base, &first, &last);
615 616 617 618 619 620 621 622
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, data, size, obj_type);
		}

		append_obj_to_pack(data, size, obj_type);
		free(data);
N
Nicolas Pitre 已提交
623 624 625
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
626 627
	}
	free(sorted_by_pos);
N
Nicolas Pitre 已提交
628 629
	if (verbose)
		fputc('\n', stderr);
630 631 632 633 634 635 636 637 638 639 640
}

static void readjust_pack_header_and_sha1(unsigned char *sha1)
{
	struct pack_header hdr;
	SHA_CTX ctx;
	int size;

	/* Rewrite pack header with updated object number */
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
641
	if (read_in_full(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
		die("cannot read pack header back: %s", strerror(errno));
	hdr.hdr_entries = htonl(nr_objects);
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
	write_or_die(output_fd, &hdr, sizeof(hdr));
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));

	/* Recompute and store the new pack's SHA1 */
	SHA1_Init(&ctx);
	do {
		unsigned char *buf[4096];
		size = xread(output_fd, buf, sizeof(buf));
		if (size < 0)
			die("cannot read pack data back: %s", strerror(errno));
		SHA1_Update(&ctx, buf, size);
	} while (size > 0);
	SHA1_Final(sha1, &ctx);
	write_or_die(output_fd, sha1, 20);
S
Sergey Vlasov 已提交
661 662 663 664 665 666
}

static int sha1_compare(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
667
	return hashcmp(a->sha1, b->sha1);
S
Sergey Vlasov 已提交
668 669
}

670 671 672 673
/*
 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 * the SHA1 hash of sorted object names.
 */
674
static const char *write_index_file(const char *index_name, unsigned char *sha1)
S
Sergey Vlasov 已提交
675 676
{
	struct sha1file *f;
677
	struct object_entry **sorted_by_sha, **list, **last;
S
Sergey Vlasov 已提交
678
	unsigned int array[256];
679
	int i, fd;
J
Junio C Hamano 已提交
680
	SHA_CTX ctx;
S
Sergey Vlasov 已提交
681

682 683 684 685 686 687 688 689 690 691 692 693 694
	if (nr_objects) {
		sorted_by_sha =
			xcalloc(nr_objects, sizeof(struct object_entry *));
		list = sorted_by_sha;
		last = sorted_by_sha + nr_objects;
		for (i = 0; i < nr_objects; ++i)
			sorted_by_sha[i] = &objects[i];
		qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
		      sha1_compare);

	}
	else
		sorted_by_sha = list = last = NULL;
S
Sergey Vlasov 已提交
695

696 697 698 699 700 701 702 703 704 705 706 707 708
	if (!index_name) {
		static char tmpfile[PATH_MAX];
		snprintf(tmpfile, sizeof(tmpfile),
			 "%s/index_XXXXXX", get_object_directory());
		fd = mkstemp(tmpfile);
		index_name = xstrdup(tmpfile);
	} else {
		unlink(index_name);
		fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
	}
	if (fd < 0)
		die("unable to create %s: %s", index_name, strerror(errno));
	f = sha1fd(fd, index_name);
S
Sergey Vlasov 已提交
709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *obj = *next;
			if (obj->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
	sha1write(f, array, 256 * sizeof(int));

J
Junio C Hamano 已提交
728 729 730 731 732
	/* recompute the SHA1 hash of sorted object names.
	 * currently pack-objects does not do this, but that
	 * can be fixed.
	 */
	SHA1_Init(&ctx);
S
Sergey Vlasov 已提交
733 734 735 736 737 738 739 740 741
	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = *list++;
		unsigned int offset = htonl(obj->offset);
		sha1write(f, &offset, 4);
		sha1write(f, obj->sha1, 20);
J
Junio C Hamano 已提交
742
		SHA1_Update(&ctx, obj->sha1, 20);
S
Sergey Vlasov 已提交
743
	}
744
	sha1write(f, sha1, 20);
S
Sergey Vlasov 已提交
745 746
	sha1close(f, NULL, 1);
	free(sorted_by_sha);
J
Junio C Hamano 已提交
747
	SHA1_Final(sha1, &ctx);
748 749 750 751 752
	return index_name;
}

static void final(const char *final_pack_name, const char *curr_pack_name,
		  const char *final_index_name, const char *curr_index_name,
753
		  const char *keep_name, const char *keep_msg,
754 755
		  unsigned char *sha1)
{
756
	char *report = "pack";
757 758 759 760 761 762 763 764 765 766 767 768
	char name[PATH_MAX];
	int err;

	if (!from_stdin) {
		close(input_fd);
	} else {
		err = close(output_fd);
		if (err)
			die("error while closing pack file: %s", strerror(errno));
		chmod(curr_pack_name, 0444);
	}

769 770 771 772 773 774 775
	if (keep_msg) {
		int keep_fd, keep_msg_len = strlen(keep_msg);
		if (!keep_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
				 get_object_directory(), sha1_to_hex(sha1));
			keep_name = name;
		}
776 777 778 779 780 781 782 783 784 785
		keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (keep_fd < 0) {
			if (errno != EEXIST)
				die("cannot write keep file");
		} else {
			if (keep_msg_len > 0) {
				write_or_die(keep_fd, keep_msg, keep_msg_len);
				write_or_die(keep_fd, "\n", 1);
			}
			close(keep_fd);
786
			report = "keep";
787 788 789
		}
	}

790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
	if (final_pack_name != curr_pack_name) {
		if (!final_pack_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
				 get_object_directory(), sha1_to_hex(sha1));
			final_pack_name = name;
		}
		if (move_temp_to_file(curr_pack_name, final_pack_name))
			die("cannot store pack file");
	}

	chmod(curr_index_name, 0444);
	if (final_index_name != curr_index_name) {
		if (!final_index_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
				 get_object_directory(), sha1_to_hex(sha1));
			final_index_name = name;
		}
		if (move_temp_to_file(curr_index_name, final_index_name))
			die("cannot store index file");
	}
810 811 812 813 814 815 816

	if (!from_stdin) {
		printf("%s\n", sha1_to_hex(sha1));
	} else {
		char buf[48];
		int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
				   report, sha1_to_hex(sha1));
817
		write_in_full(1, buf, len);
818 819 820 821 822 823 824 825 826 827 828 829 830

		/*
		 * Let's just mimic git-unpack-objects here and write
		 * the last part of the input buffer to stdout.
		 */
		while (input_len) {
			err = xwrite(1, input_buffer + input_offset, input_len);
			if (err <= 0)
				break;
			input_len -= err;
			input_offset += err;
		}
	}
S
Sergey Vlasov 已提交
831 832 833 834
}

int main(int argc, char **argv)
{
835
	int i, fix_thin_pack = 0;
836 837
	const char *curr_pack, *pack_name = NULL;
	const char *curr_index, *index_name = NULL;
838 839
	const char *keep_name = NULL, *keep_msg = NULL;
	char *index_name_buf = NULL, *keep_name_buf = NULL;
J
Junio C Hamano 已提交
840
	unsigned char sha1[20];
S
Sergey Vlasov 已提交
841 842 843 844 845

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
846 847
			if (!strcmp(arg, "--stdin")) {
				from_stdin = 1;
848 849
			} else if (!strcmp(arg, "--fix-thin")) {
				fix_thin_pack = 1;
850 851 852 853
			} else if (!strcmp(arg, "--keep")) {
				keep_msg = "";
			} else if (!strncmp(arg, "--keep=", 7)) {
				keep_msg = arg + 7;
854 855 856 857 858 859 860 861 862 863 864 865 866
			} else if (!strncmp(arg, "--pack_header=", 14)) {
				struct pack_header *hdr;
				char *c;

				hdr = (struct pack_header *)input_buffer;
				hdr->hdr_signature = htonl(PACK_SIGNATURE);
				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
				if (*c != ',')
					die("bad %s", arg);
				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
				if (*c)
					die("bad %s", arg);
				input_len = sizeof(*hdr);
N
Nicolas Pitre 已提交
867 868
			} else if (!strcmp(arg, "-v")) {
				verbose = 1;
869
			} else if (!strcmp(arg, "-o")) {
S
Sergey Vlasov 已提交
870 871 872 873 874 875 876 877 878 879 880 881 882
				if (index_name || (i+1) >= argc)
					usage(index_pack_usage);
				index_name = argv[++i];
			} else
				usage(index_pack_usage);
			continue;
		}

		if (pack_name)
			usage(index_pack_usage);
		pack_name = arg;
	}

883
	if (!pack_name && !from_stdin)
S
Sergey Vlasov 已提交
884
		usage(index_pack_usage);
885 886
	if (fix_thin_pack && !from_stdin)
		die("--fix-thin cannot be used without --stdin");
887
	if (!index_name && pack_name) {
S
Sergey Vlasov 已提交
888
		int len = strlen(pack_name);
889
		if (!has_extension(pack_name, ".pack"))
S
Sergey Vlasov 已提交
890 891
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
P
Pavel Roskin 已提交
892
		index_name_buf = xmalloc(len);
S
Sergey Vlasov 已提交
893 894 895 896
		memcpy(index_name_buf, pack_name, len - 5);
		strcpy(index_name_buf + len - 5, ".idx");
		index_name = index_name_buf;
	}
897 898 899 900 901 902 903 904 905 906
	if (keep_msg && !keep_name && pack_name) {
		int len = strlen(pack_name);
		if (!has_extension(pack_name, ".pack"))
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
		keep_name_buf = xmalloc(len);
		memcpy(keep_name_buf, pack_name, len - 5);
		strcpy(keep_name_buf + len - 5, ".keep");
		keep_name = keep_name_buf;
	}
S
Sergey Vlasov 已提交
907

908
	curr_pack = open_pack_file(pack_name);
S
Sergey Vlasov 已提交
909
	parse_pack_header();
910 911
	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
	deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
N
Nicolas Pitre 已提交
912 913
	if (verbose)
		setup_progress_signal();
914
	parse_pack_objects(sha1);
915 916 917
	if (nr_deltas != nr_resolved_deltas) {
		if (fix_thin_pack) {
			int nr_unresolved = nr_deltas - nr_resolved_deltas;
N
Nicolas Pitre 已提交
918
			int nr_objects_initial = nr_objects;
919 920 921 922 923 924
			if (nr_unresolved <= 0)
				die("confusion beyond insanity");
			objects = xrealloc(objects,
					   (nr_objects + nr_unresolved + 1)
					   * sizeof(*objects));
			fix_unresolved_deltas(nr_unresolved);
N
Nicolas Pitre 已提交
925 926 927
			if (verbose)
				fprintf(stderr, "%d objects were added to complete this thin pack.\n",
					nr_objects - nr_objects_initial);
928 929 930 931 932 933 934 935 936
			readjust_pack_header_and_sha1(sha1);
		}
		if (nr_deltas != nr_resolved_deltas)
			die("pack has %d unresolved deltas",
			    nr_deltas - nr_resolved_deltas);
	} else {
		/* Flush remaining pack final 20-byte SHA1. */
		flush();
	}
S
Sergey Vlasov 已提交
937
	free(deltas);
938
	curr_index = write_index_file(index_name, sha1);
939 940 941 942
	final(pack_name, curr_pack,
		index_name, curr_index,
		keep_name, keep_msg,
		sha1);
S
Sergey Vlasov 已提交
943 944
	free(objects);
	free(index_name_buf);
945
	free(keep_name_buf);
S
Sergey Vlasov 已提交
946 947 948

	return 0;
}