index-pack.c 24.6 KB
Newer Older
S
Sergey Vlasov 已提交
1 2 3 4
#include "cache.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
5 6 7 8
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
S
Sergey Vlasov 已提交
9 10

static const char index_pack_usage[] =
11
"git-index-pack [-v] [-o <index-file>] [{ ---keep | --keep=<msg> }] { <pack-file> | --stdin [--fix-thin] [<pack-file>] }";
S
Sergey Vlasov 已提交
12 13 14 15

struct object_entry
{
	unsigned long offset;
16 17
	unsigned long size;
	unsigned int hdr_size;
S
Sergey Vlasov 已提交
18 19 20 21 22
	enum object_type type;
	enum object_type real_type;
	unsigned char sha1[20];
};

23 24 25 26 27
union delta_base {
	unsigned char sha1[20];
	unsigned long offset;
};

28 29 30 31 32 33
/*
 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
 * to memcmp() only the first 20 bytes.
 */
#define UNION_BASE_SZ	20

S
Sergey Vlasov 已提交
34 35
struct delta_entry
{
36
	union delta_base base;
37
	int obj_no;
S
Sergey Vlasov 已提交
38 39 40 41 42 43
};

static struct object_entry *objects;
static struct delta_entry *deltas;
static int nr_objects;
static int nr_deltas;
44
static int nr_resolved_deltas;
S
Sergey Vlasov 已提交
45

46
static int from_stdin;
N
Nicolas Pitre 已提交
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
static int verbose;

static volatile sig_atomic_t progress_update;

static void progress_interval(int signum)
{
	progress_update = 1;
}

static void setup_progress_signal(void)
{
	struct sigaction sa;
	struct itimerval v;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = progress_interval;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_RESTART;
	sigaction(SIGALRM, &sa, NULL);

	v.it_interval.tv_sec = 1;
	v.it_interval.tv_usec = 0;
	v.it_value = v.it_interval;
	setitimer(ITIMER_REAL, &v, NULL);

}

static unsigned display_progress(unsigned n, unsigned total, unsigned last_pc)
{
	unsigned percent = n * 100 / total;
	if (percent != last_pc || progress_update) {
		fprintf(stderr, "%4u%% (%u/%u) done\r", percent, n, total);
		progress_update = 0;
	}
	return percent;
}
83

84 85 86 87
/* We always read in 4kB chunks. */
static unsigned char input_buffer[4096];
static unsigned long input_offset, input_len, consumed_bytes;
static SHA_CTX input_ctx;
88
static int input_fd, output_fd, pack_fd;
89

90
/* Discard current buffer used content. */
91
static void flush(void)
92 93 94 95 96
{
	if (input_offset) {
		if (output_fd >= 0)
			write_or_die(output_fd, input_buffer, input_offset);
		SHA1_Update(&input_ctx, input_buffer, input_offset);
97
		memmove(input_buffer, input_buffer + input_offset, input_len);
98 99 100 101
		input_offset = 0;
	}
}

102 103 104 105
/*
 * Make sure at least "min" bytes are available in the buffer, and
 * return the pointer to the buffer.
 */
106
static void *fill(int min)
S
Sergey Vlasov 已提交
107
{
108 109 110 111
	if (min <= input_len)
		return input_buffer + input_offset;
	if (min > sizeof(input_buffer))
		die("cannot fill %d bytes", min);
112
	flush();
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
	do {
		int ret = xread(input_fd, input_buffer + input_len,
				sizeof(input_buffer) - input_len);
		if (ret <= 0) {
			if (!ret)
				die("early EOF");
			die("read error on input: %s", strerror(errno));
		}
		input_len += ret;
	} while (input_len < min);
	return input_buffer;
}

static void use(int bytes)
{
	if (bytes > input_len)
		die("used more bytes than were available");
	input_len -= bytes;
	input_offset += bytes;
	consumed_bytes += bytes;
}
S
Sergey Vlasov 已提交
134

135
static const char *open_pack_file(const char *pack_name)
136
{
137 138 139 140 141
	if (from_stdin) {
		input_fd = 0;
		if (!pack_name) {
			static char tmpfile[PATH_MAX];
			snprintf(tmpfile, sizeof(tmpfile),
142
				 "%s/tmp_pack_XXXXXX", get_object_directory());
143 144 145 146 147 148
			output_fd = mkstemp(tmpfile);
			pack_name = xstrdup(tmpfile);
		} else
			output_fd = open(pack_name, O_CREAT|O_EXCL|O_RDWR, 0600);
		if (output_fd < 0)
			die("unable to create %s: %s\n", pack_name, strerror(errno));
149
		pack_fd = output_fd;
150 151 152 153 154 155
	} else {
		input_fd = open(pack_name, O_RDONLY);
		if (input_fd < 0)
			die("cannot open packfile '%s': %s",
			    pack_name, strerror(errno));
		output_fd = -1;
156
		pack_fd = input_fd;
157
	}
158
	SHA1_Init(&input_ctx);
159
	return pack_name;
S
Sergey Vlasov 已提交
160 161 162 163
}

static void parse_pack_header(void)
{
164
	struct pack_header *hdr = fill(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
165 166 167

	/* Header consistency check */
	if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
168
		die("pack signature mismatch");
N
Nicolas Pitre 已提交
169
	if (!pack_version_ok(hdr->hdr_version))
170
		die("pack version %d unsupported", ntohl(hdr->hdr_version));
S
Sergey Vlasov 已提交
171 172

	nr_objects = ntohl(hdr->hdr_entries);
173
	use(sizeof(struct pack_header));
S
Sergey Vlasov 已提交
174 175 176 177 178 179 180 181 182 183 184 185 186
}

static void bad_object(unsigned long offset, const char *format,
		       ...) NORETURN __attribute__((format (printf, 2, 3)));

static void bad_object(unsigned long offset, const char *format, ...)
{
	va_list params;
	char buf[1024];

	va_start(params, format);
	vsnprintf(buf, sizeof(buf), format, params);
	va_end(params);
187
	die("pack has bad object at offset %lu: %s", offset, buf);
S
Sergey Vlasov 已提交
188 189
}

190
static void *unpack_entry_data(unsigned long offset, unsigned long size)
S
Sergey Vlasov 已提交
191 192 193 194 195 196 197
{
	z_stream stream;
	void *buf = xmalloc(size);

	memset(&stream, 0, sizeof(stream));
	stream.next_out = buf;
	stream.avail_out = size;
198 199
	stream.next_in = fill(1);
	stream.avail_in = input_len;
S
Sergey Vlasov 已提交
200 201 202 203
	inflateInit(&stream);

	for (;;) {
		int ret = inflate(&stream, 0);
204 205
		use(input_len - stream.avail_in);
		if (stream.total_out == size && ret == Z_STREAM_END)
S
Sergey Vlasov 已提交
206 207 208
			break;
		if (ret != Z_OK)
			bad_object(offset, "inflate returned %d", ret);
209 210
		stream.next_in = fill(1);
		stream.avail_in = input_len;
S
Sergey Vlasov 已提交
211 212 213 214 215
	}
	inflateEnd(&stream);
	return buf;
}

216
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
S
Sergey Vlasov 已提交
217
{
218
	unsigned char *p, c;
219
	unsigned long size, base_offset;
S
Sergey Vlasov 已提交
220 221
	unsigned shift;

222 223 224 225 226 227
	obj->offset = consumed_bytes;

	p = fill(1);
	c = *p;
	use(1);
	obj->type = (c >> 4) & 7;
S
Sergey Vlasov 已提交
228 229 230
	size = (c & 15);
	shift = 4;
	while (c & 0x80) {
231 232 233
		p = fill(1);
		c = *p;
		use(1);
S
Sergey Vlasov 已提交
234 235 236
		size += (c & 0x7fUL) << shift;
		shift += 7;
	}
237
	obj->size = size;
S
Sergey Vlasov 已提交
238

239
	switch (obj->type) {
240
	case OBJ_REF_DELTA:
241 242
		hashcpy(delta_base->sha1, fill(20));
		use(20);
243 244 245
		break;
	case OBJ_OFS_DELTA:
		memset(delta_base, 0, sizeof(*delta_base));
246 247 248
		p = fill(1);
		c = *p;
		use(1);
249 250 251 252
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
			if (!base_offset || base_offset & ~(~0UL >> 7))
253 254 255 256
				bad_object(obj->offset, "offset value overflow for delta base object");
			p = fill(1);
			c = *p;
			use(1);
257 258
			base_offset = (base_offset << 7) + (c & 127);
		}
259 260 261
		delta_base->offset = obj->offset - base_offset;
		if (delta_base->offset >= obj->offset)
			bad_object(obj->offset, "delta base offset is out of bound");
262
		break;
S
Sergey Vlasov 已提交
263 264 265 266 267 268
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
		break;
	default:
269
		bad_object(obj->offset, "unknown object type %d", obj->type);
S
Sergey Vlasov 已提交
270
	}
271 272 273 274 275
	obj->hdr_size = consumed_bytes - obj->offset;

	return unpack_entry_data(obj->offset, obj->size);
}

276
static void *get_data_from_pack(struct object_entry *obj)
277 278 279
{
	unsigned long from = obj[0].offset + obj[0].hdr_size;
	unsigned long len = obj[1].offset - from;
280
	unsigned long rdy = 0;
281
	unsigned char *src, *data;
282 283
	z_stream stream;
	int st;
S
Sergey Vlasov 已提交
284

285
	src = xmalloc(len);
286 287 288 289 290 291 292
	data = src;
	do {
		ssize_t n = pread(pack_fd, data + rdy, len - rdy, from + rdy);
		if (n <= 0)
			die("cannot pread pack file: %s", strerror(errno));
		rdy += n;
	} while (rdy < len);
293 294 295 296
	data = xmalloc(obj->size);
	memset(&stream, 0, sizeof(stream));
	stream.next_out = data;
	stream.avail_out = obj->size;
297
	stream.next_in = src;
298 299 300 301 302 303
	stream.avail_in = len;
	inflateInit(&stream);
	while ((st = inflate(&stream, Z_FINISH)) == Z_OK);
	inflateEnd(&stream);
	if (st != Z_STREAM_END || stream.total_out != obj->size)
		die("serious inflate inconsistency");
304
	free(src);
S
Sergey Vlasov 已提交
305 306 307
	return data;
}

308
static int find_delta(const union delta_base *base)
S
Sergey Vlasov 已提交
309 310 311 312 313 314 315 316
{
	int first = 0, last = nr_deltas;

        while (first < last) {
                int next = (first + last) / 2;
                struct delta_entry *delta = &deltas[next];
                int cmp;

317
                cmp = memcmp(base, &delta->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
318 319 320 321 322 323 324 325 326 327 328
                if (!cmp)
                        return next;
                if (cmp < 0) {
                        last = next;
                        continue;
                }
                first = next+1;
        }
        return -first-1;
}

329 330
static int find_delta_children(const union delta_base *base,
			       int *first_index, int *last_index)
S
Sergey Vlasov 已提交
331
{
332
	int first = find_delta(base);
S
Sergey Vlasov 已提交
333 334 335 336 337
	int last = first;
	int end = nr_deltas - 1;

	if (first < 0)
		return -1;
338
	while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
339
		--first;
340
	while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ))
S
Sergey Vlasov 已提交
341 342 343 344 345 346 347
		++last;
	*first_index = first;
	*last_index = last;
	return 0;
}

static void sha1_object(const void *data, unsigned long size,
348
			enum object_type type, unsigned char *sha1)
S
Sergey Vlasov 已提交
349
{
N
Nicolas Pitre 已提交
350
	hash_sha1_file(data, size, typename(type), sha1);
351
	if (has_sha1_file(sha1)) {
352 353 354 355 356 357 358 359 360 361
		void *has_data;
		enum object_type has_type;
		unsigned long has_size;
		has_data = read_sha1_file(sha1, &has_type, &has_size);
		if (!has_data)
			die("cannot read existing object %s", sha1_to_hex(sha1));
		if (size != has_size || type != has_type ||
		    memcmp(data, has_data, size) != 0)
			die("SHA1 COLLISION FOUND WITH %s !", sha1_to_hex(sha1));
	}
S
Sergey Vlasov 已提交
362 363
}

364
static void resolve_delta(struct object_entry *delta_obj, void *base_data,
S
Sergey Vlasov 已提交
365 366 367 368 369 370
			  unsigned long base_size, enum object_type type)
{
	void *delta_data;
	unsigned long delta_size;
	void *result;
	unsigned long result_size;
371
	union delta_base delta_base;
S
Sergey Vlasov 已提交
372 373
	int j, first, last;

374 375 376
	delta_obj->real_type = type;
	delta_data = get_data_from_pack(delta_obj);
	delta_size = delta_obj->size;
S
Sergey Vlasov 已提交
377 378 379 380
	result = patch_delta(base_data, base_size, delta_data, delta_size,
			     &result_size);
	free(delta_data);
	if (!result)
381
		bad_object(delta_obj->offset, "failed to apply delta");
382
	sha1_object(result, result_size, type, delta_obj->sha1);
383
	nr_resolved_deltas++;
384

385
	hashcpy(delta_base.sha1, delta_obj->sha1);
386
	if (!find_delta_children(&delta_base, &first, &last)) {
387 388 389 390 391
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
				resolve_delta(child, result, result_size, type);
		}
392 393 394
	}

	memset(&delta_base, 0, sizeof(delta_base));
395
	delta_base.offset = delta_obj->offset;
396
	if (!find_delta_children(&delta_base, &first, &last)) {
397 398 399 400 401
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_OFS_DELTA)
				resolve_delta(child, result, result_size, type);
		}
S
Sergey Vlasov 已提交
402
	}
403

S
Sergey Vlasov 已提交
404 405 406 407 408 409 410
	free(result);
}

static int compare_delta_entry(const void *a, const void *b)
{
	const struct delta_entry *delta_a = a;
	const struct delta_entry *delta_b = b;
411
	return memcmp(&delta_a->base, &delta_b->base, UNION_BASE_SZ);
S
Sergey Vlasov 已提交
412 413
}

414 415
/* Parse all objects and return the pack content SHA1 hash */
static void parse_pack_objects(unsigned char *sha1)
S
Sergey Vlasov 已提交
416
{
N
Nicolas Pitre 已提交
417
	int i, percent = -1;
418
	struct delta_entry *delta = deltas;
S
Sergey Vlasov 已提交
419
	void *data;
420
	struct stat st;
S
Sergey Vlasov 已提交
421 422 423 424 425

	/*
	 * First pass:
	 * - find locations of all objects;
	 * - calculate SHA1 of all non-delta objects;
426
	 * - remember base (SHA1 or offset) for all deltas.
S
Sergey Vlasov 已提交
427
	 */
N
Nicolas Pitre 已提交
428 429
	if (verbose)
		fprintf(stderr, "Indexing %d objects.\n", nr_objects);
S
Sergey Vlasov 已提交
430 431
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
432
		data = unpack_raw_entry(obj, &delta->base);
S
Sergey Vlasov 已提交
433
		obj->real_type = obj->type;
434 435
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) {
			nr_deltas++;
436
			delta->obj_no = i;
437
			delta++;
S
Sergey Vlasov 已提交
438
		} else
439
			sha1_object(data, obj->size, obj->type, obj->sha1);
S
Sergey Vlasov 已提交
440
		free(data);
N
Nicolas Pitre 已提交
441 442
		if (verbose)
			percent = display_progress(i+1, nr_objects, percent);
S
Sergey Vlasov 已提交
443
	}
444
	objects[i].offset = consumed_bytes;
N
Nicolas Pitre 已提交
445 446
	if (verbose)
		fputc('\n', stderr);
447 448

	/* Check pack integrity */
449
	flush();
450 451
	SHA1_Final(sha1, &input_ctx);
	if (hashcmp(fill(20), sha1))
452
		die("pack is corrupted (SHA1 mismatch)");
453
	use(20);
454 455 456

	/* If input_fd is a file, we should have reached its end now. */
	if (fstat(input_fd, &st))
457
		die("cannot fstat packfile: %s", strerror(errno));
J
Johannes Schindelin 已提交
458 459
	if (S_ISREG(st.st_mode) &&
			lseek(input_fd, 0, SEEK_CUR) - input_len != st.st_size)
460
		die("pack has junk at the end");
S
Sergey Vlasov 已提交
461

N
Nicolas Pitre 已提交
462 463 464
	if (!nr_deltas)
		return;

465
	/* Sort deltas by base SHA1/offset for fast searching */
S
Sergey Vlasov 已提交
466 467 468 469 470 471 472 473 474 475 476
	qsort(deltas, nr_deltas, sizeof(struct delta_entry),
	      compare_delta_entry);

	/*
	 * Second pass:
	 * - for all non-delta objects, look if it is used as a base for
	 *   deltas;
	 * - if used as a base, uncompress the object and apply all deltas,
	 *   recursively checking if the resulting object is used as a base
	 *   for some more deltas.
	 */
N
Nicolas Pitre 已提交
477 478
	if (verbose)
		fprintf(stderr, "Resolving %d deltas.\n", nr_deltas);
S
Sergey Vlasov 已提交
479 480
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = &objects[i];
481 482
		union delta_base base;
		int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last;
S
Sergey Vlasov 已提交
483

484
		if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA)
S
Sergey Vlasov 已提交
485
			continue;
486
		hashcpy(base.sha1, obj->sha1);
487
		ref = !find_delta_children(&base, &ref_first, &ref_last);
488 489
		memset(&base, 0, sizeof(base));
		base.offset = obj->offset;
490
		ofs = !find_delta_children(&base, &ofs_first, &ofs_last);
491
		if (!ref && !ofs)
S
Sergey Vlasov 已提交
492
			continue;
493
		data = get_data_from_pack(obj);
494
		if (ref)
495 496 497 498
			for (j = ref_first; j <= ref_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_REF_DELTA)
					resolve_delta(child, data,
499
						      obj->size, obj->type);
500
			}
501
		if (ofs)
502 503 504 505
			for (j = ofs_first; j <= ofs_last; j++) {
				struct object_entry *child = objects + deltas[j].obj_no;
				if (child->real_type == OBJ_OFS_DELTA)
					resolve_delta(child, data,
506
						      obj->size, obj->type);
507
			}
S
Sergey Vlasov 已提交
508
		free(data);
N
Nicolas Pitre 已提交
509 510 511
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
S
Sergey Vlasov 已提交
512
	}
N
Nicolas Pitre 已提交
513 514
	if (verbose && nr_resolved_deltas == nr_deltas)
		fputc('\n', stderr);
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
}

static int write_compressed(int fd, void *in, unsigned int size)
{
	z_stream stream;
	unsigned long maxsize;
	void *out;

	memset(&stream, 0, sizeof(stream));
	deflateInit(&stream, zlib_compression_level);
	maxsize = deflateBound(&stream, size);
	out = xmalloc(maxsize);

	/* Compress it */
	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (deflate(&stream, Z_FINISH) == Z_OK);
	deflateEnd(&stream);

	size = stream.total_out;
	write_or_die(fd, out, size);
	free(out);
	return size;
}

542
static void append_obj_to_pack(const unsigned char *sha1, void *buf,
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
			       unsigned long size, enum object_type type)
{
	struct object_entry *obj = &objects[nr_objects++];
	unsigned char header[10];
	unsigned long s = size;
	int n = 0;
	unsigned char c = (type << 4) | (s & 15);
	s >>= 4;
	while (s) {
		header[n++] = c | 0x80;
		c = s & 0x7f;
		s >>= 7;
	}
	header[n++] = c;
	write_or_die(output_fd, header, n);
	obj[1].offset = obj[0].offset + n;
	obj[1].offset += write_compressed(output_fd, buf, size);
560
	hashcpy(obj->sha1, sha1);
561 562 563 564 565 566 567 568
}

static int delta_pos_compare(const void *_a, const void *_b)
{
	struct delta_entry *a = *(struct delta_entry **)_a;
	struct delta_entry *b = *(struct delta_entry **)_b;
	return a->obj_no - b->obj_no;
}
S
Sergey Vlasov 已提交
569

570 571 572
static void fix_unresolved_deltas(int nr_unresolved)
{
	struct delta_entry **sorted_by_pos;
N
Nicolas Pitre 已提交
573
	int i, n = 0, percent = -1;
574 575 576 577 578 579 580 581 582 583 584 585

	/*
	 * Since many unresolved deltas may well be themselves base objects
	 * for more unresolved deltas, we really want to include the
	 * smallest number of base objects that would cover as much delta
	 * as possible by picking the
	 * trunc deltas first, allowing for other deltas to resolve without
	 * additional base objects.  Since most base objects are to be found
	 * before deltas depending on them, a good heuristic is to start
	 * resolving deltas in the same order as their position in the pack.
	 */
	sorted_by_pos = xmalloc(nr_unresolved * sizeof(*sorted_by_pos));
S
Sergey Vlasov 已提交
586
	for (i = 0; i < nr_deltas; i++) {
587 588 589
		if (objects[deltas[i].obj_no].real_type != OBJ_REF_DELTA)
			continue;
		sorted_by_pos[n++] = &deltas[i];
S
Sergey Vlasov 已提交
590
	}
591 592 593 594 595 596
	qsort(sorted_by_pos, n, sizeof(*sorted_by_pos), delta_pos_compare);

	for (i = 0; i < n; i++) {
		struct delta_entry *d = sorted_by_pos[i];
		void *data;
		unsigned long size;
597
		enum object_type type;
598 599 600 601
		int j, first, last;

		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
			continue;
602
		data = read_sha1_file(d->base.sha1, &type, &size);
603 604 605
		if (!data)
			continue;

606
		find_delta_children(&d->base, &first, &last);
607 608 609
		for (j = first; j <= last; j++) {
			struct object_entry *child = objects + deltas[j].obj_no;
			if (child->real_type == OBJ_REF_DELTA)
610
				resolve_delta(child, data, size, type);
611 612
		}

613 614 615
		if (check_sha1_signature(d->base.sha1, data, size, typename(type)))
			die("local object %s is corrupt", sha1_to_hex(d->base.sha1));
		append_obj_to_pack(d->base.sha1, data, size, type);
616
		free(data);
N
Nicolas Pitre 已提交
617 618 619
		if (verbose)
			percent = display_progress(nr_resolved_deltas,
						   nr_deltas, percent);
620 621
	}
	free(sorted_by_pos);
N
Nicolas Pitre 已提交
622 623
	if (verbose)
		fputc('\n', stderr);
624 625 626 627 628 629 630 631 632 633 634
}

static void readjust_pack_header_and_sha1(unsigned char *sha1)
{
	struct pack_header hdr;
	SHA_CTX ctx;
	int size;

	/* Rewrite pack header with updated object number */
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
635
	if (read_in_full(output_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
		die("cannot read pack header back: %s", strerror(errno));
	hdr.hdr_entries = htonl(nr_objects);
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));
	write_or_die(output_fd, &hdr, sizeof(hdr));
	if (lseek(output_fd, 0, SEEK_SET) != 0)
		die("cannot seek back: %s", strerror(errno));

	/* Recompute and store the new pack's SHA1 */
	SHA1_Init(&ctx);
	do {
		unsigned char *buf[4096];
		size = xread(output_fd, buf, sizeof(buf));
		if (size < 0)
			die("cannot read pack data back: %s", strerror(errno));
		SHA1_Update(&ctx, buf, size);
	} while (size > 0);
	SHA1_Final(sha1, &ctx);
	write_or_die(output_fd, sha1, 20);
S
Sergey Vlasov 已提交
655 656 657 658 659 660
}

static int sha1_compare(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
661
	return hashcmp(a->sha1, b->sha1);
S
Sergey Vlasov 已提交
662 663
}

664 665 666 667
/*
 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
 * the SHA1 hash of sorted object names.
 */
668
static const char *write_index_file(const char *index_name, unsigned char *sha1)
S
Sergey Vlasov 已提交
669 670
{
	struct sha1file *f;
671
	struct object_entry **sorted_by_sha, **list, **last;
S
Sergey Vlasov 已提交
672
	unsigned int array[256];
673
	int i, fd;
J
Junio C Hamano 已提交
674
	SHA_CTX ctx;
S
Sergey Vlasov 已提交
675

676 677 678 679 680 681 682 683 684 685 686 687 688
	if (nr_objects) {
		sorted_by_sha =
			xcalloc(nr_objects, sizeof(struct object_entry *));
		list = sorted_by_sha;
		last = sorted_by_sha + nr_objects;
		for (i = 0; i < nr_objects; ++i)
			sorted_by_sha[i] = &objects[i];
		qsort(sorted_by_sha, nr_objects, sizeof(sorted_by_sha[0]),
		      sha1_compare);

	}
	else
		sorted_by_sha = list = last = NULL;
S
Sergey Vlasov 已提交
689

690 691 692
	if (!index_name) {
		static char tmpfile[PATH_MAX];
		snprintf(tmpfile, sizeof(tmpfile),
693
			 "%s/tmp_idx_XXXXXX", get_object_directory());
694 695 696 697 698 699 700 701 702
		fd = mkstemp(tmpfile);
		index_name = xstrdup(tmpfile);
	} else {
		unlink(index_name);
		fd = open(index_name, O_CREAT|O_EXCL|O_WRONLY, 0600);
	}
	if (fd < 0)
		die("unable to create %s: %s", index_name, strerror(errno));
	f = sha1fd(fd, index_name);
S
Sergey Vlasov 已提交
703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *obj = *next;
			if (obj->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
	sha1write(f, array, 256 * sizeof(int));

J
Junio C Hamano 已提交
722 723 724 725 726
	/* recompute the SHA1 hash of sorted object names.
	 * currently pack-objects does not do this, but that
	 * can be fixed.
	 */
	SHA1_Init(&ctx);
S
Sergey Vlasov 已提交
727 728 729 730 731 732 733 734 735
	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *obj = *list++;
		unsigned int offset = htonl(obj->offset);
		sha1write(f, &offset, 4);
		sha1write(f, obj->sha1, 20);
J
Junio C Hamano 已提交
736
		SHA1_Update(&ctx, obj->sha1, 20);
S
Sergey Vlasov 已提交
737
	}
738
	sha1write(f, sha1, 20);
S
Sergey Vlasov 已提交
739 740
	sha1close(f, NULL, 1);
	free(sorted_by_sha);
J
Junio C Hamano 已提交
741
	SHA1_Final(sha1, &ctx);
742 743 744 745 746
	return index_name;
}

static void final(const char *final_pack_name, const char *curr_pack_name,
		  const char *final_index_name, const char *curr_index_name,
747
		  const char *keep_name, const char *keep_msg,
748 749
		  unsigned char *sha1)
{
750
	const char *report = "pack";
751 752 753 754 755 756 757 758 759 760 761 762
	char name[PATH_MAX];
	int err;

	if (!from_stdin) {
		close(input_fd);
	} else {
		err = close(output_fd);
		if (err)
			die("error while closing pack file: %s", strerror(errno));
		chmod(curr_pack_name, 0444);
	}

763 764 765 766 767 768 769
	if (keep_msg) {
		int keep_fd, keep_msg_len = strlen(keep_msg);
		if (!keep_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.keep",
				 get_object_directory(), sha1_to_hex(sha1));
			keep_name = name;
		}
770 771 772 773 774 775 776 777 778 779
		keep_fd = open(keep_name, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (keep_fd < 0) {
			if (errno != EEXIST)
				die("cannot write keep file");
		} else {
			if (keep_msg_len > 0) {
				write_or_die(keep_fd, keep_msg, keep_msg_len);
				write_or_die(keep_fd, "\n", 1);
			}
			close(keep_fd);
780
			report = "keep";
781 782 783
		}
	}

784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
	if (final_pack_name != curr_pack_name) {
		if (!final_pack_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.pack",
				 get_object_directory(), sha1_to_hex(sha1));
			final_pack_name = name;
		}
		if (move_temp_to_file(curr_pack_name, final_pack_name))
			die("cannot store pack file");
	}

	chmod(curr_index_name, 0444);
	if (final_index_name != curr_index_name) {
		if (!final_index_name) {
			snprintf(name, sizeof(name), "%s/pack/pack-%s.idx",
				 get_object_directory(), sha1_to_hex(sha1));
			final_index_name = name;
		}
		if (move_temp_to_file(curr_index_name, final_index_name))
			die("cannot store index file");
	}
804 805 806 807 808 809 810

	if (!from_stdin) {
		printf("%s\n", sha1_to_hex(sha1));
	} else {
		char buf[48];
		int len = snprintf(buf, sizeof(buf), "%s\t%s\n",
				   report, sha1_to_hex(sha1));
811
		write_or_die(1, buf, len);
812 813 814 815 816 817 818 819 820 821 822 823 824

		/*
		 * Let's just mimic git-unpack-objects here and write
		 * the last part of the input buffer to stdout.
		 */
		while (input_len) {
			err = xwrite(1, input_buffer + input_offset, input_len);
			if (err <= 0)
				break;
			input_len -= err;
			input_offset += err;
		}
	}
S
Sergey Vlasov 已提交
825 826 827 828
}

int main(int argc, char **argv)
{
829
	int i, fix_thin_pack = 0;
830 831
	const char *curr_pack, *pack_name = NULL;
	const char *curr_index, *index_name = NULL;
832 833
	const char *keep_name = NULL, *keep_msg = NULL;
	char *index_name_buf = NULL, *keep_name_buf = NULL;
J
Junio C Hamano 已提交
834
	unsigned char sha1[20];
S
Sergey Vlasov 已提交
835 836 837 838 839

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
840 841
			if (!strcmp(arg, "--stdin")) {
				from_stdin = 1;
842 843
			} else if (!strcmp(arg, "--fix-thin")) {
				fix_thin_pack = 1;
844 845
			} else if (!strcmp(arg, "--keep")) {
				keep_msg = "";
846
			} else if (!prefixcmp(arg, "--keep=")) {
847
				keep_msg = arg + 7;
848
			} else if (!prefixcmp(arg, "--pack_header=")) {
849 850 851 852 853 854 855 856 857 858 859 860
				struct pack_header *hdr;
				char *c;

				hdr = (struct pack_header *)input_buffer;
				hdr->hdr_signature = htonl(PACK_SIGNATURE);
				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
				if (*c != ',')
					die("bad %s", arg);
				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
				if (*c)
					die("bad %s", arg);
				input_len = sizeof(*hdr);
N
Nicolas Pitre 已提交
861 862
			} else if (!strcmp(arg, "-v")) {
				verbose = 1;
863
			} else if (!strcmp(arg, "-o")) {
S
Sergey Vlasov 已提交
864 865 866 867 868 869 870 871 872 873 874 875 876
				if (index_name || (i+1) >= argc)
					usage(index_pack_usage);
				index_name = argv[++i];
			} else
				usage(index_pack_usage);
			continue;
		}

		if (pack_name)
			usage(index_pack_usage);
		pack_name = arg;
	}

877
	if (!pack_name && !from_stdin)
S
Sergey Vlasov 已提交
878
		usage(index_pack_usage);
879 880
	if (fix_thin_pack && !from_stdin)
		die("--fix-thin cannot be used without --stdin");
881
	if (!index_name && pack_name) {
S
Sergey Vlasov 已提交
882
		int len = strlen(pack_name);
883
		if (!has_extension(pack_name, ".pack"))
S
Sergey Vlasov 已提交
884 885
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
P
Pavel Roskin 已提交
886
		index_name_buf = xmalloc(len);
S
Sergey Vlasov 已提交
887 888 889 890
		memcpy(index_name_buf, pack_name, len - 5);
		strcpy(index_name_buf + len - 5, ".idx");
		index_name = index_name_buf;
	}
891 892 893 894 895 896 897 898 899 900
	if (keep_msg && !keep_name && pack_name) {
		int len = strlen(pack_name);
		if (!has_extension(pack_name, ".pack"))
			die("packfile name '%s' does not end with '.pack'",
			    pack_name);
		keep_name_buf = xmalloc(len);
		memcpy(keep_name_buf, pack_name, len - 5);
		strcpy(keep_name_buf + len - 5, ".keep");
		keep_name = keep_name_buf;
	}
S
Sergey Vlasov 已提交
901

902
	curr_pack = open_pack_file(pack_name);
S
Sergey Vlasov 已提交
903
	parse_pack_header();
904 905
	objects = xmalloc((nr_objects + 1) * sizeof(struct object_entry));
	deltas = xmalloc(nr_objects * sizeof(struct delta_entry));
N
Nicolas Pitre 已提交
906 907
	if (verbose)
		setup_progress_signal();
908
	parse_pack_objects(sha1);
909 910 911
	if (nr_deltas != nr_resolved_deltas) {
		if (fix_thin_pack) {
			int nr_unresolved = nr_deltas - nr_resolved_deltas;
N
Nicolas Pitre 已提交
912
			int nr_objects_initial = nr_objects;
913 914 915 916 917 918
			if (nr_unresolved <= 0)
				die("confusion beyond insanity");
			objects = xrealloc(objects,
					   (nr_objects + nr_unresolved + 1)
					   * sizeof(*objects));
			fix_unresolved_deltas(nr_unresolved);
N
Nicolas Pitre 已提交
919 920 921
			if (verbose)
				fprintf(stderr, "%d objects were added to complete this thin pack.\n",
					nr_objects - nr_objects_initial);
922 923 924 925 926 927 928 929 930
			readjust_pack_header_and_sha1(sha1);
		}
		if (nr_deltas != nr_resolved_deltas)
			die("pack has %d unresolved deltas",
			    nr_deltas - nr_resolved_deltas);
	} else {
		/* Flush remaining pack final 20-byte SHA1. */
		flush();
	}
S
Sergey Vlasov 已提交
931
	free(deltas);
932
	curr_index = write_index_file(index_name, sha1);
933 934 935 936
	final(pack_name, curr_pack,
		index_name, curr_index,
		keep_name, keep_msg,
		sha1);
S
Sergey Vlasov 已提交
937 938
	free(objects);
	free(index_name_buf);
939
	free(keep_name_buf);
S
Sergey Vlasov 已提交
940 941 942

	return 0;
}