sha1_file.c 65.1 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 *
 * This handles basic git sha1 object files - packing, unpacking,
 * creation etc.
 */
#include "cache.h"
10
#include "delta.h"
11
#include "pack.h"
12 13 14 15
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
16
#include "refs.h"
17
#include "pack-revindex.h"
18
#include "sha1-lookup.h"
19

20 21 22 23 24 25 26 27
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
#define O_NOATIME 01000000
#else
#define O_NOATIME 0
#endif
#endif

28 29
/*
 * SZ_FMT is the printf conversion used to print a size_t value, and
 * sz_fmt() converts a size_t into the argument type that conversion
 * expects.  Without C99 format support, "unsigned long" is used.
 */
#ifdef NO_C99_FORMAT
#define SZ_FMT "lu"
static unsigned long sz_fmt(size_t s)
{
	return (unsigned long)s;
}
#else
#define SZ_FMT "zu"
static size_t sz_fmt(size_t s)
{
	return s;
}
#endif

36
const unsigned char null_sha1[20];
J
Junio C Hamano 已提交
37

38
static int git_open_noatime(const char *name, struct packed_git *p);
39

40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
/*
 * This is meant to hold a *small* number of objects that you would
 * want read_sha1_file() to be able to return, but yet you do not want
 * to write them into the object store (e.g. a browse-only
 * application).
 */
static struct cached_object {
	unsigned char sha1[20];	/* object name; matched with hashcmp() by find_cached_object() */
	enum object_type type;	/* object type (e.g. OBJ_TREE for the built-in empty tree) */
	void *buf;		/* object contents -- presumably "size" bytes; confirm against the code that fills it in */
	unsigned long size;
} *cached_objects;
/*
 * find_cached_object() searches the first cached_object_nr entries of
 * cached_objects[]; cached_object_alloc is presumably the allocated
 * capacity of that array -- confirm against the code that grows it.
 */
static int cached_object_nr, cached_object_alloc;

static struct cached_object empty_tree = {
55
	EMPTY_TREE_SHA1_BIN_LITERAL,
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
	OBJ_TREE,
	"",
	0
};

/*
 * Look up "sha1" among the in-memory cached objects.
 *
 * Returns the matching entry of cached_objects[], the built-in empty
 * tree entry if "sha1" names the empty tree, or NULL otherwise.
 */
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
	int idx;

	for (idx = 0; idx < cached_object_nr; idx++)
		if (hashcmp(cached_objects[idx].sha1, sha1) == 0)
			return &cached_objects[idx];

	if (hashcmp(sha1, empty_tree.sha1) == 0)
		return &empty_tree;
	return NULL;
}

75 76
/*
 * Create every missing directory leading up to the last component of
 * "path".  The buffer is modified while working (a '/' is temporarily
 * replaced by NUL so that each prefix can be examined) but is restored
 * before returning.
 *
 * Returns:
 *    0  on success,
 *   -1  if mkdir() failed,
 *   -2  if adjust_shared_perm() failed on a directory we just created,
 *   -3  if a leading component exists but is not a directory.
 */
int safe_create_leading_directories(char *path)
{
	char *pos = path + offset_1st_component(path);
	struct stat st;

	while (pos) {
		pos = strchr(pos, '/');
		if (!pos)
			break;
		/* treat a run of slashes as a single separator */
		while (*++pos == '/')
			;
		if (!*pos)
			break;	/* nothing follows the trailing slash(es) */
		*--pos = '\0';
		if (!stat(path, &st)) {
			/* path exists */
			if (!S_ISDIR(st.st_mode)) {
				*pos = '/';
				return -3;
			}
		}
		else if (mkdir(path, 0777)) {
			if (errno == EEXIST &&
			    !stat(path, &st) && S_ISDIR(st.st_mode)) {
				; /* somebody created it since we checked */
			} else {
				*pos = '/';
				return -1;
			}
		}
		else if (adjust_shared_perm(path)) {
			*pos = '/';
			return -2;
		}
		*pos++ = '/';
	}
	return 0;
}
108

109 110 111 112 113 114 115 116 117
/*
 * Variant of safe_create_leading_directories() for callers that cannot
 * let their string be modified: the work is done on a private copy.
 * Returns whatever safe_create_leading_directories() returns.
 */
int safe_create_leading_directories_const(const char *path)
{
	char *scratch;
	int status;

	/* path points to cache entries, so work on a duplicate */
	scratch = xstrdup(path);
	status = safe_create_leading_directories(scratch);
	free(scratch);
	return status;
}

118 119 120 121 122 123 124 125 126 127 128 129
/*
 * Write the 40 lowercase hex digits of the 20-byte "sha1" into
 * "pathbuf", skipping pathbuf[2] so that the caller can keep a '/'
 * between the first two digits and the remaining 38.  Bytes 0-1 and
 * 3-40 of "pathbuf" are written; nothing is NUL-terminated here.
 */
static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
{
	static const char digits[] = "0123456789abcdef";
	char *out = pathbuf;
	int n;

	for (n = 0; n < 20; n++) {
		if (n == 1)
			out++;	/* step over the directory separator slot */
		*out++ = digits[sha1[n] >> 4];
		*out++ = digits[sha1[n] & 0xf];
	}
}

130 131
/*
 * NOTE! This returns a statically allocated buffer, so you have to be
 * careful about using it. Do an "xstrdup()" if you need to save the
 * filename.
 *
 * Also note that this returns the location for creating.  Reading
 * SHA1 file can happen from any alternate directory listed in the
 * DB_ENVIRONMENT environment variable if it is not found in
 * the primary object database.
 */
char *sha1_file_name(const unsigned char *sha1)
{
	static char pathname[PATH_MAX];
	const char *dir = get_object_directory();
	int dirlen = strlen(dir);

	/* '/' + sha1(2) + '/' + sha1(38) + '\0' */
	if (dirlen + 43 > PATH_MAX)
		die("insanely long object directory %s", dir);

	memcpy(pathname, dir, dirlen);
	pathname[dirlen] = '/';
	fill_sha1_path(pathname + dirlen + 1, sha1);
	/* fill_sha1_path() leaves the separator and terminator to us */
	pathname[dirlen + 3] = '/';
	pathname[dirlen + 42] = '\0';
	return pathname;
}

160
/*
 * Build "<objdir>/pack/pack-<40 hex digits>.<which>" for "sha1".
 *
 * "*base" and "*name" cache the buffer between calls: on the first
 * call (when "*base" is NULL) the buffer is allocated and filled with a
 * template, and "*name" is pointed at the hex digits inside it.  Every
 * call overwrites those 40 digits in place and returns "*base".
 */
static char *sha1_get_pack_name(const unsigned char *sha1,
				char **name, char **base, const char *which)
{
	static const char hex[] = "0123456789abcdef";
	char *out;
	int i;

	if (!*base) {
		const char *objdir = get_object_directory();
		int dirlen = strlen(objdir);

		*base = xmalloc(dirlen + 60);
		sprintf(*base, "%s/pack/pack-1234567890123456789012345678901234567890.%s",
			objdir, which);
		/* 11 == strlen("/pack/pack-") */
		*name = *base + dirlen + 11;
	}

	out = *name;
	for (i = 0; i < 20; i++) {
		unsigned int byte = sha1[i];

		*out++ = hex[byte >> 4];
		*out++ = hex[byte & 0xf];
	}

	return *base;
}

187
/*
 * Return the path of the ".pack" file named after "sha1".  The result
 * lives in a buffer that is reused by every call of this function.
 */
char *sha1_pack_name(const unsigned char *sha1)
{
	static char *base;
	static char *name;

	return sha1_get_pack_name(sha1, &name, &base, "pack");
}
193

194 195 196
/*
 * Return the path of the ".idx" file named after "sha1".  The result
 * lives in a buffer that is reused by every call of this function.
 */
char *sha1_pack_index_name(const unsigned char *sha1)
{
	static char *base;
	static char *name;

	return sha1_get_pack_name(sha1, &name, &base, "idx");
}

201 202
struct alternate_object_database *alt_odb_list;
static struct alternate_object_database **alt_odb_tail;
203

M
Martin Waitz 已提交
204 205
static void read_info_alternates(const char * alternates, int depth);

J
Junio C Hamano 已提交
206 207
/*
 * Prepare alternate object database registry.
 *
 * The variable alt_odb_list points at the list of struct
 * alternate_object_database.  The elements on this list come from
 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
 * whose contents are similar to that environment variable but can be
 * LF separated.  Its base points at a buffer allocated together with
 * the entry that contains
 * "/the/directory/corresponding/to/.git/objects/...", while
 * its name points just after the slash at the end of ".git/objects/"
 * in the example above, and has enough space to hold 40-byte hex
 * SHA1, an extra slash for the first level indirection, and the
 * terminating NUL.
 */
M
Martin Waitz 已提交
221
static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth)
222
{
223
	const char *objdir = get_object_directory();
M
Martin Waitz 已提交
224 225 226 227 228
	struct alternate_object_database *ent;
	struct alternate_object_database *alt;
	/* 43 = 40-byte + 2 '/' + terminating NUL */
	int pfxlen = len;
	int entlen = pfxlen + 43;
229
	int base_len = -1;
230

231
	if (!is_absolute_path(entry) && relative_base) {
M
Martin Waitz 已提交
232 233 234 235 236 237 238 239
		/* Relative alt-odb */
		if (base_len < 0)
			base_len = strlen(relative_base) + 1;
		entlen += base_len;
		pfxlen += base_len;
	}
	ent = xmalloc(sizeof(*ent) + entlen);

240
	if (!is_absolute_path(entry) && relative_base) {
M
Martin Waitz 已提交
241 242 243 244 245 246 247 248 249 250 251 252
		memcpy(ent->base, relative_base, base_len - 1);
		ent->base[base_len - 1] = '/';
		memcpy(ent->base + base_len, entry, len);
	}
	else
		memcpy(ent->base, entry, pfxlen);

	ent->name = ent->base + pfxlen + 1;
	ent->base[pfxlen + 3] = '/';
	ent->base[pfxlen] = ent->base[entlen-1] = 0;

	/* Detect cases where alternate disappeared */
253
	if (!is_directory(ent->base)) {
M
Martin Waitz 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
		error("object directory %s does not exist; "
		      "check .git/objects/info/alternates.",
		      ent->base);
		free(ent);
		return -1;
	}

	/* Prevent the common mistake of listing the same
	 * thing twice, or object directory itself.
	 */
	for (alt = alt_odb_list; alt; alt = alt->next) {
		if (!memcmp(ent->base, alt->base, pfxlen)) {
			free(ent);
			return -1;
		}
	}
	if (!memcmp(ent->base, objdir, pfxlen)) {
		free(ent);
		return -1;
	}

	/* add the alternate entry */
	*alt_odb_tail = ent;
	alt_odb_tail = &(ent->next);
	ent->next = NULL;

	/* recursively add alternates */
	read_info_alternates(ent->base, depth + 1);

	ent->base[pfxlen] = '/';

	return 0;
}

/*
 * Split the text between "alt" and "ep" at each "sep" character and
 * register every non-empty piece with link_alt_odb_entry().  The text
 * does not have to be NUL-terminated.
 *
 * A piece starting with '#' is skipped as a comment.  At nesting levels
 * other than 0 ("depth" != 0), relative paths are reported and ignored.
 * Nothing is processed once "depth" exceeds 5.
 */
static void link_alt_odb_entries(const char *alt, const char *ep, int sep,
				 const char *relative_base, int depth)
{
	const char *cp, *last;

	if (depth > 5) {
		error("%s: ignoring alternate object stores, nesting too deep.",
				relative_base);
		return;
	}

	last = alt;
	while (last < ep) {
		cp = last;
		if (cp < ep && *cp == '#') {
			while (cp < ep && *cp != sep)
				cp++;
			/* step over the separator, but never beyond "ep" */
			last = (cp < ep) ? cp + 1 : cp;
			continue;
		}
		while (cp < ep && *cp != sep)
			cp++;
		if (last != cp) {
			if (!is_absolute_path(last) && depth) {
				/*
				 * The entry ends at "cp" and is not
				 * NUL-terminated, so bound what is printed.
				 */
				error("%s: ignoring relative alternate object store %.*s",
						relative_base, (int)(cp - last), last);
			} else {
				link_alt_odb_entry(last, cp - last,
						relative_base, depth);
			}
		}
		while (cp < ep && *cp == sep)
			cp++;
		last = cp;
	}
}

M
Martin Waitz 已提交
325
static void read_info_alternates(const char * relative_base, int depth)
326
{
327
	char *map;
328
	size_t mapsz;
329
	struct stat st;
330 331 332 333 334
	const char alt_file_name[] = "info/alternates";
	/* Given that relative_base is no longer than PATH_MAX,
	   ensure that "path" has enough space to append "/", the
	   file name, "info/alternates", and a trailing NUL.  */
	char path[PATH_MAX + 1 + sizeof alt_file_name];
M
Martin Waitz 已提交
335
	int fd;
336

337
	sprintf(path, "%s/%s", relative_base, alt_file_name);
338
	fd = git_open_noatime(path, NULL);
339 340 341 342
	if (fd < 0)
		return;
	if (fstat(fd, &st) || (st.st_size == 0)) {
		close(fd);
343
		return;
344
	}
345 346
	mapsz = xsize_t(st.st_size);
	map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
347 348
	close(fd);

349
	link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth);
M
Martin Waitz 已提交
350

351
	munmap(map, mapsz);
352 353
}

354 355 356
void add_to_alternates_file(const char *reference)
{
	struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
357
	int fd = hold_lock_file_for_append(lock, git_path("objects/info/alternates"), LOCK_DIE_ON_ERROR);
358 359 360 361 362 363 364 365
	char *alt = mkpath("%s/objects\n", reference);
	write_or_die(fd, alt, strlen(alt));
	if (commit_lock_file(lock))
		die("could not close alternates file");
	if (alt_odb_tail)
		link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0);
}

366 367 368 369 370 371 372 373 374 375
/*
 * Call "fn" with each alternate object database and the caller's "cb"
 * pointer, in list order.  Iteration stops as soon as "fn" returns
 * non-zero.
 */
void foreach_alt_odb(alt_odb_fn fn, void *cb)
{
	struct alternate_object_database *ent;

	prepare_alt_odb();
	ent = alt_odb_list;
	while (ent) {
		if (fn(ent, cb))
			break;
		ent = ent->next;
	}
}

M
Martin Waitz 已提交
376 377
void prepare_alt_odb(void)
{
T
Timo Hirvonen 已提交
378
	const char *alt;
M
Martin Waitz 已提交
379

S
Shawn O. Pearce 已提交
380 381 382
	if (alt_odb_tail)
		return;

M
Martin Waitz 已提交
383 384 385 386
	alt = getenv(ALTERNATE_DB_ENVIRONMENT);
	if (!alt) alt = "";

	alt_odb_tail = &alt_odb_list;
387
	link_alt_odb_entries(alt, alt + strlen(alt), PATH_SEP, NULL, 0);
M
Martin Waitz 已提交
388 389 390 391

	read_info_alternates(get_object_directory(), 0);
}

392
/*
 * Return 1 if a loose object file for "sha1" exists in our own object
 * directory, 0 otherwise.
 */
static int has_loose_object_local(const unsigned char *sha1)
{
	return access(sha1_file_name(sha1), F_OK) == 0;
}
397

398 399 400
int has_loose_object_nonlocal(const unsigned char *sha1)
{
	struct alternate_object_database *alt;
401
	prepare_alt_odb();
402
	for (alt = alt_odb_list; alt; alt = alt->next) {
403
		fill_sha1_path(alt->name, sha1);
404 405
		if (!access(alt->base, F_OK))
			return 1;
406
	}
407
	return 0;
408 409
}

410 411 412 413 414 415
/*
 * Return 1 if "sha1" exists as a loose object, either locally or in an
 * alternate object database; 0 otherwise.
 */
static int has_loose_object(const unsigned char *sha1)
{
	if (has_loose_object_local(sha1))
		return 1;
	return has_loose_object_nonlocal(sha1);
}

416
static unsigned int pack_used_ctr;
417 418 419 420
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
static size_t peak_pack_mapped;
421
static size_t pack_mapped;
422
struct packed_git *packed_git;
423

J
Junio C Hamano 已提交
424
void pack_report(void)
425 426
{
	fprintf(stderr,
427 428 429
		"pack_report: getpagesize()            = %10" SZ_FMT "\n"
		"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
		"pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
430 431 432
		sz_fmt(getpagesize()),
		sz_fmt(packed_git_window_size),
		sz_fmt(packed_git_limit));
433 434 435 436
	fprintf(stderr,
		"pack_report: pack_used_ctr            = %10u\n"
		"pack_report: pack_mmap_calls          = %10u\n"
		"pack_report: pack_open_windows        = %10u / %10u\n"
437 438
		"pack_report: pack_mapped              = "
			"%10" SZ_FMT " / %10" SZ_FMT "\n",
439 440 441
		pack_used_ctr,
		pack_mmap_calls,
		pack_open_windows, peak_pack_open_windows,
442
		sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
443 444
}

445
static int check_packed_git_idx(const char *path,  struct packed_git *p)
446 447
{
	void *idx_map;
448
	struct pack_idx_header *hdr;
449
	size_t idx_size;
450
	uint32_t version, nr, i, *index;
451
	int fd = git_open_noatime(path, p);
452
	struct stat st;
453

454 455 456 457 458 459
	if (fd < 0)
		return -1;
	if (fstat(fd, &st)) {
		close(fd);
		return -1;
	}
460
	idx_size = xsize_t(st.st_size);
461 462 463 464
	if (idx_size < 4 * 256 + 20 + 20) {
		close(fd);
		return error("index file %s is too small", path);
	}
465
	idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
466 467
	close(fd);

468 469
	hdr = idx_map;
	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
470 471 472
		version = ntohl(hdr->idx_version);
		if (version < 2 || version > 2) {
			munmap(idx_map, idx_size);
473
			return error("index file %s is version %"PRIu32
474 475 476 477 478 479
				     " and is not supported by this binary"
				     " (try upgrading GIT to a newer version)",
				     path, version);
		}
	} else
		version = 1;
480

481
	nr = 0;
482
	index = idx_map;
483 484
	if (version > 1)
		index += 2;  /* skip index header */
485
	for (i = 0; i < 256; i++) {
486
		uint32_t n = ntohl(index[i]);
487 488
		if (n < nr) {
			munmap(idx_map, idx_size);
489
			return error("non-monotonic index %s", path);
490
		}
491 492 493
		nr = n;
	}

494 495 496 497 498 499 500 501 502 503
	if (version == 1) {
		/*
		 * Total size:
		 *  - 256 index entries 4 bytes each
		 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
		 *  - 20-byte SHA1 of the packfile
		 *  - 20-byte SHA1 file checksum
		 */
		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
			munmap(idx_map, idx_size);
504
			return error("wrong index v1 file size in %s", path);
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
		}
	} else if (version == 2) {
		/*
		 * Minimum size:
		 *  - 8 bytes of header
		 *  - 256 index entries 4 bytes each
		 *  - 20-byte sha1 entry * nr
		 *  - 4-byte crc entry * nr
		 *  - 4-byte offset entry * nr
		 *  - 20-byte SHA1 of the packfile
		 *  - 20-byte SHA1 file checksum
		 * And after the 4-byte offset table might be a
		 * variable sized table containing 8-byte entries
		 * for offsets larger than 2^31.
		 */
		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
L
Linus Torvalds 已提交
521 522 523 524
		unsigned long max_size = min_size;
		if (nr)
			max_size += (nr - 1)*8;
		if (idx_size < min_size || idx_size > max_size) {
525
			munmap(idx_map, idx_size);
526
			return error("wrong index v2 file size in %s", path);
527
		}
528 529 530 531 532 533 534 535 536
		if (idx_size != min_size &&
		    /*
		     * make sure we can deal with large pack offsets.
		     * 31-bit signed offset won't be enough, neither
		     * 32-bit unsigned one will be.
		     */
		    (sizeof(off_t) <= 4)) {
			munmap(idx_map, idx_size);
			return error("pack too large for current definition of off_t in %s", path);
537
		}
538
	}
539

540
	p->index_version = version;
541 542
	p->index_data = idx_map;
	p->index_size = idx_size;
N
Nicolas Pitre 已提交
543
	p->num_objects = nr;
544 545 546
	return 0;
}

547
int open_pack_index(struct packed_git *p)
548 549 550 551 552 553 554 555 556 557 558 559 560 561
{
	char *idx_name;
	int ret;

	if (p->index_data)
		return 0;

	idx_name = xstrdup(p->pack_name);
	strcpy(idx_name + strlen(idx_name) - strlen(".pack"), ".idx");
	ret = check_packed_git_idx(idx_name, p);
	free(idx_name);
	return ret;
}

562 563 564 565
static void scan_windows(struct packed_git *p,
	struct packed_git **lru_p,
	struct pack_window **lru_w,
	struct pack_window **lru_l)
566
{
567 568 569 570 571 572 573 574
	struct pack_window *w, *w_l;

	for (w_l = NULL, w = p->windows; w; w = w->next) {
		if (!w->inuse_cnt) {
			if (!*lru_w || w->last_used < (*lru_w)->last_used) {
				*lru_p = p;
				*lru_w = w;
				*lru_l = w_l;
575 576
			}
		}
577
		w_l = w;
578
	}
579 580
}

581
static int unuse_one_window(struct packed_git *current, int keep_fd)
582 583 584 585 586 587 588 589
{
	struct packed_git *p, *lru_p = NULL;
	struct pack_window *lru_w = NULL, *lru_l = NULL;

	if (current)
		scan_windows(current, &lru_p, &lru_w, &lru_l);
	for (p = packed_git; p; p = p->next)
		scan_windows(p, &lru_p, &lru_w, &lru_l);
590 591 592 593 594 595 596
	if (lru_p) {
		munmap(lru_w->base, lru_w->len);
		pack_mapped -= lru_w->len;
		if (lru_l)
			lru_l->next = lru_w->next;
		else {
			lru_p->windows = lru_w->next;
597
			if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
598 599 600 601 602
				close(lru_p->pack_fd);
				lru_p->pack_fd = -1;
			}
		}
		free(lru_w);
603
		pack_open_windows--;
604 605 606
		return 1;
	}
	return 0;
607 608
}

609
void release_pack_memory(size_t need, int fd)
610 611
{
	size_t cur = pack_mapped;
612
	while (need >= (cur - pack_mapped) && unuse_one_window(NULL, fd))
613 614 615
		; /* nothing */
}

616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
/*
 * mmap() wrapper: if the first attempt fails, release pack windows to
 * make room and try once more, dying if that fails too.  A failed
 * zero-length mapping yields NULL instead.
 */
void *xmmap(void *start, size_t length,
	int prot, int flags, int fd, off_t offset)
{
	void *addr = mmap(start, length, prot, flags, fd, offset);

	if (addr != MAP_FAILED)
		return addr;
	if (!length)
		return NULL;

	release_pack_memory(length, fd);
	addr = mmap(start, length, prot, flags, fd, offset);
	if (addr == MAP_FAILED)
		die_errno("Out of memory? mmap failed");
	return addr;
}

631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646
/*
 * Unmap and free every window of pack "p".  Dies if one of them is
 * still in use.
 */
void close_pack_windows(struct packed_git *p)
{
	struct pack_window *w;

	while ((w = p->windows) != NULL) {
		if (w->inuse_cnt)
			die("pack '%s' still has open windows to it",
			    p->pack_name);
		munmap(w->base, w->len);
		pack_mapped -= w->len;
		pack_open_windows--;
		p->windows = w->next;
		free(w);
	}
}

647
void unuse_pack(struct pack_window **w_cursor)
648
{
649 650 651 652 653
	struct pack_window *w = *w_cursor;
	if (w) {
		w->inuse_cnt--;
		*w_cursor = NULL;
	}
654 655
}

656 657 658 659 660 661 662 663
/* Unmap the index of pack "p", if one is mapped, and forget it. */
void close_pack_index(struct packed_git *p)
{
	if (!p->index_data)
		return;
	munmap((void *)p->index_data, p->index_size);
	p->index_data = NULL;
}

664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
/*
 * This is used by git-repack in case a newly created pack happens to
 * contain the same set of objects as an existing one.  In that case
 * the resulting file might be different even if its name would be the
 * same.  It is best to close any reference to the old pack before it is
 * replaced on disk.  Of course no index pointers nor windows for given pack
 * must subsist at this point.  If ever objects from this pack are requested
 * again, the new version of the pack will be reinitialized through
 * reprepare_packed_git().
 */
void free_pack_by_name(const char *pack_name)
{
	struct packed_git **link;

	for (link = &packed_git; *link; link = &(*link)->next) {
		struct packed_git *p = *link;

		if (strcmp(pack_name, p->pack_name) != 0)
			continue;

		/* release everything attached to the pack, then unlink it */
		clear_delta_base_cache();
		close_pack_windows(p);
		if (p->pack_fd != -1)
			close(p->pack_fd);
		close_pack_index(p);
		free(p->bad_object_sha1);
		*link = p->next;
		free(p);
		return;
	}
}

695 696 697 698 699
/*
 * Do not call this directly as this leaks p->pack_fd on error return;
 * call open_packed_git() instead.
 */
static int open_packed_git_1(struct packed_git *p)
700
{
701 702 703 704
	struct stat st;
	struct pack_header hdr;
	unsigned char sha1[20];
	unsigned char *idx_sha1;
705
	long fd_flag;
706

707 708 709
	if (!p->index_data && open_pack_index(p))
		return error("packfile %s index unavailable", p->pack_name);

710
	p->pack_fd = git_open_noatime(p->pack_name, p);
711
	if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
712
		return -1;
713 714

	/* If we created the struct before we had the pack we lack size. */
715 716
	if (!p->pack_size) {
		if (!S_ISREG(st.st_mode))
717
			return error("packfile %s not a regular file", p->pack_name);
718
		p->pack_size = st.st_size;
719
	} else if (p->pack_size != st.st_size)
720
		return error("packfile %s size changed", p->pack_name);
721

722 723 724 725 726
	/* We leave these file descriptors open with sliding mmap;
	 * there is no point keeping them open across exec(), though.
	 */
	fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
	if (fd_flag < 0)
727
		return error("cannot determine file descriptor flags");
728 729
	fd_flag |= FD_CLOEXEC;
	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
730
		return error("cannot set FD_CLOEXEC");
731

732
	/* Verify we recognize this pack file format. */
733
	if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
734
		return error("file %s is far too short to be a packfile", p->pack_name);
735
	if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
736
		return error("file %s is not a GIT packfile", p->pack_name);
737
	if (!pack_version_ok(hdr.hdr_version))
738 739
		return error("packfile %s is version %"PRIu32" and not"
			" supported (try upgrading GIT to a newer version)",
740 741 742
			p->pack_name, ntohl(hdr.hdr_version));

	/* Verify the pack matches its index. */
N
Nicolas Pitre 已提交
743
	if (p->num_objects != ntohl(hdr.hdr_entries))
744 745
		return error("packfile %s claims to have %"PRIu32" objects"
			     " while index indicates %"PRIu32" objects",
N
Nicolas Pitre 已提交
746 747
			     p->pack_name, ntohl(hdr.hdr_entries),
			     p->num_objects);
748
	if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
749
		return error("end of packfile %s is unavailable", p->pack_name);
750
	if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
751
		return error("packfile %s signature is unavailable", p->pack_name);
752
	idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
753
	if (hashcmp(sha1, idx_sha1))
754 755
		return error("packfile %s does not match index", p->pack_name);
	return 0;
756 757
}

758 759 760 761 762 763 764 765 766 767 768
/*
 * Open pack "p" via open_packed_git_1(); on failure, close the
 * descriptor that call may have left open.  Returns 0 on success and
 * -1 on failure.
 */
static int open_packed_git(struct packed_git *p)
{
	int failed = open_packed_git_1(p);

	if (failed && p->pack_fd != -1) {
		close(p->pack_fd);
		p->pack_fd = -1;
	}
	return failed ? -1 : 0;
}

769
static int in_window(struct pack_window *win, off_t offset)
770 771 772 773 774 775 776 777 778 779 780 781
{
	/* We must promise at least 20 bytes (one hash) after the
	 * offset is available from this window, otherwise the offset
	 * is not actually in this window and a different window (which
	 * has that one hash excess) must be used.  This is to support
	 * the object header and delta base parsing routines below.
	 */
	off_t win_off = win->offset;
	return win_off <= offset
		&& (offset + 20) <= (win_off + win->len);
}

782
unsigned char *use_pack(struct packed_git *p,
783
		struct pack_window **w_cursor,
784
		off_t offset,
785
		unsigned int *left)
786
{
787
	struct pack_window *win = *w_cursor;
788

789 790
	if (p->pack_fd == -1 && open_packed_git(p))
		die("packfile %s cannot be accessed", p->pack_name);
791

F
Felipe Contreras 已提交
792
	/* Since packfiles end in a hash of their content and it's
793 794 795 796 797 798 799 800 801 802 803 804 805 806 807
	 * pointless to ask for an offset into the middle of that
	 * hash, and the in_window function above wouldn't match
	 * don't allow an offset too close to the end of the file.
	 */
	if (offset > (p->pack_size - 20))
		die("offset beyond end of packfile (truncated pack?)");

	if (!win || !in_window(win, offset)) {
		if (win)
			win->inuse_cnt--;
		for (win = p->windows; win; win = win->next) {
			if (in_window(win, offset))
				break;
		}
		if (!win) {
808
			size_t window_align = packed_git_window_size / 2;
809
			off_t len;
810
			win = xcalloc(1, sizeof(*win));
811
			win->offset = (offset / window_align) * window_align;
812 813 814 815
			len = p->pack_size - win->offset;
			if (len > packed_git_window_size)
				len = packed_git_window_size;
			win->len = (size_t)len;
816
			pack_mapped += win->len;
817
			while (packed_git_limit < pack_mapped
818
				&& unuse_one_window(p, p->pack_fd))
819
				; /* nothing */
820
			win->base = xmmap(NULL, win->len,
821 822 823
				PROT_READ, MAP_PRIVATE,
				p->pack_fd, win->offset);
			if (win->base == MAP_FAILED)
824 825 826
				die("packfile %s cannot be mapped: %s",
					p->pack_name,
					strerror(errno));
827 828 829 830 831 832
			pack_mmap_calls++;
			pack_open_windows++;
			if (pack_mapped > peak_pack_mapped)
				peak_pack_mapped = pack_mapped;
			if (pack_open_windows > peak_pack_open_windows)
				peak_pack_open_windows = pack_open_windows;
833 834 835
			win->next = p->windows;
			p->windows = win;
		}
836
	}
837 838 839 840 841
	if (win != *w_cursor) {
		win->last_used = pack_used_ctr++;
		win->inuse_cnt++;
		*w_cursor = win;
	}
842
	offset -= win->offset;
843
	if (left)
844
		*left = win->len - xsize_t(offset);
845
	return win->base + offset;
846 847
}

848 849 850 851 852 853 854 855
/*
 * Allocate a struct packed_git followed by "extra" bytes.  The struct
 * itself is zeroed and its pack_fd is set to -1; the extra bytes are
 * left uninitialized.
 */
static struct packed_git *alloc_packed_git(int extra)
{
	struct packed_git *p;

	p = xmalloc(sizeof(*p) + extra);
	memset(p, 0, sizeof(*p));
	p->pack_fd = -1;
	return p;
}

856 857 858 859 860
/*
 * Callback installed with set_try_to_free_routine(): release pack
 * windows for "size" bytes without protecting any file descriptor.
 */
static void try_to_free_pack_memory(size_t size)
{
	const int no_fd_to_keep = -1;

	release_pack_memory(size, no_fd_to_keep);
}

861
struct packed_git *add_packed_git(const char *path, int path_len, int local)
862
{
863
	static int have_set_try_to_free_routine;
864
	struct stat st;
865
	struct packed_git *p = alloc_packed_git(path_len + 2);
866

867 868 869 870 871
	if (!have_set_try_to_free_routine) {
		have_set_try_to_free_routine = 1;
		set_try_to_free_routine(try_to_free_pack_memory);
	}

872 873 874 875 876
	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
	path_len -= strlen(".idx");
877 878
	if (path_len < 1) {
		free(p);
879
		return NULL;
880
	}
881
	memcpy(p->pack_name, path, path_len);
882 883 884 885 886

	strcpy(p->pack_name + path_len, ".keep");
	if (!access(p->pack_name, F_OK))
		p->pack_keep = 1;

887
	strcpy(p->pack_name + path_len, ".pack");
888
	if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
889
		free(p);
890 891
		return NULL;
	}
892

893 894 895 896
	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
	p->pack_size = st.st_size;
897
	p->pack_local = local;
898
	p->mtime = st.st_mtime;
899 900
	if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
		hashclr(p->sha1);
901 902 903
	return p;
}

904
struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
905
{
906
	const char *path = sha1_pack_name(sha1);
907
	struct packed_git *p = alloc_packed_git(strlen(path) + 1);
908

909 910
	strcpy(p->pack_name, path);
	hashcpy(p->sha1, sha1);
911 912
	if (check_packed_git_idx(idx_path, p)) {
		free(p);
913
		return NULL;
914
	}
915 916 917 918 919 920 921 922 923 924

	return p;
}

/* Put "pack" at the front of the global packed_git list. */
void install_packed_git(struct packed_git *pack)
{
	struct packed_git *old_head = packed_git;

	pack->next = old_head;
	packed_git = pack;
}

925
static void prepare_packed_git_one(char *objdir, int local)
926
{
927 928 929 930
	/* Ensure that this buffer is large enough so that we can
	   append "/pack/" without clobbering the stack even if
	   strlen(objdir) were PATH_MAX.  */
	char path[PATH_MAX + 1 + 4 + 1 + 1];
931 932 933 934 935 936 937
	int len;
	DIR *dir;
	struct dirent *de;

	sprintf(path, "%s/pack", objdir);
	len = strlen(path);
	dir = opendir(path);
938
	while (!dir && errno == EMFILE && unuse_one_window(NULL, -1))
939
		dir = opendir(path);
940
	if (!dir) {
J
Junio C Hamano 已提交
941
		if (errno != ENOENT)
942
			error("unable to open object pack directory: %s: %s",
J
Junio C Hamano 已提交
943
			      path, strerror(errno));
944
		return;
945
	}
946 947 948 949 950
	path[len++] = '/';
	while ((de = readdir(dir)) != NULL) {
		int namelen = strlen(de->d_name);
		struct packed_git *p;

951
		if (!has_extension(de->d_name, ".idx"))
952 953
			continue;

954 955 956
		if (len + namelen + 1 > sizeof(path))
			continue;

957
		/* Don't reopen a pack we already have. */
958
		strcpy(path + len, de->d_name);
959 960 961 962 963 964
		for (p = packed_git; p; p = p->next) {
			if (!memcmp(path, p->pack_name, len + namelen - 4))
				break;
		}
		if (p)
			continue;
965 966 967
		/* See if it really is a valid .idx file with corresponding
		 * .pack file that we can map.
		 */
968
		p = add_packed_git(path, len + namelen, local);
969 970
		if (!p)
			continue;
971
		install_packed_git(p);
972
	}
973
	closedir(dir);
974 975
}

976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
/*
 * qsort() comparison for an array of "struct packed_git *": local
 * packs sort before non-local ones, and within each group packs with a
 * larger mtime sort first.
 */
static int sort_pack(const void *a_, const void *b_)
{
	struct packed_git *a = *((struct packed_git **)a_);
	struct packed_git *b = *((struct packed_git **)b_);
	int locality;

	/*
	 * Local packs tend to contain objects specific to our
	 * variant of the project than remote ones.  In addition,
	 * remote ones could be on a network mounted filesystem.
	 * Favor local ones for these reasons.
	 */
	locality = a->pack_local - b->pack_local;
	if (locality)
		return -locality;

	/*
	 * Younger packs tend to contain more recent objects,
	 * and more recent objects tend to get accessed more
	 * often.
	 */
	if (a->mtime == b->mtime)
		return 0;
	return (a->mtime < b->mtime) ? 1 : -1;
}

/*
 * Re-order the packed_git list according to sort_pack().  Lists with
 * fewer than two packs are left alone.
 */
static void rearrange_packed_git(void)
{
	struct packed_git **packs, *p;
	int count = 0;
	int i;

	for (p = packed_git; p; p = p->next)
		count++;
	if (count < 2)
		return;

	/* collect the list into an array so that qsort() can be used */
	packs = xcalloc(count, sizeof(struct packed_git *));
	i = 0;
	for (p = packed_git; p; p = p->next)
		packs[i++] = p;

	qsort(packs, count, sizeof(struct packed_git *), sort_pack);

	/* rebuild the list in sorted order */
	packed_git = packs[0];
	for (i = 1; i < count; i++)
		packs[i - 1]->next = packs[i];
	packs[count - 1]->next = NULL;

	free(packs);
}

1030
static int prepare_packed_git_run_once = 0;
1031
void prepare_packed_git(void)
1032
{
1033
	struct alternate_object_database *alt;
1034

1035
	if (prepare_packed_git_run_once)
1036
		return;
1037
	prepare_packed_git_one(get_object_directory(), 1);
1038
	prepare_alt_odb();
1039
	for (alt = alt_odb_list; alt; alt = alt->next) {
1040
		alt->name[-1] = 0;
1041
		prepare_packed_git_one(alt->base, 0);
1042
		alt->name[-1] = '/';
1043
	}
1044
	rearrange_packed_git();
1045 1046 1047
	prepare_packed_git_run_once = 1;
}

1048
void reprepare_packed_git(void)
1049
{
1050
	discard_revindex();
1051 1052
	prepare_packed_git_run_once = 0;
	prepare_packed_git();
1053 1054
}

1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066
/*
 * Record "sha1" in pack p's list of bad objects (an array of 20-byte
 * entries), unless it is listed already.
 */
static void mark_bad_packed_object(struct packed_git *p,
				   const unsigned char *sha1)
{
	unsigned i = 0;

	while (i < p->num_bad_objects) {
		if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
			return;	/* already known to be bad */
		i++;
	}

	/* grow the array by one entry and append */
	p->bad_object_sha1 = xrealloc(p->bad_object_sha1, 20 * (p->num_bad_objects + 1));
	hashcpy(p->bad_object_sha1 + 20 * p->num_bad_objects, sha1);
	p->num_bad_objects++;
}

1067
static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1068 1069 1070 1071 1072 1073 1074
{
	struct packed_git *p;
	unsigned i;

	for (p = packed_git; p; p = p->next)
		for (i = 0; i < p->num_bad_objects; i++)
			if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1075 1076
				return p;
	return NULL;
1077 1078
}

1079
/*
 * Hash the "size" bytes at "map" as an object of the given type and
 * compare the result with sha1.  Returns 0 when they match, -1 otherwise.
 */
int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
{
	unsigned char real_sha1[20];

	hash_sha1_file(map, size, type, real_sha1);
	if (hashcmp(sha1, real_sha1))
		return -1;
	return 0;
}

1086
static int git_open_noatime(const char *name, struct packed_git *p)
1087 1088 1089
{
	static int sha1_file_open_flag = O_NOATIME;

1090 1091
	for (;;) {
		int fd = open(name, O_RDONLY | sha1_file_open_flag);
1092
		if (fd >= 0)
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104
			return fd;

		/* Might the failure be insufficient file descriptors? */
		if (errno == EMFILE) {
			if (unuse_one_window(p, -1))
				continue;
			else
				return -1;
		}

		/* Might the failure be due to O_NOATIME? */
		if (errno != ENOENT && sha1_file_open_flag) {
1105
			sha1_file_open_flag = 0;
1106 1107 1108 1109
			continue;
		}

		return -1;
1110 1111 1112 1113 1114 1115 1116 1117 1118
	}
}

static int open_sha1_file(const unsigned char *sha1)
{
	int fd;
	char *name = sha1_file_name(sha1);
	struct alternate_object_database *alt;

1119
	fd = git_open_noatime(name, NULL);
1120 1121 1122 1123 1124 1125 1126 1127
	if (fd >= 0)
		return fd;

	prepare_alt_odb();
	errno = ENOENT;
	for (alt = alt_odb_list; alt; alt = alt->next) {
		name = alt->name;
		fill_sha1_path(name, sha1);
1128
		fd = git_open_noatime(alt->base, NULL);
1129 1130 1131 1132 1133 1134
		if (fd >= 0)
			return fd;
	}
	return -1;
}

J
Junio C Hamano 已提交
1135
/*
 * Map the loose object file for sha1 read-only into memory.
 *
 * On success the mapping is returned and *size is set to the file size.
 * NULL is returned when the file cannot be opened or fstat() fails;
 * *size is not written if the file cannot be opened.  The descriptor is
 * closed before returning in every case.
 */
static void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	void *map;
	int fd;

	fd = open_sha1_file(sha1);
	map = NULL;
	if (fd >= 0) {
		struct stat st;

		if (!fstat(fd, &st)) {
			*size = xsize_t(st.st_size);
			map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
		}
		close(fd);
	}
	return map;
}

1154
/*
 * Tell whether the buffer starts like a zlib-compressed stream.
 *
 * Returns 1 when the first byte is 0x78 (15-bit window size, deflated)
 * and the first two bytes, read as a big-endian 16-bit word, are evenly
 * divisible by 31; returns 0 otherwise.  Reads map[0] and map[1].
 */
static int legacy_loose_object(unsigned char *map)
{
	unsigned int word = (map[0] << 8) + map[1];

	return map[0] == 0x78 && !(word % 31);
}

1170 1171
unsigned long unpack_object_header_buffer(const unsigned char *buf,
		unsigned long len, enum object_type *type, unsigned long *sizep)
1172
{
1173
	unsigned shift;
1174
	unsigned long size, c;
1175 1176 1177 1178 1179 1180 1181
	unsigned long used = 0;

	c = buf[used++];
	*type = (c >> 4) & 7;
	size = c & 15;
	shift = 4;
	while (c & 0x80) {
1182
		if (len <= used || bitsizeof(long) <= shift) {
1183
			error("bad object header");
1184
			return 0;
1185
		}
1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
		c = buf[used++];
		size += (c & 0x7f) << shift;
		shift += 7;
	}
	*sizep = size;
	return used;
}

/*
 * Prepare "stream" for reading a mapped loose object and place the
 * object's textual header at the start of "buffer".
 *
 * For a zlib-looking (legacy) file the stream is initialised and one
 * git_inflate() call fills the buffer; its status is returned.  For the
 * other loose format the binary header is decoded, the stream is set up
 * on the data that follows it, and a "<type> <size>" header is written
 * to the buffer with total_out set just past its NUL; 0 is returned.
 * -1 is returned if that binary header is bad or names a type that is
 * not valid for a loose object.
 */
static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz)
{
	unsigned long size, used;
	static const char valid_loose_object_type[8] = {
		0, /* OBJ_EXT */
		1, 1, 1, 1, /* "commit", "tree", "blob", "tag" */
		0, /* "delta" and others are invalid in a loose object */
	};
	enum object_type type;

	/* Get the data stream */
	memset(stream, 0, sizeof(*stream));
	stream->next_in = map;
	stream->avail_in = mapsize;
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	if (legacy_loose_object(map)) {
		git_inflate_init(stream);
		return git_inflate(stream, 0);
	}

	/*
	 * There used to be a second loose object header format which
	 * was meant to mimic the in-pack format, allowing for direct
	 * copy of the object data.  This format turned out not to be
	 * really worth it and we don't write it any longer.  But we
	 * can still read it.
	 */
	used = unpack_object_header_buffer(map, mapsize, &type, &size);
	if (!used || !valid_loose_object_type[type])
		return -1;
	map += used;
	mapsize -= used;

	/* Set up the stream for the rest.. */
	stream->next_in = map;
	stream->avail_in = mapsize;
	git_inflate_init(stream);

	/* And generate the fake traditional header */
	stream->total_out = 1 + snprintf(buffer, bufsiz, "%s %lu",
					 typename(type), size);
	return 0;
}

1241
/*
 * Finish inflating a loose object whose header is already in "buffer".
 *
 * A new buffer of "size" bytes (from xmallocz) receives whatever
 * payload followed the header's NUL in "buffer", and the remainder is
 * inflated straight into it.  The new buffer is returned only if zlib
 * reports Z_STREAM_END with no input left over; otherwise an error is
 * reported for a negative status or for trailing input, the buffer is
 * freed and NULL is returned.
 */
static void *unpack_sha1_rest(z_stream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{
	int bytes = strlen(buffer) + 1;
	unsigned char *buf = xmallocz(size);
	unsigned long n;
	int status = Z_OK;

	/* payload bytes already inflated past the header, capped at size */
	n = stream->total_out - bytes;
	if (n > size)
		n = size;
	memcpy(buf, (char *) buffer + bytes, n);
	bytes = n;
	if (bytes <= size) {
		/*
		 * The above condition must be (bytes <= size), not
		 * (bytes < size).  In other words, even though we
		 * expect no more output and set avail_out to zero,
		 * the input zlib stream may have bytes that express
		 * "this concludes the stream", and we *do* want to
		 * eat that input.
		 *
		 * Otherwise we would not be able to test that we
		 * consumed all the input to reach the expected size;
		 * we also want to check that zlib tells us that all
		 * went well with status == Z_STREAM_END at the end.
		 */
		stream->next_out = buf + bytes;
		stream->avail_out = size - bytes;
		while (status == Z_OK)
			status = git_inflate(stream, Z_FINISH);
	}
	if (status == Z_STREAM_END && !stream->avail_in) {
		git_inflate_end(stream);
		return buf;
	}

	if (status < 0)
		error("corrupt loose object '%s'", sha1_to_hex(sha1));
	else if (stream->avail_in)
		error("garbage at end of loose object '%s'",
		      sha1_to_hex(sha1));
	free(buf);
	return NULL;
}

/*
 * We used to just use "sscanf()", but that's actually way
 * too permissive for what we want to check. So do an anal
 * object header parse by hand.
 */
1291
/*
 * Parse a "<type> <size>" object header that ends in a NUL byte.
 *
 * On return *sizep holds the decimal size if the size field started
 * with a digit.  The result is whatever type_from_string() returns for
 * the type word, or -1 when the type word is too long, the size does
 * not start with a digit, or the size is not immediately followed by
 * a NUL.
 */
static int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
	char type[10];
	int i;
	unsigned long size;

	/*
	 * The type can be at most ten bytes (including the
	 * terminating '\0' that we add), and is followed by
	 * a space.
	 */
	i = 0;
	for (;;) {
		char c = *hdr++;
		if (c == ' ')
			break;
		type[i++] = c;
		if (i >= sizeof(type))
			return -1;
	}
	type[i] = 0;

	/*
	 * The length must follow immediately, and be in canonical
	 * decimal format (ie "010" is not valid).
	 */
	size = *hdr++ - '0';
	if (size > 9)
		return -1;
	if (size) {
		/* a leading zero is not extended, so "010" fails the NUL check below */
		for (;;) {
			unsigned long c = *hdr - '0';
			if (c > 9)
				break;
			hdr++;
			size = size * 10 + c;
		}
	}
	*sizep = size;

	/*
	 * The length must be followed by a zero byte
	 */
	return *hdr ? -1 : type_from_string(type);
}

1337
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
1338
{
1339
	int ret;
1340
	z_stream stream;
1341
	char hdr[8192];
1342

1343
	ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
1344
	if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0)
1345 1346
		return NULL;

1347
	return unpack_sha1_rest(&stream, hdr, *size, sha1);
1348 1349
}

N
Nicolas Pitre 已提交
1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362
unsigned long get_size_from_delta(struct packed_git *p,
				  struct pack_window **w_curs,
			          off_t curpos)
{
	const unsigned char *data;
	unsigned char delta_head[20], *in;
	z_stream stream;
	int st;

	memset(&stream, 0, sizeof(stream));
	stream.next_out = delta_head;
	stream.avail_out = sizeof(delta_head);

1363
	git_inflate_init(&stream);
N
Nicolas Pitre 已提交
1364 1365 1366
	do {
		in = use_pack(p, w_curs, curpos, &stream.avail_in);
		stream.next_in = in;
1367
		st = git_inflate(&stream, Z_FINISH);
N
Nicolas Pitre 已提交
1368 1369 1370
		curpos += stream.next_in - in;
	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
		 stream.total_out < sizeof(delta_head));
1371
	git_inflate_end(&stream);
1372 1373 1374 1375
	if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
		error("delta data unpack-initial failed");
		return 0;
	}
N
Nicolas Pitre 已提交
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388

	/* Examine the initial part of the delta to figure out
	 * the result size.
	 */
	data = delta_head;

	/* ignore base size */
	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));

	/* Read the result size */
	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
}

1389
static off_t get_delta_base(struct packed_git *p,
1390
				    struct pack_window **w_curs,
1391
				    off_t *curpos,
1392
				    enum object_type type,
1393
				    off_t delta_obj_offset)
1394
{
1395
	unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1396
	off_t base_offset;
1397

1398 1399 1400 1401 1402 1403
	/* use_pack() assured us we have [base_info, base_info + 20)
	 * as a range that we can look at without walking off the
	 * end of the mapped window.  Its actually the hash size
	 * that is assured.  An OFS_DELTA longer than the hash size
	 * is stupid, as then a REF_DELTA would be smaller to store.
	 */
1404
	if (type == OBJ_OFS_DELTA) {
1405 1406 1407 1408 1409
		unsigned used = 0;
		unsigned char c = base_info[used++];
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
1410
			if (!base_offset || MSB(base_offset, 7))
1411
				return 0;  /* overflow */
1412 1413 1414 1415
			c = base_info[used++];
			base_offset = (base_offset << 7) + (c & 127);
		}
		base_offset = delta_obj_offset - base_offset;
1416
		if (base_offset <= 0 || base_offset >= delta_obj_offset)
1417
			return 0;  /* out of bound */
1418
		*curpos += used;
1419
	} else if (type == OBJ_REF_DELTA) {
1420 1421
		/* The base entry _must_ be in the same pack */
		base_offset = find_pack_entry_one(base_info, p);
1422
		*curpos += 20;
1423 1424
	} else
		die("I am totally screwed");
1425
	return base_offset;
1426 1427
}

J
Junio C Hamano 已提交
1428
/* forward declaration for a mutually recursive function */
static int packed_object_info(struct packed_git *p, off_t offset,
			      unsigned long *sizep);
J
Junio C Hamano 已提交
1431

N
Nicolas Pitre 已提交
1432
static int packed_delta_info(struct packed_git *p,
1433
			     struct pack_window **w_curs,
1434
			     off_t curpos,
1435
			     enum object_type type,
1436
			     off_t obj_offset,
N
Nicolas Pitre 已提交
1437
			     unsigned long *sizep)
1438
{
1439
	off_t base_offset;
J
Junio C Hamano 已提交
1440

1441
	base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1442 1443
	if (!base_offset)
		return OBJ_BAD;
1444
	type = packed_object_info(p, base_offset, NULL);
1445
	if (type <= OBJ_NONE) {
1446 1447 1448 1449 1450 1451
		struct revindex_entry *revidx;
		const unsigned char *base_sha1;
		revidx = find_pack_revindex(p, base_offset);
		if (!revidx)
			return OBJ_BAD;
		base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1452 1453 1454 1455 1456
		mark_bad_packed_object(p, base_sha1);
		type = sha1_object_info(base_sha1, NULL);
		if (type <= OBJ_NONE)
			return OBJ_BAD;
	}
J
Junio C Hamano 已提交
1457

1458 1459 1460 1461 1462
	/* We choose to only get the type of the base object and
	 * ignore potentially corrupt pack file that expects the delta
	 * based on a base with a wrong size.  This saves tons of
	 * inflate() calls.
	 */
1463
	if (sizep) {
N
Nicolas Pitre 已提交
1464
		*sizep = get_size_from_delta(p, w_curs, curpos);
1465 1466 1467
		if (*sizep == 0)
			type = OBJ_BAD;
	}
1468 1469

	return type;
1470 1471
}

1472 1473
static int unpack_object_header(struct packed_git *p,
				struct pack_window **w_curs,
1474
				off_t *curpos,
1475
				unsigned long *sizep)
1476
{
1477 1478
	unsigned char *base;
	unsigned int left;
1479
	unsigned long used;
1480
	enum object_type type;
1481

1482 1483
	/* use_pack() assures us we have [base, base + 20) available
	 * as a range that we can look at at.  (Its actually the hash
P
Pavel Roskin 已提交
1484
	 * size that is assured.)  With our object header encoding
1485 1486 1487
	 * the maximum deflated object size is 2^137, which is just
	 * insane, so we know won't exceed what we have been given.
	 */
1488
	base = use_pack(p, w_curs, *curpos, &left);
1489 1490 1491 1492 1493
	used = unpack_object_header_buffer(base, left, &type, sizep);
	if (!used) {
		type = OBJ_BAD;
	} else
		*curpos += used;
1494

1495
	return type;
1496 1497
}

1498
const char *packed_object_info_detail(struct packed_git *p,
1499
				      off_t obj_offset,
1500 1501 1502 1503
				      unsigned long *size,
				      unsigned long *store_size,
				      unsigned int *delta_chain_length,
				      unsigned char *base_sha1)
1504
{
1505
	struct pack_window *w_curs = NULL;
1506 1507
	off_t curpos;
	unsigned long dummy;
N
Nicolas Pitre 已提交
1508
	unsigned char *next_sha1;
1509
	enum object_type type;
1510
	struct revindex_entry *revidx;
1511

N
Nicolas Pitre 已提交
1512
	*delta_chain_length = 0;
1513
	curpos = obj_offset;
1514
	type = unpack_object_header(p, &w_curs, &curpos, size);
N
Nicolas Pitre 已提交
1515

1516 1517 1518
	revidx = find_pack_revindex(p, obj_offset);
	*store_size = revidx[1].offset - obj_offset;

N
Nicolas Pitre 已提交
1519
	for (;;) {
1520
		switch (type) {
N
Nicolas Pitre 已提交
1521
		default:
1522
			die("pack %s contains unknown object type %d",
1523
			    p->pack_name, type);
N
Nicolas Pitre 已提交
1524 1525 1526 1527
		case OBJ_COMMIT:
		case OBJ_TREE:
		case OBJ_BLOB:
		case OBJ_TAG:
1528
			unuse_pack(&w_curs);
1529
			return typename(type);
1530
		case OBJ_OFS_DELTA:
1531
			obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
1532 1533 1534
			if (!obj_offset)
				die("pack %s contains bad delta base reference of type %s",
				    p->pack_name, typename(type));
1535
			if (*delta_chain_length == 0) {
1536 1537
				revidx = find_pack_revindex(p, obj_offset);
				hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
1538 1539 1540
			}
			break;
		case OBJ_REF_DELTA:
1541
			next_sha1 = use_pack(p, &w_curs, curpos, NULL);
N
Nicolas Pitre 已提交
1542 1543
			if (*delta_chain_length == 0)
				hashcpy(base_sha1, next_sha1);
1544
			obj_offset = find_pack_entry_one(next_sha1, p);
N
Nicolas Pitre 已提交
1545 1546 1547
			break;
		}
		(*delta_chain_length)++;
1548
		curpos = obj_offset;
1549
		type = unpack_object_header(p, &w_curs, &curpos, &dummy);
1550 1551 1552
	}
}

1553
static int packed_object_info(struct packed_git *p, off_t obj_offset,
1554
			      unsigned long *sizep)
1555
{
1556
	struct pack_window *w_curs = NULL;
1557 1558
	unsigned long size;
	off_t curpos = obj_offset;
1559
	enum object_type type;
1560

1561
	type = unpack_object_header(p, &w_curs, &curpos, &size);
1562

1563
	switch (type) {
1564 1565
	case OBJ_OFS_DELTA:
	case OBJ_REF_DELTA:
1566 1567
		type = packed_delta_info(p, &w_curs, curpos,
					 type, obj_offset, sizep);
1568
		break;
1569 1570 1571 1572
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
1573 1574
		if (sizep)
			*sizep = size;
1575
		break;
1576
	default:
1577 1578 1579
		error("unknown object type %i at offset %"PRIuMAX" in %s",
		      type, (uintmax_t)obj_offset, p->pack_name);
		type = OBJ_BAD;
1580
	}
1581
	unuse_pack(&w_curs);
1582
	return type;
1583 1584
}

1585
static void *unpack_compressed_entry(struct packed_git *p,
1586
				    struct pack_window **w_curs,
1587
				    off_t curpos,
1588
				    unsigned long size)
1589 1590 1591
{
	int st;
	z_stream stream;
1592
	unsigned char *buffer, *in;
1593

1594
	buffer = xmallocz(size);
1595 1596
	memset(&stream, 0, sizeof(stream));
	stream.next_out = buffer;
1597
	stream.avail_out = size + 1;
1598

1599
	git_inflate_init(&stream);
1600
	do {
1601
		in = use_pack(p, w_curs, curpos, &stream.avail_in);
1602
		stream.next_in = in;
1603
		st = git_inflate(&stream, Z_FINISH);
1604 1605
		if (!stream.avail_out)
			break; /* the payload is larger than it should be */
1606
		curpos += stream.next_in - in;
1607
	} while (st == Z_OK || st == Z_BUF_ERROR);
1608
	git_inflate_end(&stream);
1609 1610 1611 1612 1613 1614 1615 1616
	if ((st != Z_STREAM_END) || stream.total_out != size) {
		free(buffer);
		return NULL;
	}

	return buffer;
}

1617 1618
#define MAX_DELTA_CACHE (256)

1619
static size_t delta_base_cached;
1620 1621 1622 1623 1624 1625

static struct delta_base_cache_lru_list {
	struct delta_base_cache_lru_list *prev;
	struct delta_base_cache_lru_list *next;
} delta_base_cache_lru = { &delta_base_cache_lru, &delta_base_cache_lru };

1626
static struct delta_base_cache_entry {
1627 1628
	struct delta_base_cache_lru_list lru;
	void *data;
1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640
	struct packed_git *p;
	off_t base_offset;
	unsigned long size;
	enum object_type type;
} delta_base_cache[MAX_DELTA_CACHE];

static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset)
{
	unsigned long hash;

	hash = (unsigned long)p + (unsigned long)base_offset;
	hash += (hash >> 8) + (hash >> 16);
1641
	return hash % MAX_DELTA_CACHE;
1642 1643
}

1644
static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
1645
	unsigned long *base_size, enum object_type *type, int keep_cache)
1646
{
1647 1648 1649 1650 1651
	void *ret;
	unsigned long hash = pack_entry_hash(p, base_offset);
	struct delta_base_cache_entry *ent = delta_base_cache + hash;

	ret = ent->data;
1652 1653
	if (!ret || ent->p != p || ent->base_offset != base_offset)
		return unpack_entry(p, base_offset, type, base_size);
1654

1655
	if (!keep_cache) {
1656
		ent->data = NULL;
1657 1658
		ent->lru.next->prev = ent->lru.prev;
		ent->lru.prev->next = ent->lru.next;
1659
		delta_base_cached -= ent->size;
P
Pierre Habouzit 已提交
1660 1661
	} else {
		ret = xmemdupz(ent->data, ent->size);
1662
	}
1663 1664 1665
	*type = ent->type;
	*base_size = ent->size;
	return ret;
1666 1667
}

1668 1669 1670 1671 1672
static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
{
	if (ent->data) {
		free(ent->data);
		ent->data = NULL;
1673 1674
		ent->lru.next->prev = ent->lru.prev;
		ent->lru.prev->next = ent->lru.next;
1675 1676 1677 1678
		delta_base_cached -= ent->size;
	}
}

1679 1680 1681 1682 1683 1684 1685
void clear_delta_base_cache(void)
{
	unsigned long p;
	for (p = 0; p < MAX_DELTA_CACHE; p++)
		release_delta_base_cache(&delta_base_cache[p]);
}

1686 1687 1688
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
	void *base, unsigned long base_size, enum object_type type)
{
1689
	unsigned long hash = pack_entry_hash(p, base_offset);
1690
	struct delta_base_cache_entry *ent = delta_base_cache + hash;
1691
	struct delta_base_cache_lru_list *lru;
1692

1693 1694
	release_delta_base_cache(ent);
	delta_base_cached += base_size;
1695 1696 1697 1698 1699 1700

	for (lru = delta_base_cache_lru.next;
	     delta_base_cached > delta_base_cache_limit
	     && lru != &delta_base_cache_lru;
	     lru = lru->next) {
		struct delta_base_cache_entry *f = (void *)lru;
1701 1702 1703
		if (f->type == OBJ_BLOB)
			release_delta_base_cache(f);
	}
1704 1705 1706 1707 1708 1709 1710
	for (lru = delta_base_cache_lru.next;
	     delta_base_cached > delta_base_cache_limit
	     && lru != &delta_base_cache_lru;
	     lru = lru->next) {
		struct delta_base_cache_entry *f = (void *)lru;
		release_delta_base_cache(f);
	}
1711

1712 1713 1714 1715 1716
	ent->p = p;
	ent->base_offset = base_offset;
	ent->type = type;
	ent->data = base;
	ent->size = base_size;
1717 1718 1719 1720
	ent->lru.next = &delta_base_cache_lru;
	ent->lru.prev = delta_base_cache_lru.prev;
	delta_base_cache_lru.prev->next = &ent->lru;
	delta_base_cache_lru.prev = &ent->lru;
1721 1722
}

1723 1724 1725
static void *read_object(const unsigned char *sha1, enum object_type *type,
			 unsigned long *size);

1726
static void *unpack_delta_entry(struct packed_git *p,
1727
				struct pack_window **w_curs,
1728
				off_t curpos,
1729
				unsigned long delta_size,
1730
				off_t obj_offset,
1731
				enum object_type *type,
1732
				unsigned long *sizep)
1733
{
1734
	void *delta_data, *result, *base;
1735 1736
	unsigned long base_size;
	off_t base_offset;
N
Nicolas Pitre 已提交
1737

1738
	base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
1739 1740 1741 1742 1743 1744
	if (!base_offset) {
		error("failed to validate delta base reference "
		      "at offset %"PRIuMAX" from %s",
		      (uintmax_t)curpos, p->pack_name);
		return NULL;
	}
1745
	unuse_pack(w_curs);
1746
	base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
1747 1748 1749 1750 1751 1752 1753
	if (!base) {
		/*
		 * We're probably in deep shit, but let's try to fetch
		 * the required base anyway from another pack or loose.
		 * This is costly but should happen only in the presence
		 * of a corrupted pack, and is better than failing outright.
		 */
1754 1755 1756 1757 1758 1759
		struct revindex_entry *revidx;
		const unsigned char *base_sha1;
		revidx = find_pack_revindex(p, base_offset);
		if (!revidx)
			return NULL;
		base_sha1 = nth_packed_object_sha1(p, revidx->nr);
1760 1761 1762 1763 1764
		error("failed to read delta base object %s"
		      " at offset %"PRIuMAX" from %s",
		      sha1_to_hex(base_sha1), (uintmax_t)base_offset,
		      p->pack_name);
		mark_bad_packed_object(p, base_sha1);
1765
		base = read_object(base_sha1, type, &base_size);
1766 1767 1768
		if (!base)
			return NULL;
	}
1769

1770
	delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
1771 1772 1773 1774 1775 1776 1777
	if (!delta_data) {
		error("failed to unpack compressed delta "
		      "at offset %"PRIuMAX" from %s",
		      (uintmax_t)curpos, p->pack_name);
		free(base);
		return NULL;
	}
1778 1779
	result = patch_delta(base, base_size,
			     delta_data, delta_size,
1780
			     sizep);
1781 1782 1783
	if (!result)
		die("failed to apply delta");
	free(delta_data);
1784
	add_delta_base_cache(p, base_offset, base, base_size, *type);
1785 1786 1787
	return result;
}

1788 1789
int do_check_packed_object_crc;

1790
void *unpack_entry(struct packed_git *p, off_t obj_offset,
1791
		   enum object_type *type, unsigned long *sizep)
1792
{
1793
	struct pack_window *w_curs = NULL;
1794
	off_t curpos = obj_offset;
1795
	void *data;
1796

1797 1798 1799 1800 1801 1802 1803 1804 1805
	if (do_check_packed_object_crc && p->index_version > 1) {
		struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
		unsigned long len = revidx[1].offset - obj_offset;
		if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
			const unsigned char *sha1 =
				nth_packed_object_sha1(p, revidx->nr);
			error("bad packed object CRC for %s",
			      sha1_to_hex(sha1));
			mark_bad_packed_object(p, sha1);
1806
			unuse_pack(&w_curs);
1807 1808 1809 1810
			return NULL;
		}
	}

1811 1812
	*type = unpack_object_header(p, &w_curs, &curpos, sizep);
	switch (*type) {
1813 1814
	case OBJ_OFS_DELTA:
	case OBJ_REF_DELTA:
1815 1816
		data = unpack_delta_entry(p, &w_curs, curpos, *sizep,
					  obj_offset, type, sizep);
1817
		break;
1818 1819 1820 1821
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
1822
		data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
1823
		break;
1824
	default:
1825 1826 1827
		data = NULL;
		error("unknown object type %i at offset %"PRIuMAX" in %s",
		      *type, (uintmax_t)obj_offset, p->pack_name);
1828
	}
1829
	unuse_pack(&w_curs);
1830
	return data;
1831 1832
}

1833
const unsigned char *nth_packed_object_sha1(struct packed_git *p,
1834
					    uint32_t n)
1835
{
1836
	const unsigned char *index = p->index_data;
1837 1838 1839 1840 1841
	if (!index) {
		if (open_pack_index(p))
			return NULL;
		index = p->index_data;
	}
N
Nicolas Pitre 已提交
1842
	if (n >= p->num_objects)
1843
		return NULL;
1844 1845 1846 1847 1848 1849 1850 1851 1852
	index += 4 * 256;
	if (p->index_version == 1) {
		return index + 24 * n + 4;
	} else {
		index += 8;
		return index + 20 * n;
	}
}

N
Nicolas Pitre 已提交
1853
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868
{
	const unsigned char *index = p->index_data;
	index += 4 * 256;
	if (p->index_version == 1) {
		return ntohl(*((uint32_t *)(index + 24 * n)));
	} else {
		uint32_t off;
		index += 8 + p->num_objects * (20 + 4);
		off = ntohl(*((uint32_t *)(index + 4 * n)));
		if (!(off & 0x80000000))
			return off;
		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
				   ntohl(*((uint32_t *)(index + 4)));
	}
1869 1870
}

1871
off_t find_pack_entry_one(const unsigned char *sha1,
N
Nicolas Pitre 已提交
1872
				  struct packed_git *p)
1873
{
1874 1875
	const uint32_t *level1_ofs = p->index_data;
	const unsigned char *index = p->index_data;
1876 1877 1878 1879 1880 1881
	unsigned hi, lo, stride;
	static int use_lookup = -1;
	static int debug_lookup = -1;

	if (debug_lookup < 0)
		debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
1882

1883 1884 1885 1886 1887 1888
	if (!index) {
		if (open_pack_index(p))
			return 0;
		level1_ofs = p->index_data;
		index = p->index_data;
	}
1889 1890 1891 1892
	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}
1893
	index += 4 * 256;
1894 1895
	hi = ntohl(level1_ofs[*sha1]);
	lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
1896 1897 1898 1899 1900 1901 1902 1903
	if (p->index_version > 1) {
		stride = 20;
	} else {
		stride = 24;
		index += 4;
	}

	if (debug_lookup)
1904
		printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915
		       sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);

	if (use_lookup < 0)
		use_lookup = !!getenv("GIT_USE_LOOKUP");
	if (use_lookup) {
		int pos = sha1_entry_pos(index, stride, 0,
					 lo, hi, p->num_objects, sha1);
		if (pos < 0)
			return 0;
		return nth_packed_object_offset(p, pos);
	}
1916 1917

	do {
1918
		unsigned mi = (lo + hi) / 2;
1919 1920 1921 1922 1923
		int cmp = hashcmp(index + mi * stride, sha1);

		if (debug_lookup)
			printf("lo %u hi %u rg %u mi %u\n",
			       lo, hi, hi - lo, mi);
N
Nicolas Pitre 已提交
1924
		if (!cmp)
1925
			return nth_packed_object_offset(p, mi);
1926 1927 1928 1929 1930 1931 1932 1933
		if (cmp > 0)
			hi = mi;
		else
			lo = mi+1;
	} while (lo < hi);
	return 0;
}

1934
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
1935
{
1936
	static struct packed_git *last_found = (void *)1;
1937
	struct packed_git *p;
1938
	off_t offset;
N
Nicolas Pitre 已提交
1939

1940
	prepare_packed_git();
1941 1942 1943
	if (!packed_git)
		return 0;
	p = (last_found == (void *)1) ? packed_git : last_found;
1944

1945
	do {
1946 1947 1948 1949 1950 1951 1952
		if (p->num_bad_objects) {
			unsigned i;
			for (i = 0; i < p->num_bad_objects; i++)
				if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
					goto next;
		}

N
Nicolas Pitre 已提交
1953 1954
		offset = find_pack_entry_one(sha1, p);
		if (offset) {
1955 1956 1957 1958 1959 1960 1961 1962 1963 1964
			/*
			 * We are about to tell the caller where they can
			 * locate the requested object.  We better make
			 * sure the packfile is still here and can be
			 * accessed before supplying that answer, as
			 * it may have been deleted since the index
			 * was loaded!
			 */
			if (p->pack_fd == -1 && open_packed_git(p)) {
				error("packfile %s cannot be accessed", p->pack_name);
1965
				goto next;
1966
			}
N
Nicolas Pitre 已提交
1967 1968 1969
			e->offset = offset;
			e->p = p;
			hashcpy(e->sha1, sha1);
1970
			last_found = p;
1971
			return 1;
N
Nicolas Pitre 已提交
1972
		}
1973 1974 1975 1976 1977 1978 1979 1980 1981

		next:
		if (p == last_found)
			p = packed_git;
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	} while (p);
1982 1983 1984
	return 0;
}

J
Junio C Hamano 已提交
1985
struct packed_git *find_sha1_pack(const unsigned char *sha1,
1986 1987 1988 1989 1990
				  struct packed_git *packs)
{
	struct packed_git *p;

	for (p = packs; p; p = p->next) {
N
Nicolas Pitre 已提交
1991
		if (find_pack_entry_one(sha1, p))
1992 1993 1994
			return p;
	}
	return NULL;
1995

1996 1997
}

1998
static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep)
1999
{
2000
	int status;
2001 2002 2003
	unsigned long mapsize, size;
	void *map;
	z_stream stream;
N
Nicolas Pitre 已提交
2004
	char hdr[32];
2005

2006
	map = map_sha1_file(sha1, &mapsize);
2007 2008
	if (!map)
		return error("unable to find %s", sha1_to_hex(sha1));
2009 2010 2011
	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
		status = error("unable to unpack %s header",
			       sha1_to_hex(sha1));
2012
	else if ((status = parse_sha1_header(hdr, &size)) < 0)
2013
		status = error("unable to parse %s header", sha1_to_hex(sha1));
2014 2015
	else if (sizep)
		*sizep = size;
2016
	git_inflate_end(&stream);
2017 2018 2019 2020
	munmap(map, mapsize);
	return status;
}

2021
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
2022
{
2023
	struct cached_object *co;
2024
	struct pack_entry e;
2025
	int status;
2026

2027 2028 2029 2030 2031 2032 2033
	co = find_cached_object(sha1);
	if (co) {
		if (sizep)
			*sizep = co->size;
		return co->type;
	}

2034
	if (!find_pack_entry(sha1, &e)) {
2035 2036 2037 2038 2039 2040
		/* Most likely it's a loose object. */
		status = sha1_loose_object_info(sha1, sizep);
		if (status >= 0)
			return status;

		/* Not a loose object; someone else may have just packed it. */
2041
		reprepare_packed_git();
2042
		if (!find_pack_entry(sha1, &e))
2043
			return status;
2044
	}
2045 2046 2047 2048 2049 2050 2051 2052

	status = packed_object_info(e.p, e.offset, sizep);
	if (status < 0) {
		mark_bad_packed_object(e.p, sha1);
		status = sha1_object_info(sha1, sizep);
	}

	return status;
2053 2054
}

2055 2056
static void *read_packed_sha1(const unsigned char *sha1,
			      enum object_type *type, unsigned long *size)
2057 2058
{
	struct pack_entry e;
2059
	void *data;
2060

2061
	if (!find_pack_entry(sha1, &e))
2062
		return NULL;
2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073
	data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
	if (!data) {
		/*
		 * We're probably in deep shit, but let's try to fetch
		 * the required object anyway from another pack or loose.
		 * This should happen only in the presence of a corrupted
		 * pack, and is better than failing outright.
		 */
		error("failed to read object %s at offset %"PRIuMAX" from %s",
		      sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
		mark_bad_packed_object(e.p, sha1);
2074
		data = read_object(sha1, type, size);
2075 2076
	}
	return data;
2077 2078
}

2079 2080
int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
		      unsigned char *sha1)
2081 2082 2083
{
	struct cached_object *co;

2084
	hash_sha1_file(buf, len, typename(type), sha1);
2085 2086 2087 2088 2089 2090 2091 2092 2093 2094
	if (has_sha1_file(sha1) || find_cached_object(sha1))
		return 0;
	if (cached_object_alloc <= cached_object_nr) {
		cached_object_alloc = alloc_nr(cached_object_alloc);
		cached_objects = xrealloc(cached_objects,
					  sizeof(*cached_objects) *
					  cached_object_alloc);
	}
	co = &cached_objects[cached_object_nr++];
	co->size = len;
2095
	co->type = type;
J
Junio C Hamano 已提交
2096 2097
	co->buf = xmalloc(len);
	memcpy(co->buf, buf, len);
2098 2099 2100 2101
	hashcpy(co->sha1, sha1);
	return 0;
}

2102 2103
static void *read_object(const unsigned char *sha1, enum object_type *type,
			 unsigned long *size)
2104 2105 2106
{
	unsigned long mapsize;
	void *map, *buf;
2107 2108 2109 2110
	struct cached_object *co;

	co = find_cached_object(sha1);
	if (co) {
2111
		*type = co->type;
2112
		*size = co->size;
P
Pierre Habouzit 已提交
2113
		return xmemdupz(co->buf, co->size);
2114
	}
2115

2116 2117 2118
	buf = read_packed_sha1(sha1, type, size);
	if (buf)
		return buf;
2119
	map = map_sha1_file(sha1, &mapsize);
2120
	if (map) {
2121
		buf = unpack_sha1_file(map, mapsize, type, size, sha1);
2122 2123 2124
		munmap(map, mapsize);
		return buf;
	}
2125
	reprepare_packed_git();
2126
	return read_packed_sha1(sha1, type, size);
2127 2128
}

2129 2130 2131 2132 2133
/*
 * This function dies on corrupt objects; the callers who want to
 * deal with them should arrange to call read_object() and give error
 * messages themselves.
 */
2134 2135 2136 2137
void *read_sha1_file_repl(const unsigned char *sha1,
			  enum object_type *type,
			  unsigned long *size,
			  const unsigned char **replacement)
2138
{
2139
	const unsigned char *repl = lookup_replace_object(sha1);
2140
	void *data;
2141
	char *path;
2142 2143
	const struct packed_git *p;

2144 2145
	errno = 0;
	data = read_object(repl, type, size);
2146 2147 2148 2149 2150
	if (data) {
		if (replacement)
			*replacement = repl;
		return data;
	}
2151

2152
	if (errno && errno != ENOENT)
2153 2154
		die_errno("failed to read object %s", sha1_to_hex(sha1));

2155
	/* die if we replaced an object with one that does not exist */
2156
	if (repl != sha1)
2157 2158 2159
		die("replacement %s not found for %s",
		    sha1_to_hex(repl), sha1_to_hex(sha1));

2160 2161 2162 2163
	if (has_loose_object(repl)) {
		path = sha1_file_name(sha1);
		die("loose object %s (stored in %s) is corrupt",
		    sha1_to_hex(repl), path);
2164
	}
2165

2166 2167 2168
	if ((p = has_packed_and_bad(repl)) != NULL)
		die("packed object %s (stored in %s) is corrupt",
		    sha1_to_hex(repl), p->pack_name);
2169

2170
	return NULL;
2171 2172
}

2173
void *read_object_with_reference(const unsigned char *sha1,
2174
				 const char *required_type_name,
2175 2176
				 unsigned long *size,
				 unsigned char *actual_sha1_return)
2177
{
2178
	enum object_type type, required_type;
2179 2180
	void *buffer;
	unsigned long isize;
2181
	unsigned char actual_sha1[20];
2182

2183
	required_type = type_from_string(required_type_name);
2184
	hashcpy(actual_sha1, sha1);
2185 2186 2187
	while (1) {
		int ref_length = -1;
		const char *ref_type = NULL;
2188

2189
		buffer = read_sha1_file(actual_sha1, &type, &isize);
2190 2191
		if (!buffer)
			return NULL;
2192
		if (type == required_type) {
2193 2194
			*size = isize;
			if (actual_sha1_return)
2195
				hashcpy(actual_sha1_return, actual_sha1);
2196 2197 2198
			return buffer;
		}
		/* Handle references */
2199
		else if (type == OBJ_COMMIT)
2200
			ref_type = "tree ";
2201
		else if (type == OBJ_TAG)
2202 2203 2204 2205 2206 2207
			ref_type = "object ";
		else {
			free(buffer);
			return NULL;
		}
		ref_length = strlen(ref_type);
2208

2209 2210
		if (ref_length + 40 > isize ||
		    memcmp(buffer, ref_type, ref_length) ||
2211
		    get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
2212 2213 2214
			free(buffer);
			return NULL;
		}
2215
		free(buffer);
2216 2217
		/* Now we have the ID of the referred-to object in
		 * actual_sha1.  Check again. */
2218 2219 2220
	}
}

N
Nicolas Pitre 已提交
2221
static void write_sha1_file_prepare(const void *buf, unsigned long len,
R
Rene Scharfe 已提交
2222
                                    const char *type, unsigned char *sha1,
N
Nicolas Pitre 已提交
2223
                                    char *hdr, int *hdrlen)
2224
{
2225
	git_SHA_CTX c;
2226 2227

	/* Generate the header */
N
Nicolas Pitre 已提交
2228
	*hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
2229 2230

	/* Sha1.. */
2231 2232 2233 2234
	git_SHA1_Init(&c);
	git_SHA1_Update(&c, hdr, *hdrlen);
	git_SHA1_Update(&c, buf, len);
	git_SHA1_Final(sha1, &c);
2235 2236
}

2237
/*
2238 2239 2240 2241
 * Move the just written object into its final resting place.
 * NEEDSWORK: this should be renamed to finalize_temp_file() as
 * "moving" is only a part of what it does, when no patch between
 * master to pu changes the call sites of this function.
2242
 */
2243
int move_temp_to_file(const char *tmpfile, const char *filename)
2244
{
2245
	int ret = 0;
2246

2247
	if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
2248 2249
		goto try_rename;
	else if (link(tmpfile, filename))
2250
		ret = errno;
2251 2252 2253 2254 2255 2256 2257 2258 2259

	/*
	 * Coda hack - coda doesn't like cross-directory links,
	 * so we fall back to a rename, which will mean that it
	 * won't be able to check collisions, but that's not a
	 * big deal.
	 *
	 * The same holds for FAT formatted media.
	 *
2260
	 * When this succeeds, we just return.  We have nothing
2261 2262 2263
	 * left to unlink.
	 */
	if (ret && ret != EEXIST) {
2264
	try_rename:
2265
		if (!rename(tmpfile, filename))
2266
			goto out;
2267
		ret = errno;
2268
	}
2269
	unlink_or_warn(tmpfile);
2270 2271
	if (ret) {
		if (ret != EEXIST) {
2272
			return error("unable to write sha1 filename %s: %s\n", filename, strerror(ret));
2273 2274 2275 2276
		}
		/* FIXME!!! Collision check here ? */
	}

2277
out:
2278
	if (adjust_shared_perm(filename))
2279
		return error("unable to set permission to '%s'", filename);
2280 2281 2282
	return 0;
}

L
Linus Torvalds 已提交
2283 2284
/*
 * Write all "len" bytes of "buf" to "fd" via write_in_full().
 * Returns 0 on success; on failure reports the errno text through
 * error() and returns its result.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
	if (write_in_full(fd, buf, len) >= 0)
		return 0;
	return error("file write error (%s)", strerror(errno));
}

N
Nicolas Pitre 已提交
2290
/*
 * Compute into "sha1" the name an object of the given "type" with
 * contents "buf"/"len" would have, without writing anything.
 * Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
                   unsigned char *sha1)
{
	/* The header itself is not needed here, only the resulting hash. */
	int header_len;
	char header[32];

	write_sha1_file_prepare(buf, len, type, sha1, header, &header_len);
	return 0;
}

2299 2300 2301
/* Finalize a file on disk, and close it. */
static void close_sha1_file(int fd)
{
2302 2303
	if (fsync_object_files)
		fsync_or_die(fd, "sha1 file");
2304
	if (close(fd) != 0)
2305
		die_errno("error when closing sha1 file");
2306 2307
}

2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333
/*
 * Size of directory component, including the ending '/'.
 * Returns 0 when "filename" contains no '/' at all.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? (int)(last_slash - filename + 1) : 0;
}

/*
 * This creates a temporary file in the same directory as the final
 * 'filename'
 *
 * We want to avoid cross-directory filename renames, because those
 * can have problems on various filesystems (FAT, NFS, Coda).
 *
 * The name of the temporary file is built in "buffer" (capacity
 * "bufsiz").  Returns the open file descriptor, or -1 with errno set;
 * errno is ENAMETOOLONG when "buffer" is too small.
 *
 * If the first attempt fails with ENOENT the directory is created and
 * the attempt is repeated once.  A concurrent writer may create that
 * directory between our failed attempt and our mkdir(); EEXIST from
 * mkdir() is therefore not an error.
 */
static int create_tmpfile(char *buffer, size_t bufsiz, const char *filename)
{
	int fd, dirlen = directory_size(filename);

	/* Room for the directory, the template and its NUL, with slack. */
	if (dirlen + 20 > bufsiz) {
		errno = ENAMETOOLONG;
		return -1;
	}
	memcpy(buffer, filename, dirlen);
	strcpy(buffer + dirlen, "tmp_obj_XXXXXX");
	fd = git_mkstemp_mode(buffer, 0444);
	if (fd < 0 && dirlen && errno == ENOENT) {
		/* Make sure the directory exists */
		memcpy(buffer, filename, dirlen);
		buffer[dirlen-1] = 0;	/* drop the trailing '/' for mkdir() */
		if (mkdir(buffer, 0777) && errno != EEXIST)
			return -1;
		if (adjust_shared_perm(buffer))
			return -1;

		/* Try again */
		strcpy(buffer + dirlen - 1, "/tmp_obj_XXXXXX");
		fd = git_mkstemp_mode(buffer, 0444);
	}
	return fd;
}

2349
static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
2350
			      const void *buf, unsigned long len, time_t mtime)
2351
{
2352
	int fd, ret;
2353
	unsigned char compressed[4096];
2354
	z_stream stream;
2355 2356
	git_SHA_CTX c;
	unsigned char parano_sha1[20];
2357
	char *filename;
2358
	static char tmpfile[PATH_MAX];
2359

R
Rene Scharfe 已提交
2360
	filename = sha1_file_name(sha1);
2361
	fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
2362
	while (fd < 0 && errno == EMFILE && unuse_one_window(NULL, -1))
2363
		fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
2364
	if (fd < 0) {
2365
		if (errno == EACCES)
2366 2367 2368
			return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
		else
			return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
2369 2370
	}

2371 2372
	/* Set it up */
	memset(&stream, 0, sizeof(stream));
2373
	deflateInit(&stream, zlib_compression_level);
2374
	stream.next_out = compressed;
2375
	stream.avail_out = sizeof(compressed);
2376
	git_SHA1_Init(&c);
2377 2378

	/* First header.. */
N
Nicolas Pitre 已提交
2379
	stream.next_in = (unsigned char *)hdr;
2380
	stream.avail_in = hdrlen;
2381 2382
	while (deflate(&stream, 0) == Z_OK)
		/* nothing */;
2383
	git_SHA1_Update(&c, hdr, hdrlen);
2384 2385

	/* Then the data itself.. */
2386
	stream.next_in = (void *)buf;
2387
	stream.avail_in = len;
2388
	do {
2389
		unsigned char *in0 = stream.next_in;
2390
		ret = deflate(&stream, Z_FINISH);
2391
		git_SHA1_Update(&c, in0, stream.next_in - in0);
2392 2393 2394 2395 2396 2397
		if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
			die("unable to write sha1 file");
		stream.next_out = compressed;
		stream.avail_out = sizeof(compressed);
	} while (ret == Z_OK);

2398 2399 2400 2401 2402
	if (ret != Z_STREAM_END)
		die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
	ret = deflateEnd(&stream);
	if (ret != Z_OK)
		die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
2403 2404 2405
	git_SHA1_Final(parano_sha1, &c);
	if (hashcmp(sha1, parano_sha1) != 0)
		die("confused by unstable object source data for %s", sha1_to_hex(sha1));
2406

2407
	close_sha1_file(fd);
2408

2409 2410 2411 2412 2413 2414 2415 2416 2417
	if (mtime) {
		struct utimbuf utb;
		utb.actime = mtime;
		utb.modtime = mtime;
		if (utime(tmpfile, &utb) < 0)
			warning("failed utime() on %s: %s",
				tmpfile, strerror(errno));
	}

2418
	return move_temp_to_file(tmpfile, filename);
2419
}
2420

2421
/*
 * Compute the name of an object of the given "type" with contents
 * "buf"/"len" and write it as a loose object unless has_sha1_file()
 * says we already have it.  The name is copied to "returnsha1" when
 * that pointer is non-NULL.
 *
 * Returns 0 when the object already exists, otherwise the result of
 * write_loose_object().
 */
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
{
	unsigned char name[20];
	char header[32];
	int header_len;

	write_sha1_file_prepare(buf, len, type, name, header, &header_len);
	if (returnsha1)
		hashcpy(returnsha1, name);

	/*
	 * Normally if we have it in the pack then we do not bother
	 * writing it out into .git/objects/??/?{38} file.
	 */
	if (has_sha1_file(name))
		return 0;

	return write_loose_object(name, header, header_len, buf, len, 0);
}

/*
 * Make sure object "sha1" exists as a loose object, writing it out
 * from its packed copy with the given "mtime" if necessary.
 *
 * Returns 0 when a loose copy already exists, the result of error()
 * when the packed copy cannot be read, and otherwise the result of
 * write_loose_object().
 */
int force_object_loose(const unsigned char *sha1, time_t mtime)
{
	enum object_type type;
	unsigned long size;
	char header[32];
	int header_len;
	void *contents;
	int result;

	if (has_loose_object(sha1))
		return 0;

	contents = read_packed_sha1(sha1, &type, &size);
	if (!contents)
		return error("cannot read sha1_file for %s", sha1_to_hex(sha1));

	/* Same "<type> <size>" + NUL header that new objects get. */
	header_len = sprintf(header, "%s %lu", typename(type), size) + 1;
	result = write_loose_object(sha1, header, header_len, contents, size, mtime);
	free(contents);

	return result;
}

2459 2460 2461 2462 2463 2464 2465 2466
/*
 * Return 1 when stat() succeeds on the path that
 * sha1_pack_index_name() gives for "sha1", and 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return !stat(sha1_pack_index_name(sha1), &st);
}

2467 2468 2469 2470 2471 2472
int has_sha1_pack(const unsigned char *sha1)
{
	struct pack_entry e;
	return find_pack_entry(sha1, &e);
}

2473 2474
int has_sha1_file(const unsigned char *sha1)
{
2475 2476
	struct pack_entry e;

2477
	if (find_pack_entry(sha1, &e))
2478
		return 1;
2479
	return has_loose_object(sha1);
2480
}
J
Junio C Hamano 已提交
2481

2482 2483
static int index_mem(unsigned char *sha1, void *buf, size_t size,
		     int write_object, enum object_type type, const char *path)
2484
{
L
Linus Torvalds 已提交
2485
	int ret, re_allocated = 0;
J
Junio C Hamano 已提交
2486

2487
	if (!type)
2488
		type = OBJ_BLOB;
L
Linus Torvalds 已提交
2489 2490 2491 2492

	/*
	 * Convert blobs to git internal format
	 */
2493
	if ((type == OBJ_BLOB) && path) {
2494
		struct strbuf nbuf = STRBUF_INIT;
2495 2496
		if (convert_to_git(path, buf, size, &nbuf,
		                   write_object ? safe_crlf : 0)) {
2497
			buf = strbuf_detach(&nbuf, &size);
L
Linus Torvalds 已提交
2498 2499 2500 2501
			re_allocated = 1;
		}
	}

2502
	if (write_object)
2503
		ret = write_sha1_file(buf, size, typename(type), sha1);
R
Rene Scharfe 已提交
2504
	else
2505
		ret = hash_sha1_file(buf, size, typename(type), sha1);
2506
	if (re_allocated)
L
Linus Torvalds 已提交
2507
		free(buf);
2508 2509 2510
	return ret;
}

2511 2512
#define SMALL_FILE_SIZE (32*1024)

2513 2514 2515 2516 2517 2518 2519
int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
	     enum object_type type, const char *path)
{
	int ret;
	size_t size = xsize_t(st->st_size);

	if (!S_ISREG(st->st_mode)) {
2520
		struct strbuf sbuf = STRBUF_INIT;
2521 2522 2523 2524 2525 2526
		if (strbuf_read(&sbuf, fd, 4096) >= 0)
			ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
					type, path);
		else
			ret = -1;
		strbuf_release(&sbuf);
2527 2528
	} else if (!size) {
		ret = index_mem(sha1, NULL, size, write_object, type, path);
2529 2530 2531 2532 2533 2534 2535 2536
	} else if (size <= SMALL_FILE_SIZE) {
		char *buf = xmalloc(size);
		if (size == read_in_full(fd, buf, size))
			ret = index_mem(sha1, buf, size, write_object, type,
					path);
		else
			ret = error("short read %s", strerror(errno));
		free(buf);
2537
	} else {
2538 2539
		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
		ret = index_mem(sha1, buf, size, write_object, type, path);
2540
		munmap(buf, size);
2541
	}
2542
	close(fd);
2543
	return ret;
J
Junio C Hamano 已提交
2544
}
2545 2546 2547 2548

int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
{
	int fd;
2549
	struct strbuf sb = STRBUF_INIT;
2550 2551 2552 2553 2554 2555 2556

	switch (st->st_mode & S_IFMT) {
	case S_IFREG:
		fd = open(path, O_RDONLY);
		if (fd < 0)
			return error("open(\"%s\"): %s", path,
				     strerror(errno));
2557
		if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path) < 0)
2558 2559 2560 2561
			return error("%s: failed to insert into database",
				     path);
		break;
	case S_IFLNK:
2562
		if (strbuf_readlink(&sb, path, st->st_size)) {
2563 2564 2565 2566
			char *errstr = strerror(errno);
			return error("readlink(\"%s\"): %s", path,
			             errstr);
		}
R
Rene Scharfe 已提交
2567
		if (!write_object)
2568 2569
			hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
		else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
2570 2571
			return error("%s: failed to insert into database",
				     path);
2572
		strbuf_release(&sb);
2573
		break;
2574 2575
	case S_IFDIR:
		return resolve_gitlink_ref(path, "HEAD", sha1);
2576 2577 2578 2579 2580
	default:
		return error("%s: unsupported file type", path);
	}
	return 0;
}
2581 2582 2583

int read_pack_header(int fd, struct pack_header *header)
{
2584 2585 2586 2587
	if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
		/* "eof before pack header was fully read" */
		return PH_ERROR_EOF;

2588 2589 2590 2591 2592 2593 2594 2595
	if (header->hdr_signature != htonl(PACK_SIGNATURE))
		/* "protocol error (pack signature mismatch detected)" */
		return PH_ERROR_PACK_SIGNATURE;
	if (!pack_version_ok(header->hdr_version))
		/* "protocol error (pack version unsupported)" */
		return PH_ERROR_PROTOCOL;
	return 0;
}
J
Jeff King 已提交
2596 2597 2598 2599 2600 2601 2602 2603 2604 2605

void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
{
	enum object_type type = sha1_object_info(sha1, NULL);
	if (type < 0)
		die("%s is not a valid object", sha1_to_hex(sha1));
	if (type != expect)
		die("%s is not a valid '%s' object", sha1_to_hex(sha1),
		    typename(expect));
}