sha1_file.c 92.3 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * GIT - The information manager from hell
 *
 * Copyright (C) Linus Torvalds, 2005
 *
 * This handles basic git sha1 object files - packing, unpacking,
 * creation etc.
 */
#include "cache.h"
10
#include "string-list.h"
11
#include "lockfile.h"
12
#include "delta.h"
13
#include "pack.h"
14 15
#include "blob.h"
#include "commit.h"
16
#include "run-command.h"
17 18
#include "tag.h"
#include "tree.h"
19
#include "tree-walk.h"
20
#include "refs.h"
21
#include "pack-revindex.h"
22
#include "sha1-lookup.h"
23
#include "bulk-checkin.h"
24
#include "streaming.h"
25
#include "dir.h"
26
#include "mru.h"
J
Jeff King 已提交
27
#include "list.h"
28
#include "mergesort.h"
29

30 31 32 33 34 35 36 37
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
#define O_NOATIME 01000000
#else
#define O_NOATIME 0
#endif
#endif

38 39
/* printf conversion for sizes; always pair it with sz_fmt() on the argument. */
#define SZ_FMT PRIuMAX
/* Widen a size_t to uintmax_t so that it matches the SZ_FMT conversion. */
static inline uintmax_t sz_fmt(size_t s)
{
	return (uintmax_t)s;
}
40

41
const unsigned char null_sha1[20];
42
const struct object_id null_oid;
43 44 45 46 47 48
const struct object_id empty_tree_oid = {
	EMPTY_TREE_SHA1_BIN_LITERAL
};
const struct object_id empty_blob_oid = {
	EMPTY_BLOB_SHA1_BIN_LITERAL
};
J
Junio C Hamano 已提交
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * This is meant to hold a *small* number of objects that you would
 * want read_sha1_file() to be able to return, but yet you do not want
 * to write them into the object store (e.g. a browse-only
 * application).
 */
static struct cached_object {
	unsigned char sha1[20];	/* object name; the key find_cached_object() matches on */
	enum object_type type;
	void *buf;		/* presumably the object contents -- confirm against the users */
	unsigned long size;	/* presumably the length of buf -- confirm against the users */
} *cached_objects;
/*
 * cached_object_nr is the number of entries find_cached_object() scans;
 * cached_object_alloc is presumably the allocated capacity of the array
 * (it is not used in the code visible here).
 */
static int cached_object_nr, cached_object_alloc;

static struct cached_object empty_tree = {
65
	EMPTY_TREE_SHA1_BIN_LITERAL,
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
	OBJ_TREE,
	"",
	0
};

/*
 * Look up sha1 among the in-memory cached objects.  Entries in
 * cached_objects are checked first, then the built-in empty tree.
 * Returns the matching entry, or NULL when there is none.
 */
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
	int idx;

	for (idx = 0; idx < cached_object_nr; idx++) {
		struct cached_object *candidate = &cached_objects[idx];

		if (!hashcmp(candidate->sha1, sha1))
			return candidate;
	}
	return hashcmp(sha1, empty_tree.sha1) ? NULL : &empty_tree;
}

85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
int mkdir_in_gitdir(const char *path)
{
	if (mkdir(path, 0777)) {
		int saved_errno = errno;
		struct stat st;
		struct strbuf sb = STRBUF_INIT;

		if (errno != EEXIST)
			return -1;
		/*
		 * Are we looking at a path in a symlinked worktree
		 * whose original repository does not yet have it?
		 * e.g. .git/rr-cache pointing at its original
		 * repository in which the user hasn't performed any
		 * conflict resolution yet?
		 */
		if (lstat(path, &st) || !S_ISLNK(st.st_mode) ||
		    strbuf_readlink(&sb, path, st.st_size) ||
		    !is_absolute_path(sb.buf) ||
		    mkdir(sb.buf, 0777)) {
			strbuf_release(&sb);
			errno = saved_errno;
			return -1;
		}
		strbuf_release(&sb);
	}
	return adjust_shared_perm(path);
}

114
enum scld_error safe_create_leading_directories(char *path)
115
{
116
	char *next_component = path + offset_1st_component(path);
117
	enum scld_error ret = SCLD_OK;
118

119
	while (ret == SCLD_OK && next_component) {
120
		struct stat st;
121
		char *slash = next_component, slash_character;
122

123 124 125 126
		while (*slash && !is_dir_sep(*slash))
			slash++;

		if (!*slash)
127
			break;
128

129
		next_component = slash + 1;
130
		while (is_dir_sep(*next_component))
131
			next_component++;
132
		if (!*next_component)
133
			break;
134

135
		slash_character = *slash;
136
		*slash = '\0';
137 138
		if (!stat(path, &st)) {
			/* path exists */
139
			if (!S_ISDIR(st.st_mode))
140
				ret = SCLD_EXISTS;
141
		} else if (mkdir(path, 0777)) {
142
			if (errno == EEXIST &&
143
			    !stat(path, &st) && S_ISDIR(st.st_mode))
144
				; /* somebody created it since we checked */
145 146 147 148 149 150 151 152 153 154 155
			else if (errno == ENOENT)
				/*
				 * Either mkdir() failed because
				 * somebody just pruned the containing
				 * directory, or stat() failed because
				 * the file that was in our way was
				 * just removed.  Either way, inform
				 * the caller that it might be worth
				 * trying again:
				 */
				ret = SCLD_VANISHED;
156
			else
157
				ret = SCLD_FAILED;
158
		} else if (adjust_shared_perm(path)) {
159
			ret = SCLD_PERMS;
160
		}
161
		*slash = slash_character;
162
	}
163
	return ret;
164
}
165

166
enum scld_error safe_create_leading_directories_const(const char *path)
167 168 169
{
	/* path points to cache entries, so xstrdup before messing with it */
	char *buf = xstrdup(path);
170
	enum scld_error result = safe_create_leading_directories(buf);
171 172 173 174
	free(buf);
	return result;
}

175 176 177 178 179 180 181 182 183 184 185 186
/*
 * Write the 40 lowercase hex digits of the 20-byte `sha1` into
 * `pathbuf`, laid out as "xx?yyyy...": the first two digits, one byte
 * that is left untouched (the caller puts the '/' there), then the
 * remaining 38 digits.  Exactly 41 bytes of `pathbuf` are spanned and
 * no NUL terminator is written.
 */
static void fill_sha1_path(char *pathbuf, const unsigned char *sha1)
{
	static const char digits[] = "0123456789abcdef";
	char *out = pathbuf;
	int n;

	for (n = 0; n < 20; n++) {
		unsigned int byte = sha1[n];

		if (n == 1)
			out++;	/* step over the slot reserved for '/' */
		*out++ = digits[byte >> 4];
		*out++ = digits[byte & 0xf];
	}
}

187
const char *sha1_file_name(const unsigned char *sha1)
188
{
189 190 191
	static char buf[PATH_MAX];
	const char *objdir;
	int len;
192

193 194 195 196 197 198 199 200 201 202 203 204
	objdir = get_object_directory();
	len = strlen(objdir);

	/* '/' + sha1(2) + '/' + sha1(38) + '\0' */
	if (len + 43 > PATH_MAX)
		die("insanely long object directory %s", objdir);
	memcpy(buf, objdir, len);
	buf[len] = '/';
	buf[len+3] = '/';
	buf[len+42] = '\0';
	fill_sha1_path(buf + len + 1, sha1);
	return buf;
205 206
}

207 208 209 210 211
/*
 * Return the name of the pack or index file with the specified sha1
 * in its filename.  *base and *name are scratch space that must be
 * provided by the caller.  which should be "pack" or "idx".
 */
212
static char *sha1_get_pack_name(const unsigned char *sha1,
J
Jeff King 已提交
213 214
				struct strbuf *buf,
				const char *which)
215
{
J
Jeff King 已提交
216 217 218 219
	strbuf_reset(buf);
	strbuf_addf(buf, "%s/pack/pack-%s.%s", get_object_directory(),
		    sha1_to_hex(sha1), which);
	return buf->buf;
220 221
}

222
char *sha1_pack_name(const unsigned char *sha1)
223
{
J
Jeff King 已提交
224 225
	static struct strbuf buf = STRBUF_INIT;
	return sha1_get_pack_name(sha1, &buf, "pack");
226
}
227

228 229
char *sha1_pack_index_name(const unsigned char *sha1)
{
J
Jeff King 已提交
230 231
	static struct strbuf buf = STRBUF_INIT;
	return sha1_get_pack_name(sha1, &buf, "idx");
232 233
}

234 235
/* Head of the singly linked list of alternates; link_alt_odb_entry() appends to it. */
struct alternate_object_database *alt_odb_list;
/*
 * Points at the `next` slot that the following append will fill.  It is
 * NULL until prepare_alt_odb() has run, which doubles as the
 * "already initialized" flag.
 */
static struct alternate_object_database **alt_odb_tail;
236

237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
/*
 * Return non-zero iff the path is usable as an alternate object database.
 *
 * A path is rejected when it is not a directory (an error is reported),
 * when it is already on alt_odb_list, or when it names the object
 * directory itself (`normalized_objdir`).
 */
static int alt_odb_usable(struct strbuf *path, const char *normalized_objdir)
{
	struct alternate_object_database *known;

	/* Detect cases where alternate disappeared */
	if (!is_directory(path->buf)) {
		error("object directory %s does not exist; "
		      "check .git/objects/info/alternates.",
		      path->buf);
		return 0;
	}

	/*
	 * Prevent the common mistake of listing the same
	 * thing twice, or object directory itself.
	 */
	for (known = alt_odb_list; known; known = known->next) {
		/* known->name sits one past the '/' that follows the directory */
		if (path->len != known->name - known->base - 1)
			continue;
		if (!memcmp(path->buf, known->base, path->len))
			return 0;
	}

	return fspathcmp(path->buf, normalized_objdir) ? 1 : 0;
}

J
Junio C Hamano 已提交
267 268
/*
 * Prepare alternate object database registry.
 *
 * The variable alt_odb_list points at the list of struct
 * alternate_object_database.  The elements on this list come from
 * non-empty elements from colon separated ALTERNATE_DB_ENVIRONMENT
 * environment variable, and $GIT_OBJECT_DIRECTORY/info/alternates,
 * whose contents are similar to that environment variable but can be
 * LF separated.  Its base points at a buffer, allocated together with
 * the entry itself, that
 * contains "/the/directory/corresponding/to/.git/objects/...", while
 * its name points just after the slash at the end of ".git/objects/"
 * in the example above, and has enough space to hold 40-byte hex
 * SHA1, an extra slash for the first level indirection, and the
 * terminating NUL.
 */
282 283
static int link_alt_odb_entry(const char *entry, const char *relative_base,
	int depth, const char *normalized_objdir)
284
{
M
Martin Waitz 已提交
285
	struct alternate_object_database *ent;
286
	size_t entlen;
287
	struct strbuf pathbuf = STRBUF_INIT;
288

289
	if (!is_absolute_path(entry) && relative_base) {
290 291
		strbuf_addstr(&pathbuf, real_path(relative_base));
		strbuf_addch(&pathbuf, '/');
M
Martin Waitz 已提交
292
	}
293
	strbuf_addstr(&pathbuf, entry);
M
Martin Waitz 已提交
294

295 296 297 298 299 300
	if (strbuf_normalize_path(&pathbuf) < 0) {
		error("unable to normalize alternate object path: %s",
		      pathbuf.buf);
		strbuf_release(&pathbuf);
		return -1;
	}
301 302 303 304 305

	/*
	 * The trailing slash after the directory name is given by
	 * this function at the end. Remove duplicates.
	 */
306 307
	while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/')
		strbuf_setlen(&pathbuf, pathbuf.len - 1);
308

309 310
	if (!alt_odb_usable(&pathbuf, normalized_objdir)) {
		strbuf_release(&pathbuf);
M
Martin Waitz 已提交
311 312 313
		return -1;
	}

314 315 316 317 318 319 320 321
	entlen = st_add(pathbuf.len, 43); /* '/' + 2 hex + '/' + 38 hex + NUL */
	ent = xmalloc(st_add(sizeof(*ent), entlen));
	memcpy(ent->base, pathbuf.buf, pathbuf.len);

	ent->name = ent->base + pathbuf.len + 1;
	ent->base[pathbuf.len] = '/';
	ent->base[pathbuf.len + 3] = '/';
	ent->base[entlen-1] = 0;
M
Martin Waitz 已提交
322 323 324 325 326 327 328

	/* add the alternate entry */
	*alt_odb_tail = ent;
	alt_odb_tail = &(ent->next);
	ent->next = NULL;

	/* recursively add alternates */
329
	read_info_alternates(pathbuf.buf, depth + 1);
M
Martin Waitz 已提交
330

331
	strbuf_release(&pathbuf);
M
Martin Waitz 已提交
332 333 334
	return 0;
}

335
static void link_alt_odb_entries(const char *alt, int len, int sep,
M
Martin Waitz 已提交
336 337
				 const char *relative_base, int depth)
{
338 339 340
	struct string_list entries = STRING_LIST_INIT_NODUP;
	char *alt_copy;
	int i;
341
	struct strbuf objdirbuf = STRBUF_INIT;
M
Martin Waitz 已提交
342 343 344 345 346 347 348

	if (depth > 5) {
		error("%s: ignoring alternate object stores, nesting too deep.",
				relative_base);
		return;
	}

J
Junio C Hamano 已提交
349
	strbuf_add_absolute_path(&objdirbuf, get_object_directory());
350 351 352
	if (strbuf_normalize_path(&objdirbuf) < 0)
		die("unable to normalize object directory: %s",
		    objdirbuf.buf);
353

354
	alt_copy = xmemdupz(alt, len);
355 356 357 358
	string_list_split_in_place(&entries, alt_copy, sep, -1);
	for (i = 0; i < entries.nr; i++) {
		const char *entry = entries.items[i].string;
		if (entry[0] == '\0' || entry[0] == '#')
359
			continue;
360 361 362 363
		if (!is_absolute_path(entry) && depth) {
			error("%s: ignoring relative alternate object store %s",
					relative_base, entry);
		} else {
364
			link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf);
365 366
		}
	}
367 368
	string_list_clear(&entries, 0);
	free(alt_copy);
369
	strbuf_release(&objdirbuf);
370 371
}

372
/*
 * Read "<relative_base>/info/alternates" and link every alternate it
 * lists (one per line).  A missing, unreadable or empty file is
 * silently ignored.
 */
void read_info_alternates(const char *relative_base, int depth)
{
	struct stat st;
	size_t mapsz;
	char *map;
	char *path = xstrfmt("%s/info/alternates", relative_base);
	int fd = git_open_noatime(path);

	free(path);
	if (fd < 0)
		return;
	if (fstat(fd, &st) || !st.st_size) {
		close(fd);
		return;
	}

	mapsz = xsize_t(st.st_size);
	map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd);	/* the descriptor is not used again after mapping */

	link_alt_odb_entries(map, mapsz, '\n', relative_base, depth);

	munmap(map, mapsz);
}

398 399 400
/*
 * Append `reference` to objects/info/alternates unless a line with
 * exactly that text is already present.  The file is rewritten under a
 * lock; when alternates have already been loaded in this process the
 * new one is linked in as well.  Dies on I/O errors.
 */
void add_to_alternates_file(const char *reference)
{
	struct lock_file *lock = xcalloc(1, sizeof(struct lock_file));
	char *alts = git_pathdup("objects/info/alternates");
	int already_listed = 0;
	FILE *in, *out;

	hold_lock_file_for_update(lock, alts, LOCK_DIE_ON_ERROR);
	out = fdopen_lock_file(lock, "w");
	if (!out)
		die_errno("unable to fdopen alternates lockfile");

	in = fopen(alts, "r");
	if (!in) {
		/* A missing file simply means there are no alternates yet. */
		if (errno != ENOENT)
			die_errno("unable to read alternates file");
	} else {
		struct strbuf line = STRBUF_INIT;

		/* Copy existing lines across until `reference` shows up. */
		while (strbuf_getline(&line, in) != EOF) {
			if (!strcmp(reference, line.buf)) {
				already_listed = 1;
				break;
			}
			fprintf_or_die(out, "%s\n", line.buf);
		}

		strbuf_release(&line);
		fclose(in);
	}

	if (already_listed) {
		/* Nothing to add: drop the half-written replacement. */
		rollback_lock_file(lock);
	} else {
		fprintf_or_die(out, "%s\n", reference);
		if (commit_lock_file(lock))
			die_errno("unable to move new alternates file into place");
		if (alt_odb_tail)
			link_alt_odb_entries(reference, strlen(reference), '\n', NULL, 0);
	}
	free(alts);
}

443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
/*
 * Compute the exact path an alternate is at and returns it. In case of
 * error NULL is returned and the human readable error is added to `err`
 * `path` may be relative and should point to $GITDIR.
 * `err` must not be null.
 *
 * On success the returned string is newly allocated and owned by the
 * caller.  The reference repository is rejected when it does not
 * exist, has no objects directory, is shallow, or has grafts.
 */
char *compute_alternate_path(const char *path, struct strbuf *err)
{
	char *ref_git = NULL;
	const char *repo, *ref_git_s;
	int seen_error = 0;

	ref_git_s = real_path_if_valid(path);
	if (!ref_git_s) {
		seen_error = 1;
		strbuf_addf(err, _("path '%s' does not exist"), path);
		goto out;
	} else
		/*
		 * Beware: read_gitfile(), real_path() and mkpath()
		 * return static buffer
		 */
		ref_git = xstrdup(ref_git_s);

	/* Follow a "gitfile", either at the path itself or at <path>/.git. */
	repo = read_gitfile(ref_git);
	if (!repo)
		repo = read_gitfile(mkpath("%s/.git", ref_git));
	if (repo) {
		free(ref_git);
		ref_git = xstrdup(repo);
	}

	if (!repo && is_directory(mkpath("%s/.git/objects", ref_git))) {
		char *ref_git_git = mkpathdup("%s/.git", ref_git);
		free(ref_git);
		ref_git = ref_git_git;
	} else if (!is_directory(mkpath("%s/objects", ref_git))) {
		struct strbuf sb = STRBUF_INIT;
		seen_error = 1;
		if (get_common_dir(&sb, ref_git))
			strbuf_addf(err,
				    _("reference repository '%s' as a linked "
				      "checkout is not supported yet."),
				    path);
		else
			strbuf_addf(err, _("reference repository '%s' is not a "
						"local repository."), path);
		/* sb was only needed for the probe; do not leak its buffer */
		strbuf_release(&sb);
		goto out;
	}

	if (!access(mkpath("%s/shallow", ref_git), F_OK)) {
		strbuf_addf(err, _("reference repository '%s' is shallow"),
			    path);
		seen_error = 1;
		goto out;
	}

	if (!access(mkpath("%s/info/grafts", ref_git), F_OK)) {
		strbuf_addf(err,
			    _("reference repository '%s' is grafted"),
			    path);
		seen_error = 1;
		goto out;
	}

out:
	if (seen_error) {
		free(ref_git);
		ref_git = NULL;
	}

	return ref_git;
}

519
/*
 * Call fn(entry, cb) for each alternate object database, loading the
 * list first if necessary.  Iteration stops at the first non-zero
 * return value, which is then returned; otherwise 0 is returned.
 */
int foreach_alt_odb(alt_odb_fn fn, void *cb)
{
	struct alternate_object_database *alt;

	prepare_alt_odb();
	for (alt = alt_odb_list; alt; alt = alt->next) {
		int rc = fn(alt, cb);

		if (rc)
			return rc;
	}
	return 0;
}

M
Martin Waitz 已提交
533 534
void prepare_alt_odb(void)
{
T
Timo Hirvonen 已提交
535
	const char *alt;
M
Martin Waitz 已提交
536

S
Shawn O. Pearce 已提交
537 538 539
	if (alt_odb_tail)
		return;

M
Martin Waitz 已提交
540 541 542 543
	alt = getenv(ALTERNATE_DB_ENVIRONMENT);
	if (!alt) alt = "";

	alt_odb_tail = &alt_odb_list;
544
	link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0);
M
Martin Waitz 已提交
545 546 547 548

	read_info_alternates(get_object_directory(), 0);
}

549
/*
 * Set both the access and modification time of `fn` to the current
 * time.  Returns 1 if we have successfully freshened the file, 0
 * otherwise.
 */
static int freshen_file(const char *fn)
{
	struct utimbuf stamp;
	time_t now = time(NULL);

	stamp.actime = now;
	stamp.modtime = now;
	return utime(fn, &stamp) == 0;
}
556

557 558 559 560 561 562 563
/*
 * All of the check_and_freshen functions return 1 if the file exists and was
 * freshened (if freshening was requested), 0 otherwise. If they return
 * 0, you should not assume that it is safe to skip a write of the object (it
 * either does not exist on disk, or has a stale mtime and may be subject to
 * pruning).
 */
static int check_and_freshen_file(const char *fn, int freshen)
{
	if (access(fn, F_OK))
		return 0;	/* no such file */
	if (!freshen)
		return 1;	/* existence was all that was asked for */
	return freshen_file(fn) ? 1 : 0;
}

/* Check (and optionally freshen) the loose object in our own object directory. */
static int check_and_freshen_local(const unsigned char *sha1, int freshen)
{
	const char *loose_path = sha1_file_name(sha1);

	return check_and_freshen_file(loose_path, freshen);
}

static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen)
579 580
{
	struct alternate_object_database *alt;
581
	prepare_alt_odb();
582
	for (alt = alt_odb_list; alt; alt = alt->next) {
583
		fill_sha1_path(alt->name, sha1);
584
		if (check_and_freshen_file(alt->base, freshen))
585
			return 1;
586
	}
587
	return 0;
588 589
}

590 591 592 593 594 595 596 597 598 599 600
/* Try our own object directory first, then fall back to the alternates. */
static int check_and_freshen(const unsigned char *sha1, int freshen)
{
	if (check_and_freshen_local(sha1, freshen))
		return 1;
	return check_and_freshen_nonlocal(sha1, freshen);
}

/* Return 1 if `sha1` exists as a loose object in any alternate, without freshening it. */
int has_loose_object_nonlocal(const unsigned char *sha1)
{
	const int no_freshen = 0;

	return check_and_freshen_nonlocal(sha1, no_freshen);
}

601 602
/* Return 1 if `sha1` exists as a loose object locally or in an alternate, without freshening it. */
static int has_loose_object(const unsigned char *sha1)
{
	const int no_freshen = 0;

	return check_and_freshen(sha1, no_freshen);
}

606
static unsigned int pack_used_ctr;
607 608 609
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
610 611
static unsigned int pack_open_fds;
static unsigned int pack_max_fds;
612
static size_t peak_pack_mapped;
613
static size_t pack_mapped;
614
struct packed_git *packed_git;
615

616 617 618
static struct mru packed_git_mru_storage;
struct mru *packed_git_mru = &packed_git_mru_storage;

J
Junio C Hamano 已提交
619
void pack_report(void)
620 621
{
	fprintf(stderr,
622 623 624
		"pack_report: getpagesize()            = %10" SZ_FMT "\n"
		"pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n"
		"pack_report: core.packedGitLimit      = %10" SZ_FMT "\n",
625 626 627
		sz_fmt(getpagesize()),
		sz_fmt(packed_git_window_size),
		sz_fmt(packed_git_limit));
628 629 630 631
	fprintf(stderr,
		"pack_report: pack_used_ctr            = %10u\n"
		"pack_report: pack_mmap_calls          = %10u\n"
		"pack_report: pack_open_windows        = %10u / %10u\n"
632 633
		"pack_report: pack_mapped              = "
			"%10" SZ_FMT " / %10" SZ_FMT "\n",
634 635 636
		pack_used_ctr,
		pack_mmap_calls,
		pack_open_windows, peak_pack_open_windows,
637
		sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped));
638 639
}

640 641 642 643 644 645
/*
 * Open and mmap the index file at path, perform a couple of
 * consistency checks, then record its information to p.  Return 0 on
 * success.
 */
static int check_packed_git_idx(const char *path, struct packed_git *p)
646 647
{
	void *idx_map;
648
	struct pack_idx_header *hdr;
649
	size_t idx_size;
650
	uint32_t version, nr, i, *index;
651
	int fd = git_open_noatime(path);
652
	struct stat st;
653

654 655 656 657 658 659
	if (fd < 0)
		return -1;
	if (fstat(fd, &st)) {
		close(fd);
		return -1;
	}
660
	idx_size = xsize_t(st.st_size);
661 662 663 664
	if (idx_size < 4 * 256 + 20 + 20) {
		close(fd);
		return error("index file %s is too small", path);
	}
665
	idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
666 667
	close(fd);

668 669
	hdr = idx_map;
	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
670 671 672
		version = ntohl(hdr->idx_version);
		if (version < 2 || version > 2) {
			munmap(idx_map, idx_size);
673
			return error("index file %s is version %"PRIu32
674 675 676 677 678 679
				     " and is not supported by this binary"
				     " (try upgrading GIT to a newer version)",
				     path, version);
		}
	} else
		version = 1;
680

681
	nr = 0;
682
	index = idx_map;
683 684
	if (version > 1)
		index += 2;  /* skip index header */
685
	for (i = 0; i < 256; i++) {
686
		uint32_t n = ntohl(index[i]);
687 688
		if (n < nr) {
			munmap(idx_map, idx_size);
689
			return error("non-monotonic index %s", path);
690
		}
691 692 693
		nr = n;
	}

694 695 696 697 698 699 700 701 702 703
	if (version == 1) {
		/*
		 * Total size:
		 *  - 256 index entries 4 bytes each
		 *  - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
		 *  - 20-byte SHA1 of the packfile
		 *  - 20-byte SHA1 file checksum
		 */
		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
			munmap(idx_map, idx_size);
704
			return error("wrong index v1 file size in %s", path);
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720
		}
	} else if (version == 2) {
		/*
		 * Minimum size:
		 *  - 8 bytes of header
		 *  - 256 index entries 4 bytes each
		 *  - 20-byte sha1 entry * nr
		 *  - 4-byte crc entry * nr
		 *  - 4-byte offset entry * nr
		 *  - 20-byte SHA1 of the packfile
		 *  - 20-byte SHA1 file checksum
		 * And after the 4-byte offset table might be a
		 * variable sized table containing 8-byte entries
		 * for offsets larger than 2^31.
		 */
		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
L
Linus Torvalds 已提交
721 722 723 724
		unsigned long max_size = min_size;
		if (nr)
			max_size += (nr - 1)*8;
		if (idx_size < min_size || idx_size > max_size) {
725
			munmap(idx_map, idx_size);
726
			return error("wrong index v2 file size in %s", path);
727
		}
728 729 730 731 732 733 734 735 736
		if (idx_size != min_size &&
		    /*
		     * make sure we can deal with large pack offsets.
		     * 31-bit signed offset won't be enough, neither
		     * 32-bit unsigned one will be.
		     */
		    (sizeof(off_t) <= 4)) {
			munmap(idx_map, idx_size);
			return error("pack too large for current definition of off_t in %s", path);
737
		}
738
	}
739

740
	p->index_version = version;
741 742
	p->index_data = idx_map;
	p->index_size = idx_size;
N
Nicolas Pitre 已提交
743
	p->num_objects = nr;
744 745 746
	return 0;
}

747
int open_pack_index(struct packed_git *p)
748 749
{
	char *idx_name;
750
	size_t len;
751 752 753 754 755
	int ret;

	if (p->index_data)
		return 0;

756 757 758
	if (!strip_suffix(p->pack_name, ".pack", &len))
		die("BUG: pack_name does not end in .pack");
	idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name);
759 760 761 762 763
	ret = check_packed_git_idx(idx_name, p);
	free(idx_name);
	return ret;
}

764 765 766 767
static void scan_windows(struct packed_git *p,
	struct packed_git **lru_p,
	struct pack_window **lru_w,
	struct pack_window **lru_l)
768
{
769 770 771 772 773 774 775 776
	struct pack_window *w, *w_l;

	for (w_l = NULL, w = p->windows; w; w = w->next) {
		if (!w->inuse_cnt) {
			if (!*lru_w || w->last_used < (*lru_w)->last_used) {
				*lru_p = p;
				*lru_w = w;
				*lru_l = w_l;
777 778
			}
		}
779
		w_l = w;
780
	}
781 782
}

783
static int unuse_one_window(struct packed_git *current)
784 785 786 787 788 789 790 791
{
	struct packed_git *p, *lru_p = NULL;
	struct pack_window *lru_w = NULL, *lru_l = NULL;

	if (current)
		scan_windows(current, &lru_p, &lru_w, &lru_l);
	for (p = packed_git; p; p = p->next)
		scan_windows(p, &lru_p, &lru_w, &lru_l);
792 793 794 795 796
	if (lru_p) {
		munmap(lru_w->base, lru_w->len);
		pack_mapped -= lru_w->len;
		if (lru_l)
			lru_l->next = lru_w->next;
797
		else
798 799
			lru_p->windows = lru_w->next;
		free(lru_w);
800
		pack_open_windows--;
801 802 803
		return 1;
	}
	return 0;
804 805
}

806
void release_pack_memory(size_t need)
807 808
{
	size_t cur = pack_mapped;
809
	while (need >= (cur - pack_mapped) && unuse_one_window(NULL))
810 811 812
		; /* nothing */
}

813 814 815 816 817 818 819 820 821 822 823 824 825
/*
 * Die if `length` exceeds the mmap size limit.  The limit is read once
 * from GIT_MMAP_LIMIT; a value of 0 (the default passed to
 * git_env_ulong()) means "no limit".
 */
static void mmap_limit_check(size_t length)
{
	static size_t limit = 0;

	if (!limit) {
		size_t from_env = git_env_ulong("GIT_MMAP_LIMIT", 0);

		limit = from_env ? from_env : SIZE_MAX;
	}
	if (length <= limit)
		return;
	die("attempting to mmap %"PRIuMAX" over limit %"PRIuMAX,
	    (uintmax_t)length, (uintmax_t)limit);
}

J
Jeff King 已提交
826 827
/*
 * mmap() wrapper that enforces the mmap size limit and, if the first
 * attempt fails, releases unused pack windows and tries once more.
 *
 * Returns the mapping, NULL when a zero-length mapping failed, or
 * MAP_FAILED when the retry failed as well.
 */
void *xmmap_gently(void *start, size_t length,
		  int prot, int flags, int fd, off_t offset)
{
	void *map;

	mmap_limit_check(length);

	map = mmap(start, length, prot, flags, fd, offset);
	if (map != MAP_FAILED)
		return map;
	if (!length)
		return NULL;

	release_pack_memory(length);
	return mmap(start, length, prot, flags, fd, offset);
}

J
Jeff King 已提交
842 843 844 845 846
/* Like xmmap_gently(), but die instead of returning MAP_FAILED. */
void *xmmap(void *start, size_t length,
	int prot, int flags, int fd, off_t offset)
{
	void *map = xmmap_gently(start, length, prot, flags, fd, offset);

	if (map != MAP_FAILED)
		return map;
	die_errno("mmap failed");
}

851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866
/*
 * Unmap and free every window of pack `p`, keeping the mapped-bytes
 * and open-window counters in sync.  Dies if a window is still in use.
 */
void close_pack_windows(struct packed_git *p)
{
	struct pack_window *w;

	while ((w = p->windows) != NULL) {
		if (w->inuse_cnt)
			die("pack '%s' still has open windows to it",
			    p->pack_name);
		munmap(w->base, w->len);
		pack_mapped -= w->len;
		pack_open_windows--;
		p->windows = w->next;
		free(w);
	}
}

867 868 869 870 871 872 873 874 875 876 877 878
/*
 * Close the file descriptor of pack `p` if it has one.  Returns 1 if a
 * descriptor was closed, 0 if the pack had none open.
 */
static int close_pack_fd(struct packed_git *p)
{
	if (p->pack_fd >= 0) {
		close(p->pack_fd);
		pack_open_fds--;
		p->pack_fd = -1;
		return 1;
	}
	return 0;
}

879 880 881 882 883 884 885 886 887 888 889 890 891
/* Release everything held open for pack `p`: its windows, its descriptor and its index. */
static void close_pack(struct packed_git *p)
{
	close_pack_windows(p);	/* mapped windows */
	close_pack_fd(p);	/* file descriptor */
	close_pack_index(p);	/* mapped index */
}

/*
 * Close every pack on the packed_git list.  Dies when it meets a pack
 * that is marked do_not_close.
 */
void close_all_packs(void)
{
	struct packed_git *p;

	for (p = packed_git; p; p = p->next) {
		if (p->do_not_close)
			die("BUG: want to close pack marked 'do-not-close'");
		close_pack(p);
	}
}


898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
/*
 * The LRU pack is the one with the oldest MRU window, preferring packs
 * with no used windows, or the oldest mtime if it has no windows allocated.
 *
 * Considers `p` as a replacement for the current selection and updates
 * the selection only if `p` is the better candidate:
 *
 *  - lru_p: in/out, the pack selected so far (NULL if none yet).
 *  - mru_w: in/out, the most recently used window of the selected pack
 *    (NULL if the selected pack has no windows).
 *  - accept_windows_inuse: in/out, non-zero while a pack with in-use
 *    windows may still be selected; on selection it is set to whether
 *    the selected pack has any in-use window.
 */
static void find_lru_pack(struct packed_git *p, struct packed_git **lru_p, struct pack_window **mru_w, int *accept_windows_inuse)
{
	struct pack_window *w, *this_mru_w;
	int has_windows_inuse = 0;

	/*
	 * Reject this pack if it has windows and the previously selected
	 * one does not.  If this pack does not have windows, reject
	 * it if the pack file is newer than the previously selected one.
	 */
	if (*lru_p && !*mru_w && (p->windows || p->mtime > (*lru_p)->mtime))
		return;

	/* this_mru_w tracks the most recently used window seen so far in p */
	for (w = this_mru_w = p->windows; w; w = w->next) {
		/*
		 * Reject this pack if any of its windows are in use,
		 * but the previously selected pack did not have any
		 * inuse windows.  Otherwise, record that this pack
		 * has windows in use.
		 */
		if (w->inuse_cnt) {
			if (*accept_windows_inuse)
				has_windows_inuse = 1;
			else
				return;
		}

		if (w->last_used > this_mru_w->last_used)
			this_mru_w = w;

		/*
		 * Reject this pack if it has windows that have been
		 * used more recently than the previously selected pack.
		 * If the previously selected pack had windows inuse and
		 * we have not encountered a window in this pack that is
		 * inuse, skip this check since we prefer a pack with no
		 * inuse windows to one that has inuse windows.
		 */
		if (*mru_w && *accept_windows_inuse == has_windows_inuse &&
		    this_mru_w->last_used > (*mru_w)->last_used)
			return;
	}

	/*
	 * Select this pack.
	 */
	*mru_w = this_mru_w;
	*lru_p = p;
	*accept_windows_inuse = has_windows_inuse;
}

/*
 * Close the file descriptor of the least recently used pack that has
 * one open.  Returns 1 if a descriptor was closed, 0 otherwise.
 */
static int close_one_pack(void)
{
	struct packed_git *pack;
	struct packed_git *victim = NULL;
	struct pack_window *victim_mru = NULL;
	int accept_windows_inuse = 1;

	for (pack = packed_git; pack; pack = pack->next) {
		/* only packs with an open descriptor are candidates */
		if (pack->pack_fd != -1)
			find_lru_pack(pack, &victim, &victim_mru,
				      &accept_windows_inuse);
	}

	return victim ? close_pack_fd(victim) : 0;
}

971
void unuse_pack(struct pack_window **w_cursor)
972
{
973 974 975 976 977
	struct pack_window *w = *w_cursor;
	if (w) {
		w->inuse_cnt--;
		*w_cursor = NULL;
	}
978 979
}

980 981 982 983 984 985 986 987
/* Unmap the index of pack `p` if it is loaded, and forget about it. */
void close_pack_index(struct packed_git *p)
{
	if (!p->index_data)
		return;
	munmap((void *)p->index_data, p->index_size);
	p->index_data = NULL;
}

988 989 990
/* Convert a limit to unsigned int, saturating instead of wrapping around. */
static unsigned int clamp_fd_limit(uintmax_t limit)
{
	return limit > UINT_MAX ? UINT_MAX : (unsigned int)limit;
}

/*
 * Return the maximum number of file descriptors this process may have
 * open, taken from the first source that is available: the soft
 * RLIMIT_NOFILE limit, sysconf(_SC_OPEN_MAX), or the OPEN_MAX constant.
 * When none of them is usable, 1 is returned.
 *
 * Limits that do not fit in an unsigned int (for example an "infinite"
 * resource limit) are reported as UINT_MAX rather than being truncated.
 */
static unsigned int get_max_fd_limit(void)
{
#ifdef RLIMIT_NOFILE
	{
		struct rlimit lim;

		if (!getrlimit(RLIMIT_NOFILE, &lim))
			return clamp_fd_limit(lim.rlim_cur);
	}
#endif

#ifdef _SC_OPEN_MAX
	{
		long open_max = sysconf(_SC_OPEN_MAX);
		if (0 < open_max)
			return clamp_fd_limit((uintmax_t)open_max);
		/*
		 * Otherwise, we got -1 for one of the two
		 * reasons:
		 *
		 * (1) sysconf() did not understand _SC_OPEN_MAX
		 *     and signaled an error with -1; or
		 * (2) sysconf() said there is no limit.
		 *
		 * We _could_ clear errno before calling sysconf() to
		 * tell these two cases apart and return a huge number
		 * in the latter case to let the caller cap it to a
		 * value that is not so selfish, but letting the
		 * fallback OPEN_MAX codepath take care of these cases
		 * is a lot simpler.
		 */
	}
#endif

#ifdef OPEN_MAX
	return OPEN_MAX;
#else
	return 1; /* see the caller ;-) */
#endif
}

1029 1030 1031 1032 1033
/*
 * Do not call this directly as this leaks p->pack_fd on error return;
 * call open_packed_git() instead.
 */
static int open_packed_git_1(struct packed_git *p)
1034
{
1035 1036 1037 1038
	struct stat st;
	struct pack_header hdr;
	unsigned char sha1[20];
	unsigned char *idx_sha1;
1039
	long fd_flag;
1040

1041 1042 1043
	if (!p->index_data && open_pack_index(p))
		return error("packfile %s index unavailable", p->pack_name);

1044
	if (!pack_max_fds) {
1045
		unsigned int max_fds = get_max_fd_limit();
1046 1047 1048 1049 1050 1051 1052 1053

		/* Save 3 for stdin/stdout/stderr, 22 for work */
		if (25 < max_fds)
			pack_max_fds = max_fds - 25;
		else
			pack_max_fds = 1;
	}

1054
	while (pack_max_fds <= pack_open_fds && close_one_pack())
1055 1056
		; /* nothing */

1057
	p->pack_fd = git_open_noatime(p->pack_name);
1058
	if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
1059
		return -1;
1060
	pack_open_fds++;
1061 1062

	/* If we created the struct before we had the pack we lack size. */
1063 1064
	if (!p->pack_size) {
		if (!S_ISREG(st.st_mode))
1065
			return error("packfile %s not a regular file", p->pack_name);
1066
		p->pack_size = st.st_size;
1067
	} else if (p->pack_size != st.st_size)
1068
		return error("packfile %s size changed", p->pack_name);
1069

1070 1071 1072 1073 1074
	/* We leave these file descriptors open with sliding mmap;
	 * there is no point keeping them open across exec(), though.
	 */
	fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
	if (fd_flag < 0)
1075
		return error("cannot determine file descriptor flags");
1076 1077
	fd_flag |= FD_CLOEXEC;
	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1078
		return error("cannot set FD_CLOEXEC");
1079

1080
	/* Verify we recognize this pack file format. */
1081
	if (read_in_full(p->pack_fd, &hdr, sizeof(hdr)) != sizeof(hdr))
1082
		return error("file %s is far too short to be a packfile", p->pack_name);
1083
	if (hdr.hdr_signature != htonl(PACK_SIGNATURE))
1084
		return error("file %s is not a GIT packfile", p->pack_name);
1085
	if (!pack_version_ok(hdr.hdr_version))
1086 1087
		return error("packfile %s is version %"PRIu32" and not"
			" supported (try upgrading GIT to a newer version)",
1088 1089 1090
			p->pack_name, ntohl(hdr.hdr_version));

	/* Verify the pack matches its index. */
N
Nicolas Pitre 已提交
1091
	if (p->num_objects != ntohl(hdr.hdr_entries))
1092 1093
		return error("packfile %s claims to have %"PRIu32" objects"
			     " while index indicates %"PRIu32" objects",
N
Nicolas Pitre 已提交
1094 1095
			     p->pack_name, ntohl(hdr.hdr_entries),
			     p->num_objects);
1096
	if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1)
1097
		return error("end of packfile %s is unavailable", p->pack_name);
1098
	if (read_in_full(p->pack_fd, sha1, sizeof(sha1)) != sizeof(sha1))
1099
		return error("packfile %s signature is unavailable", p->pack_name);
1100
	idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40;
1101
	if (hashcmp(sha1, idx_sha1))
1102 1103
		return error("packfile %s does not match index", p->pack_name);
	return 0;
1104 1105
}

1106 1107 1108 1109
/*
 * Wrapper around open_packed_git_1(): returns 0 when it succeeds.
 * When it fails, the pack's file descriptor is closed again via
 * close_pack_fd() and -1 is returned.
 */
static int open_packed_git(struct packed_git *p)
{
	int failed = open_packed_git_1(p);

	if (failed) {
		close_pack_fd(p);
		return -1;
	}
	return 0;
}

1114
static int in_window(struct pack_window *win, off_t offset)
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126
{
	/* We must promise at least 20 bytes (one hash) after the
	 * offset is available from this window, otherwise the offset
	 * is not actually in this window and a different window (which
	 * has that one hash excess) must be used.  This is to support
	 * the object header and delta base parsing routines below.
	 */
	off_t win_off = win->offset;
	return win_off <= offset
		&& (offset + 20) <= (win_off + win->len);
}

1127
unsigned char *use_pack(struct packed_git *p,
1128
		struct pack_window **w_cursor,
1129
		off_t offset,
1130
		unsigned long *left)
1131
{
1132
	struct pack_window *win = *w_cursor;
1133

F
Felipe Contreras 已提交
1134
	/* Since packfiles end in a hash of their content and it's
1135 1136 1137 1138
	 * pointless to ask for an offset into the middle of that
	 * hash, and the in_window function above wouldn't match
	 * don't allow an offset too close to the end of the file.
	 */
1139 1140
	if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
		die("packfile %s cannot be accessed", p->pack_name);
1141 1142
	if (offset > (p->pack_size - 20))
		die("offset beyond end of packfile (truncated pack?)");
1143
	if (offset < 0)
1144
		die(_("offset before end of packfile (broken .idx?)"));
1145 1146 1147 1148 1149 1150 1151 1152 1153

	if (!win || !in_window(win, offset)) {
		if (win)
			win->inuse_cnt--;
		for (win = p->windows; win; win = win->next) {
			if (in_window(win, offset))
				break;
		}
		if (!win) {
1154
			size_t window_align = packed_git_window_size / 2;
1155
			off_t len;
1156 1157 1158 1159

			if (p->pack_fd == -1 && open_packed_git(p))
				die("packfile %s cannot be accessed", p->pack_name);

1160
			win = xcalloc(1, sizeof(*win));
1161
			win->offset = (offset / window_align) * window_align;
1162 1163 1164 1165
			len = p->pack_size - win->offset;
			if (len > packed_git_window_size)
				len = packed_git_window_size;
			win->len = (size_t)len;
1166
			pack_mapped += win->len;
1167
			while (packed_git_limit < pack_mapped
1168
				&& unuse_one_window(p))
1169
				; /* nothing */
1170
			win->base = xmmap(NULL, win->len,
1171 1172 1173
				PROT_READ, MAP_PRIVATE,
				p->pack_fd, win->offset);
			if (win->base == MAP_FAILED)
1174 1175
				die_errno("packfile %s cannot be mapped",
					  p->pack_name);
1176
			if (!win->offset && win->len == p->pack_size
1177 1178
				&& !p->do_not_close)
				close_pack_fd(p);
1179 1180 1181 1182 1183 1184
			pack_mmap_calls++;
			pack_open_windows++;
			if (pack_mapped > peak_pack_mapped)
				peak_pack_mapped = pack_mapped;
			if (pack_open_windows > peak_pack_open_windows)
				peak_pack_open_windows = pack_open_windows;
1185 1186 1187
			win->next = p->windows;
			p->windows = win;
		}
1188
	}
1189 1190 1191 1192 1193
	if (win != *w_cursor) {
		win->last_used = pack_used_ctr++;
		win->inuse_cnt++;
		*w_cursor = win;
	}
1194
	offset -= win->offset;
1195
	if (left)
1196
		*left = win->len - xsize_t(offset);
1197
	return win->base + offset;
1198 1199
}

1200 1201
static struct packed_git *alloc_packed_git(int extra)
{
1202
	struct packed_git *p = xmalloc(st_add(sizeof(*p), extra));
1203 1204 1205 1206 1207
	memset(p, 0, sizeof(*p));
	p->pack_fd = -1;
	return p;
}

1208 1209
/*
 * Callback handed to set_try_to_free_routine() by add_packed_git();
 * it simply forwards the request to release_pack_memory().
 */
static void try_to_free_pack_memory(size_t size)
{
	release_pack_memory(size);
}

1213
struct packed_git *add_packed_git(const char *path, size_t path_len, int local)
1214
{
1215
	static int have_set_try_to_free_routine;
1216
	struct stat st;
1217 1218
	size_t alloc;
	struct packed_git *p;
1219

1220 1221 1222 1223 1224
	if (!have_set_try_to_free_routine) {
		have_set_try_to_free_routine = 1;
		set_try_to_free_routine(try_to_free_pack_memory);
	}

1225 1226 1227 1228
	/*
	 * Make sure a corresponding .pack file exists and that
	 * the index looks sane.
	 */
1229
	if (!strip_suffix_mem(path, &path_len, ".idx"))
1230
		return NULL;
1231 1232 1233 1234 1235

	/*
	 * ".pack" is long enough to hold any suffix we're adding (and
	 * the use xsnprintf double-checks that)
	 */
1236
	alloc = st_add3(path_len, strlen(".pack"), 1);
1237
	p = alloc_packed_git(alloc);
1238
	memcpy(p->pack_name, path, path_len);
1239

1240
	xsnprintf(p->pack_name + path_len, alloc - path_len, ".keep");
1241 1242 1243
	if (!access(p->pack_name, F_OK))
		p->pack_keep = 1;

1244
	xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack");
1245
	if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) {
1246
		free(p);
1247 1248
		return NULL;
	}
1249

1250 1251 1252 1253
	/* ok, it looks sane as far as we can check without
	 * actually mapping the pack file.
	 */
	p->pack_size = st.st_size;
1254
	p->pack_local = local;
1255
	p->mtime = st.st_mtime;
1256 1257
	if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1))
		hashclr(p->sha1);
1258 1259 1260
	return p;
}

1261
struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
1262
{
1263
	const char *path = sha1_pack_name(sha1);
1264
	size_t alloc = st_add(strlen(path), 1);
1265
	struct packed_git *p = alloc_packed_git(alloc);
1266

1267
	memcpy(p->pack_name, path, alloc); /* includes NUL */
1268
	hashcpy(p->sha1, sha1);
1269 1270
	if (check_packed_git_idx(idx_path, p)) {
		free(p);
1271
		return NULL;
1272
	}
1273 1274 1275 1276 1277 1278

	return p;
}

void install_packed_git(struct packed_git *pack)
{
1279 1280 1281
	if (pack->pack_fd != -1)
		pack_open_fds++;

1282 1283 1284 1285
	pack->next = packed_git;
	packed_git = pack;
}

1286
/*
 * Optional hook used in this file to report unexpected files found in
 * a pack directory.  It is called with a bitmask ("seen_bits") and the
 * path of the file in question.  While it is NULL, garbage reporting
 * is skipped.
 */
void (*report_garbage)(unsigned seen_bits, const char *path);
1287 1288 1289 1290

static void report_helper(const struct string_list *list,
			  int seen_bits, int first, int last)
{
1291
	if (seen_bits == (PACKDIR_FILE_PACK|PACKDIR_FILE_IDX))
1292
		return;
1293

1294
	for (; first < last; first++)
1295
		report_garbage(seen_bits, list->items[first].string);
1296 1297 1298 1299 1300 1301 1302 1303 1304
}

static void report_pack_garbage(struct string_list *list)
{
	int i, baselen = -1, first = 0, seen_bits = 0;

	if (!report_garbage)
		return;

1305
	string_list_sort(list);
1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317

	for (i = 0; i < list->nr; i++) {
		const char *path = list->items[i].string;
		if (baselen != -1 &&
		    strncmp(path, list->items[first].string, baselen)) {
			report_helper(list, seen_bits, first, i);
			baselen = -1;
			seen_bits = 0;
		}
		if (baselen == -1) {
			const char *dot = strrchr(path, '.');
			if (!dot) {
1318
				report_garbage(PACKDIR_FILE_GARBAGE, path);
1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331
				continue;
			}
			baselen = dot - path + 1;
			first = i;
		}
		if (!strcmp(path + baselen, "pack"))
			seen_bits |= 1;
		else if (!strcmp(path + baselen, "idx"))
			seen_bits |= 2;
	}
	report_helper(list, seen_bits, first, list->nr);
}

1332
static void prepare_packed_git_one(char *objdir, int local)
1333
{
1334 1335
	struct strbuf path = STRBUF_INIT;
	size_t dirnamelen;
1336 1337
	DIR *dir;
	struct dirent *de;
1338
	struct string_list garbage = STRING_LIST_INIT_DUP;
1339

1340 1341 1342
	strbuf_addstr(&path, objdir);
	strbuf_addstr(&path, "/pack");
	dir = opendir(path.buf);
1343
	if (!dir) {
J
Junio C Hamano 已提交
1344
		if (errno != ENOENT)
1345 1346
			error_errno("unable to open object pack directory: %s",
				    path.buf);
1347
		strbuf_release(&path);
1348
		return;
1349
	}
1350 1351
	strbuf_addch(&path, '/');
	dirnamelen = path.len;
1352 1353
	while ((de = readdir(dir)) != NULL) {
		struct packed_git *p;
1354
		size_t base_len;
1355 1356

		if (is_dot_or_dotdot(de->d_name))
1357 1358
			continue;

1359 1360
		strbuf_setlen(&path, dirnamelen);
		strbuf_addstr(&path, de->d_name);
1361

1362 1363
		base_len = path.len;
		if (strip_suffix_mem(path.buf, &base_len, ".idx")) {
1364 1365
			/* Don't reopen a pack we already have. */
			for (p = packed_git; p; p = p->next) {
1366 1367 1368 1369
				size_t len;
				if (strip_suffix(p->pack_name, ".pack", &len) &&
				    len == base_len &&
				    !memcmp(p->pack_name, path.buf, len))
1370 1371 1372 1373 1374 1375 1376
					break;
			}
			if (p == NULL &&
			    /*
			     * See if it really is a valid .idx file with
			     * corresponding .pack file that we can map.
			     */
1377
			    (p = add_packed_git(path.buf, path.len, local)) != NULL)
1378
				install_packed_git(p);
1379
		}
1380 1381 1382 1383

		if (!report_garbage)
			continue;

J
Jeff King 已提交
1384 1385 1386 1387
		if (ends_with(de->d_name, ".idx") ||
		    ends_with(de->d_name, ".pack") ||
		    ends_with(de->d_name, ".bitmap") ||
		    ends_with(de->d_name, ".keep"))
1388
			string_list_append(&garbage, path.buf);
1389
		else
1390
			report_garbage(PACKDIR_FILE_GARBAGE, path.buf);
1391
	}
1392
	closedir(dir);
1393 1394
	report_pack_garbage(&garbage);
	string_list_clear(&garbage, 0);
1395
	strbuf_release(&path);
1396 1397
}

1398 1399 1400 1401 1402 1403 1404 1405 1406 1407
/* llist_mergesort() accessor: read the "next" link of a packed_git. */
static void *get_next_packed_git(const void *p)
{
	const struct packed_git *pack = p;

	return pack->next;
}

/* llist_mergesort() accessor: set the "next" link of a packed_git. */
static void set_next_packed_git(void *p, void *next)
{
	struct packed_git *pack = p;

	pack->next = next;
}

1408 1409
/*
 * Ordering used for the packed_git list: local packs sort before
 * non-local ones, and within the same locality a pack with a larger
 * mtime sorts before one with a smaller mtime.
 */
static int sort_pack(const void *a_, const void *b_)
{
	const struct packed_git *a = a_;
	const struct packed_git *b = b_;
	int local_diff;

	/*
	 * Local packs tend to contain objects specific to our variant
	 * of the project, and remote ones could live on a network
	 * mounted filesystem, so local ones are favored.
	 */
	local_diff = a->pack_local - b->pack_local;
	if (local_diff)
		return -local_diff;

	/*
	 * Younger packs tend to contain more recent objects, which
	 * tend to get accessed more often.
	 */
	if (a->mtime == b->mtime)
		return 0;
	return a->mtime < b->mtime ? 1 : -1;
}

static void rearrange_packed_git(void)
{
1438 1439
	packed_git = llist_mergesort(packed_git, get_next_packed_git,
				     set_next_packed_git, sort_pack);
1440 1441
}

1442 1443 1444 1445 1446 1447 1448 1449 1450
/*
 * Reset packed_git_mru and refill it with every pack on the
 * packed_git list, in list order.
 */
static void prepare_packed_git_mru(void)
{
	struct packed_git *pack = packed_git;

	mru_clear(packed_git_mru);
	while (pack) {
		mru_append(packed_git_mru, pack);
		pack = pack->next;
	}
}

1451
static int prepare_packed_git_run_once = 0;
1452
void prepare_packed_git(void)
1453
{
1454
	struct alternate_object_database *alt;
1455

1456
	if (prepare_packed_git_run_once)
1457
		return;
1458
	prepare_packed_git_one(get_object_directory(), 1);
1459
	prepare_alt_odb();
1460
	for (alt = alt_odb_list; alt; alt = alt->next) {
1461
		alt->name[-1] = 0;
1462
		prepare_packed_git_one(alt->base, 0);
1463
		alt->name[-1] = '/';
1464
	}
1465
	rearrange_packed_git();
1466
	prepare_packed_git_mru();
1467 1468 1469
	prepare_packed_git_run_once = 1;
}

1470
void reprepare_packed_git(void)
1471 1472 1473
{
	prepare_packed_git_run_once = 0;
	prepare_packed_git();
1474 1475
}

1476 1477 1478 1479 1480
/*
 * Remember "sha1" in p->bad_object_sha1, an array of raw hashes with
 * p->num_bad_objects entries.  A hash that is already recorded is not
 * added a second time.
 */
static void mark_bad_packed_object(struct packed_git *p,
				   const unsigned char *sha1)
{
	unsigned idx = 0;
	size_t new_size;

	while (idx < p->num_bad_objects) {
		if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * idx))
			return;
		idx++;
	}

	/* grow the array by one slot and store the hash in it */
	new_size = st_mult(GIT_SHA1_RAWSZ, st_add(p->num_bad_objects, 1));
	p->bad_object_sha1 = xrealloc(p->bad_object_sha1, new_size);
	hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1);
	p->num_bad_objects++;
}

1490
static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
1491 1492 1493 1494 1495 1496 1497
{
	struct packed_git *p;
	unsigned i;

	for (p = packed_git; p; p = p->next)
		for (i = 0; i < p->num_bad_objects; i++)
			if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
1498 1499
				return p;
	return NULL;
1500 1501
}

1502 1503 1504 1505 1506 1507 1508 1509
/*
 * With an in-core object data in "map", rehash it to make sure the
 * object name actually matches "sha1" to detect object corruption.
 * With "map" == NULL, try reading the object named with "sha1" using
 * the streaming interface and rehash it to do the same.
 */
int check_sha1_signature(const unsigned char *sha1, void *map,
			 unsigned long size, const char *type)
1510 1511
{
	unsigned char real_sha1[20];
1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527
	enum object_type obj_type;
	struct git_istream *st;
	git_SHA_CTX c;
	char hdr[32];
	int hdrlen;

	if (map) {
		hash_sha1_file(map, size, type, real_sha1);
		return hashcmp(sha1, real_sha1) ? -1 : 0;
	}

	st = open_istream(sha1, &obj_type, &size, NULL);
	if (!st)
		return -1;

	/* Generate the header */
1528
	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(obj_type), size) + 1;
1529 1530 1531 1532 1533 1534 1535 1536

	/* Sha1.. */
	git_SHA1_Init(&c);
	git_SHA1_Update(&c, hdr, hdrlen);
	for (;;) {
		char buf[1024 * 16];
		ssize_t readlen = read_istream(st, buf, sizeof(buf));

1537 1538 1539 1540
		if (readlen < 0) {
			close_istream(st);
			return -1;
		}
1541 1542 1543 1544 1545 1546
		if (!readlen)
			break;
		git_SHA1_Update(&c, buf, readlen);
	}
	git_SHA1_Final(real_sha1, &c);
	close_istream(st);
1547
	return hashcmp(sha1, real_sha1) ? -1 : 0;
1548 1549
}

1550
/*
 * Open "name" read-only, with O_NOATIME as long as that has not been
 * found to cause trouble.  If the open fails for a reason other than
 * ENOENT while the flag is still in use, the flag is dropped for the
 * rest of the process and the open is retried once without it.
 *
 * Returns the file descriptor, or -1 on failure.
 */
int git_open_noatime(const char *name)
{
	static int sha1_file_open_flag = O_NOATIME;
	int fd;

	errno = 0;
	fd = open(name, O_RDONLY | sha1_file_open_flag);
	if (fd >= 0)
		return fd;

	/* Could the failure be due to O_NOATIME?  If not, give up. */
	if (errno == ENOENT || !sha1_file_open_flag)
		return -1;

	sha1_file_open_flag = 0;
	errno = 0;
	fd = open(name, O_RDONLY);
	return fd >= 0 ? fd : -1;
}

1572 1573 1574 1575
/*
 * lstat() the loose object file for "sha1", first under the path
 * given by sha1_file_name() and then in each alternate object
 * database.  Returns 0 with *st filled in on the first hit, -1 when
 * no lstat() succeeds.
 */
static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
{
	struct alternate_object_database *alt;

	if (lstat(sha1_file_name(sha1), st) == 0)
		return 0;

	prepare_alt_odb();
	errno = ENOENT;
	alt = alt_odb_list;
	while (alt) {
		/* writes the object's path at alt->name; alt->base is then lstat'ed */
		fill_sha1_path(alt->name, sha1);
		if (lstat(alt->base, st) == 0)
			return 0;
		alt = alt->next;
	}

	return -1;
}

1590 1591 1592 1593
/*
 * Open the loose object file for "sha1", first under the path given
 * by sha1_file_name() and then in each alternate object database.
 * Returns the descriptor of the first successful open.
 *
 * On failure returns -1 with errno set to the first errno seen that
 * is not ENOENT (or to the last one seen, if all were ENOENT).
 */
static int open_sha1_file(const unsigned char *sha1)
{
	struct alternate_object_database *alt;
	int most_interesting_errno;
	int fd = git_open_noatime(sha1_file_name(sha1));

	if (fd >= 0)
		return fd;
	most_interesting_errno = errno;

	prepare_alt_odb();
	alt = alt_odb_list;
	while (alt) {
		fill_sha1_path(alt->name, sha1);
		fd = git_open_noatime(alt->base);
		if (fd >= 0)
			return fd;
		/* keep anything more telling than "no such file" */
		if (most_interesting_errno == ENOENT)
			most_interesting_errno = errno;
		alt = alt->next;
	}

	errno = most_interesting_errno;
	return -1;
}

1614
/*
 * Map the loose object file for "sha1" read-only into memory.
 *
 * On success returns the mapping and stores the file size in *size.
 * Returns NULL when the file cannot be opened, cannot be fstat()ed,
 * or is empty (an error is reported for the empty case).  *size is
 * written whenever fstat() succeeds, including the empty case.
 *
 * The descriptor is closed on every path; the mapping stays valid
 * after the close.
 */
void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
{
	void *map = NULL;
	struct stat st;
	int fd;

	fd = open_sha1_file(sha1);
	if (fd < 0)
		return NULL;

	if (!fstat(fd, &st)) {
		*size = xsize_t(st.st_size);
		if (!*size) {
			/* mmap() is forbidden on empty files */
			error("object file %s is empty", sha1_file_name(sha1));
		} else {
			map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
		}
	}

	/* Also reached for an empty file, so the descriptor is not leaked. */
	close(fd);
	return map;
}

1638 1639
unsigned long unpack_object_header_buffer(const unsigned char *buf,
		unsigned long len, enum object_type *type, unsigned long *sizep)
1640
{
1641
	unsigned shift;
1642
	unsigned long size, c;
1643 1644 1645 1646 1647 1648 1649
	unsigned long used = 0;

	c = buf[used++];
	*type = (c >> 4) & 7;
	size = c & 15;
	shift = 4;
	while (c & 0x80) {
1650
		if (len <= used || bitsizeof(long) <= shift) {
1651
			error("bad object header");
1652 1653
			size = used = 0;
			break;
1654
		}
1655 1656 1657 1658 1659 1660 1661 1662
		c = buf[used++];
		size += (c & 0x7f) << shift;
		shift += 7;
	}
	*sizep = size;
	return used;
}

1663 1664 1665
/*
 * Reset "stream", point its input at map[0..mapsize) and its output
 * at buffer[0..bufsiz), initialize it for inflating and run a single
 * git_inflate() pass.  Returns that call's status.
 */
static int unpack_sha1_short_header(git_zstream *stream,
				    unsigned char *map, unsigned long mapsize,
				    void *buffer, unsigned long bufsiz)
{
	memset(stream, 0, sizeof(*stream));

	/* input: the object data */
	stream->next_in = map;
	stream->avail_in = mapsize;

	/* output: the caller's buffer */
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	git_inflate_init(stream);
	return git_inflate(stream, 0);
}

1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693
/*
 * Inflate the start of the object in "map" into "buffer" and make
 * sure the inflated bytes contain the NUL that terminates the header.
 *
 * Returns 0 on success, the (negative) inflate status when inflating
 * failed, or -1 when no NUL was found in the output.
 */
int unpack_sha1_header(git_zstream *stream,
		       unsigned char *map, unsigned long mapsize,
		       void *buffer, unsigned long bufsiz)
{
	size_t produced;
	int status;

	status = unpack_sha1_short_header(stream, map, mapsize,
					  buffer, bufsiz);
	if (status < Z_OK)
		return status;

	/* Make sure we have the terminating NUL */
	produced = stream->next_out - (unsigned char *)buffer;
	return memchr(buffer, '\0', produced) ? 0 : -1;
}

1694 1695 1696 1697 1698 1699
/*
 * Like unpack_sha1_header(), but copes with a header that does not
 * fit in buffer[0..bufsiz).
 *
 * If the first inflate pass already yields the terminating NUL, the
 * header is in "buffer" and "header" is left untouched.  Otherwise
 * the inflated pieces are appended to the strbuf "header" until a
 * piece containing a NUL has been added.
 *
 * Returns 0 once a NUL has been seen, -1 on an inflate error or when
 * the stream ends without one.
 */
static int unpack_sha1_header_to_strbuf(git_zstream *stream, unsigned char *map,
					unsigned long mapsize, void *buffer,
					unsigned long bufsiz, struct strbuf *header)
{
	size_t produced;
	int status;

	status = unpack_sha1_short_header(stream, map, mapsize, buffer, bufsiz);
	if (status < Z_OK)
		return -1;

	/* Was the entire header unpacked by the first pass? */
	produced = stream->next_out - (unsigned char *)buffer;
	if (memchr(buffer, '\0', produced))
		return 0;

	/*
	 * The buffer was not large enough.  Save the partial result in
	 * "header", then keep inflating into the (reused) buffer and
	 * appending what comes out.
	 */
	strbuf_add(header, buffer, produced);
	stream->next_out = buffer;
	stream->avail_out = bufsiz;

	do {
		status = git_inflate(stream, 0);
		produced = stream->next_out - (unsigned char *)buffer;
		strbuf_add(header, buffer, produced);
		if (memchr(buffer, '\0', produced))
			return 0;
		stream->next_out = buffer;
		stream->avail_out = bufsiz;
	} while (status != Z_STREAM_END);

	return -1;
}

1730
/*
 * Finish inflating a loose object whose header has already been
 * unpacked into "buffer" through "stream".
 *
 * Allocates a buffer for "size" bytes of payload, copies in whatever
 * payload already followed the header in "buffer", and inflates the
 * remainder.  Returns the buffer on success.  Returns NULL (after
 * freeing it) when the stream does not end cleanly or input is left
 * over; the latter and negative inflate statuses are reported as
 * errors naming "sha1".
 */
static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long size, const unsigned char *sha1)
{
	int hdrlen = strlen(buffer) + 1;
	unsigned char *out = xmallocz(size);
	unsigned long n;
	int status = Z_OK;
	int bytes;

	/* payload bytes that were inflated along with the header */
	n = stream->total_out - hdrlen;
	if (n > size)
		n = size;
	memcpy(out, (char *) buffer + hdrlen, n);
	bytes = n;

	if (bytes <= size) {
		/*
		 * This must be (bytes <= size), not (bytes < size).
		 * Even when no more output is expected and avail_out
		 * is zero, the zlib input may still hold the bytes
		 * that say "this concludes the stream", and that input
		 * has to be consumed.
		 *
		 * Only then can we check that all input was used to
		 * reach the expected size, and that zlib reports
		 * Z_STREAM_END at the end.
		 */
		stream->next_out = out + bytes;
		stream->avail_out = size - bytes;
		while (status == Z_OK)
			status = git_inflate(stream, Z_FINISH);
	}

	if (status == Z_STREAM_END && !stream->avail_in) {
		git_inflate_end(stream);
		return out;
	}

	if (status < 0)
		error("corrupt loose object '%s'", sha1_to_hex(sha1));
	else if (stream->avail_in)
		error("garbage at end of loose object '%s'",
		      sha1_to_hex(sha1));
	free(out);
	return NULL;
}

/*
 * We used to just use "sscanf()", but that's actually way
 * too permissive for what we want to check. So do an anal
 * object header parse by hand.
 */
1780 1781
static int parse_sha1_header_extended(const char *hdr, struct object_info *oi,
			       unsigned int flags)
1782
{
1783
	const char *type_buf = hdr;
1784
	unsigned long size;
1785
	int type, type_len = 0;
1786 1787

	/*
1788
	 * The type can be of any size but is followed by
1789
	 * a space.
1790 1791 1792
	 */
	for (;;) {
		char c = *hdr++;
1793 1794
		if (!c)
			return -1;
1795 1796
		if (c == ' ')
			break;
1797
		type_len++;
1798
	}
1799 1800 1801 1802 1803 1804

	type = type_from_string_gently(type_buf, type_len, 1);
	if (oi->typename)
		strbuf_add(oi->typename, type_buf, type_len);
	/*
	 * Set type to 0 if its an unknown object and
V
Ville Skyttä 已提交
1805
	 * we're obtaining the type using '--allow-unknown-type'
1806 1807 1808 1809 1810 1811 1812 1813
	 * option.
	 */
	if ((flags & LOOKUP_UNKNOWN_OBJECT) && (type < 0))
		type = 0;
	else if (type < 0)
		die("invalid object type");
	if (oi->typep)
		*oi->typep = type;
1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830

	/*
	 * The length must follow immediately, and be in canonical
	 * decimal format (ie "010" is not valid).
	 */
	size = *hdr++ - '0';
	if (size > 9)
		return -1;
	if (size) {
		for (;;) {
			unsigned long c = *hdr - '0';
			if (c > 9)
				break;
			hdr++;
			size = size * 10 + c;
		}
	}
1831 1832 1833

	if (oi->sizep)
		*oi->sizep = size;
1834 1835 1836 1837

	/*
	 * The length must be followed by a zero byte
	 */
1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848
	return *hdr ? -1 : type;
}

int parse_sha1_header(const char *hdr, unsigned long *sizep)
{
	struct object_info oi;

	oi.sizep = sizep;
	oi.typename = NULL;
	oi.typep = NULL;
	return parse_sha1_header_extended(hdr, &oi, LOOKUP_REPLACE_OBJECT);
1849 1850
}

1851
static void *unpack_sha1_file(void *map, unsigned long mapsize, enum object_type *type, unsigned long *size, const unsigned char *sha1)
1852
{
1853
	int ret;
1854
	git_zstream stream;
1855
	char hdr[8192];
1856

1857
	ret = unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr));
1858
	if (ret < Z_OK || (*type = parse_sha1_header(hdr, size)) < 0)
1859 1860
		return NULL;

1861
	return unpack_sha1_rest(&stream, hdr, *size, sha1);
1862 1863
}

N
Nicolas Pitre 已提交
1864 1865 1866 1867 1868 1869
unsigned long get_size_from_delta(struct packed_git *p,
				  struct pack_window **w_curs,
			          off_t curpos)
{
	const unsigned char *data;
	unsigned char delta_head[20], *in;
1870
	git_zstream stream;
N
Nicolas Pitre 已提交
1871 1872 1873 1874 1875 1876
	int st;

	memset(&stream, 0, sizeof(stream));
	stream.next_out = delta_head;
	stream.avail_out = sizeof(delta_head);

1877
	git_inflate_init(&stream);
N
Nicolas Pitre 已提交
1878 1879 1880
	do {
		in = use_pack(p, w_curs, curpos, &stream.avail_in);
		stream.next_in = in;
1881
		st = git_inflate(&stream, Z_FINISH);
N
Nicolas Pitre 已提交
1882 1883 1884
		curpos += stream.next_in - in;
	} while ((st == Z_OK || st == Z_BUF_ERROR) &&
		 stream.total_out < sizeof(delta_head));
1885
	git_inflate_end(&stream);
1886 1887 1888 1889
	if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) {
		error("delta data unpack-initial failed");
		return 0;
	}
N
Nicolas Pitre 已提交
1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902

	/* Examine the initial part of the delta to figure out
	 * the result size.
	 */
	data = delta_head;

	/* ignore base size */
	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));

	/* Read the result size */
	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
}

1903
static off_t get_delta_base(struct packed_git *p,
1904
				    struct pack_window **w_curs,
1905
				    off_t *curpos,
1906
				    enum object_type type,
1907
				    off_t delta_obj_offset)
1908
{
1909
	unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
1910
	off_t base_offset;
1911

1912 1913 1914 1915 1916 1917
	/* use_pack() assured us we have [base_info, base_info + 20)
	 * as a range that we can look at without walking off the
	 * end of the mapped window.  Its actually the hash size
	 * that is assured.  An OFS_DELTA longer than the hash size
	 * is stupid, as then a REF_DELTA would be smaller to store.
	 */
1918
	if (type == OBJ_OFS_DELTA) {
1919 1920 1921 1922 1923
		unsigned used = 0;
		unsigned char c = base_info[used++];
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
1924
			if (!base_offset || MSB(base_offset, 7))
1925
				return 0;  /* overflow */
1926 1927 1928 1929
			c = base_info[used++];
			base_offset = (base_offset << 7) + (c & 127);
		}
		base_offset = delta_obj_offset - base_offset;
1930
		if (base_offset <= 0 || base_offset >= delta_obj_offset)
1931
			return 0;  /* out of bound */
1932
		*curpos += used;
1933
	} else if (type == OBJ_REF_DELTA) {
1934 1935
		/* The base entry _must_ be in the same pack */
		base_offset = find_pack_entry_one(base_info, p);
1936
		*curpos += 20;
1937 1938
	} else
		die("I am totally screwed");
1939
	return base_offset;
1940 1941
}

1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973
/*
 * Like get_delta_base(), but the base is returned as a sha1 rather
 * than as a pack offset.  That makes REF deltas cheaper (no final
 * object lookup is needed) and OFS deltas more expensive (the revidx
 * has to be loaded to turn the offset back into a sha1).
 *
 * Returns NULL when "type" is not a delta type, or when the base of
 * an OFS delta cannot be resolved.
 */
static const unsigned char *get_delta_base_sha1(struct packed_git *p,
						struct pack_window **w_curs,
						off_t curpos,
						enum object_type type,
						off_t delta_obj_offset)
{
	struct revindex_entry *revidx;
	off_t base_offset;

	switch (type) {
	case OBJ_REF_DELTA:
		/* the hash is stored right at curpos */
		return use_pack(p, w_curs, curpos, NULL);
	case OBJ_OFS_DELTA:
		base_offset = get_delta_base(p, w_curs, &curpos,
					     type, delta_obj_offset);
		if (!base_offset)
			return NULL;

		revidx = find_pack_revindex(p, base_offset);
		if (!revidx)
			return NULL;

		return nth_packed_object_sha1(p, revidx->nr);
	default:
		return NULL;
	}
}

1974 1975 1976 1977
int unpack_object_header(struct packed_git *p,
			 struct pack_window **w_curs,
			 off_t *curpos,
			 unsigned long *sizep)
1978
{
1979
	unsigned char *base;
1980
	unsigned long left;
1981
	unsigned long used;
1982
	enum object_type type;
1983

1984
	/* use_pack() assures us we have [base, base + 20) available
1985
	 * as a range that we can look at.  (Its actually the hash
P
Pavel Roskin 已提交
1986
	 * size that is assured.)  With our object header encoding
1987 1988 1989
	 * the maximum deflated object size is 2^137, which is just
	 * insane, so we know won't exceed what we have been given.
	 */
1990
	base = use_pack(p, w_curs, *curpos, &left);
1991 1992 1993 1994 1995
	used = unpack_object_header_buffer(base, left, &type, sizep);
	if (!used) {
		type = OBJ_BAD;
	} else
		*curpos += used;
1996

1997
	return type;
1998 1999
}

2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
/*
 * The object at "obj_offset" in pack "p" could not be used.  Look up
 * its hash through the reverse index, mark it as bad in this pack and
 * ask sha1_object_info() for its type.  Returns that type, or OBJ_BAD
 * when there is no revindex entry or no usable type is found.
 */
static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
{
	struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
	const unsigned char *sha1;
	int type;

	if (!revidx)
		return OBJ_BAD;

	sha1 = nth_packed_object_sha1(p, revidx->nr);
	mark_bad_packed_object(p, sha1);

	type = sha1_object_info(sha1, NULL);
	return type > OBJ_NONE ? type : OBJ_BAD;
}

/*
 * Number of offsets packed_to_object_type() can keep in its on-stack
 * array before it has to switch to a heap-allocated one.
 */
#define POI_STACK_PREALLOC 64

2018 2019 2020 2021 2022
static enum object_type packed_to_object_type(struct packed_git *p,
					      off_t obj_offset,
					      enum object_type type,
					      struct pack_window **w_curs,
					      off_t curpos)
2023
{
2024 2025 2026
	off_t small_poi_stack[POI_STACK_PREALLOC];
	off_t *poi_stack = small_poi_stack;
	int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
2027

2028 2029
	while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
		off_t base_offset;
2030
		unsigned long size;
2031 2032 2033
		/* Push the object we're going to leave behind */
		if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
			poi_stack_alloc = alloc_nr(poi_stack_nr);
J
Jeff King 已提交
2034
			ALLOC_ARRAY(poi_stack, poi_stack_alloc);
2035 2036 2037 2038 2039 2040
			memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr);
		} else {
			ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc);
		}
		poi_stack[poi_stack_nr++] = obj_offset;
		/* If parsing the base offset fails, just unwind */
2041
		base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
2042 2043 2044
		if (!base_offset)
			goto unwind;
		curpos = obj_offset = base_offset;
2045
		type = unpack_object_header(p, w_curs, &curpos, &size);
2046 2047 2048 2049 2050 2051 2052 2053 2054 2055
		if (type <= OBJ_NONE) {
			/* If getting the base itself fails, we first
			 * retry the base, otherwise unwind */
			type = retry_bad_packed_offset(p, base_offset);
			if (type > OBJ_NONE)
				goto out;
			goto unwind;
		}
	}

2056
	switch (type) {
2057
	case OBJ_BAD:
2058 2059 2060 2061
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
2062
		break;
2063
	default:
2064 2065 2066
		error("unknown object type %i at offset %"PRIuMAX" in %s",
		      type, (uintmax_t)obj_offset, p->pack_name);
		type = OBJ_BAD;
2067
	}
2068 2069 2070 2071

out:
	if (poi_stack != small_poi_stack)
		free(poi_stack);
2072
	return type;
2073 2074 2075 2076 2077 2078 2079 2080 2081 2082

unwind:
	while (poi_stack_nr) {
		obj_offset = poi_stack[--poi_stack_nr];
		type = retry_bad_packed_offset(p, obj_offset);
		if (type > OBJ_NONE)
			goto out;
	}
	type = OBJ_BAD;
	goto out;
2083 2084
}

2085
static int packed_object_info(struct packed_git *p, off_t obj_offset,
2086
			      struct object_info *oi)
2087 2088 2089 2090 2091 2092
{
	struct pack_window *w_curs = NULL;
	unsigned long size;
	off_t curpos = obj_offset;
	enum object_type type;

2093 2094 2095 2096
	/*
	 * We always get the representation type, but only convert it to
	 * a "real" type later if the caller is interested.
	 */
2097 2098
	type = unpack_object_header(p, &w_curs, &curpos, &size);

2099
	if (oi->sizep) {
2100 2101 2102 2103 2104 2105 2106 2107
		if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
			off_t tmp_pos = curpos;
			off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
							   type, obj_offset);
			if (!base_offset) {
				type = OBJ_BAD;
				goto out;
			}
2108 2109
			*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
			if (*oi->sizep == 0) {
2110 2111 2112 2113
				type = OBJ_BAD;
				goto out;
			}
		} else {
2114
			*oi->sizep = size;
2115 2116 2117
		}
	}

2118
	if (oi->disk_sizep) {
2119
		struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2120
		*oi->disk_sizep = revidx[1].offset - obj_offset;
2121 2122
	}

2123 2124 2125
	if (oi->typep) {
		*oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
		if (*oi->typep < 0) {
2126 2127 2128 2129
			type = OBJ_BAD;
			goto out;
		}
	}
2130

2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146
	if (oi->delta_base_sha1) {
		if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
			const unsigned char *base;

			base = get_delta_base_sha1(p, &w_curs, curpos,
						   type, obj_offset);
			if (!base) {
				type = OBJ_BAD;
				goto out;
			}

			hashcpy(oi->delta_base_sha1, base);
		} else
			hashclr(oi->delta_base_sha1);
	}

2147 2148 2149 2150 2151
out:
	unuse_pack(&w_curs);
	return type;
}

2152
static void *unpack_compressed_entry(struct packed_git *p,
2153
				    struct pack_window **w_curs,
2154
				    off_t curpos,
2155
				    unsigned long size)
2156 2157
{
	int st;
2158
	git_zstream stream;
2159
	unsigned char *buffer, *in;
2160

2161 2162 2163
	buffer = xmallocz_gently(size);
	if (!buffer)
		return NULL;
2164 2165
	memset(&stream, 0, sizeof(stream));
	stream.next_out = buffer;
2166
	stream.avail_out = size + 1;
2167

2168
	git_inflate_init(&stream);
2169
	do {
2170
		in = use_pack(p, w_curs, curpos, &stream.avail_in);
2171
		stream.next_in = in;
2172
		st = git_inflate(&stream, Z_FINISH);
2173 2174
		if (!stream.avail_out)
			break; /* the payload is larger than it should be */
2175
		curpos += stream.next_in - in;
2176
	} while (st == Z_OK || st == Z_BUF_ERROR);
2177
	git_inflate_end(&stream);
2178 2179 2180 2181 2182 2183 2184 2185
	if ((st != Z_STREAM_END) || stream.total_out != size) {
		free(buffer);
		return NULL;
	}

	return buffer;
}

J
Jeff King 已提交
2186
/* Maps (pack, offset) keys to cached base objects; initialized on first insertion. */
static struct hashmap delta_base_cache;
2187
/* Sum of the "size" fields of all entries currently held in the cache. */
static size_t delta_base_cached;
2188

J
Jeff King 已提交
2189
/* Every cached entry; new ones are appended at the tail, eviction starts at the head. */
static LIST_HEAD(delta_base_cache_lru);
2190

J
Jeff King 已提交
2191
/* Identifies a cached base object by the pack it lives in and its offset there. */
struct delta_base_cache_key {
	struct packed_git *p;
	off_t base_offset;
};

struct delta_base_cache_entry {
	struct hashmap hash;
	struct delta_base_cache_key key;
	struct list_head lru;
	void *data;
2201 2202
	unsigned long size;
	enum object_type type;
J
Jeff King 已提交
2203
};
2204

J
Jeff King 已提交
2205
/*
 * Hash a (pack, offset) pair for use as a delta base cache key: the
 * pack pointer and the offset are added together and the upper bits
 * are folded down into the lower ones.
 */
static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset)
{
	unsigned int hash;

	hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset;
	hash += (hash >> 8) + (hash >> 16);
	return hash;
}

2214 2215
static struct delta_base_cache_entry *
get_delta_base_cache_entry(struct packed_git *p, off_t base_offset)
2216
{
J
Jeff King 已提交
2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232
	struct hashmap_entry entry;
	struct delta_base_cache_key key;

	if (!delta_base_cache.cmpfn)
		return NULL;

	hashmap_entry_init(&entry, pack_entry_hash(p, base_offset));
	key.p = p;
	key.base_offset = base_offset;
	return hashmap_get(&delta_base_cache, &entry, &key);
}

/* Returns non-zero iff both keys name the same pack and the same offset. */
static int delta_base_cache_key_eq(const struct delta_base_cache_key *a,
				   const struct delta_base_cache_key *b)
{
	return a->p == b->p && a->base_offset == b->base_offset;
}

J
Jeff King 已提交
2235 2236
static int delta_base_cache_hash_cmp(const void *va, const void *vb,
				     const void *vkey)
2237
{
J
Jeff King 已提交
2238 2239 2240 2241 2242 2243
	const struct delta_base_cache_entry *a = va, *b = vb;
	const struct delta_base_cache_key *key = vkey;
	if (key)
		return !delta_base_cache_key_eq(&a->key, key);
	else
		return !delta_base_cache_key_eq(&a->key, &b->key);
2244 2245
}

2246 2247
/* Returns 1 if the object at base_offset in pack p is currently cached, 0 otherwise. */
static int in_delta_base_cache(struct packed_git *p, off_t base_offset)
{
	return !!get_delta_base_cache_entry(p, base_offset);
}

2251 2252 2253 2254 2255 2256
/*
 * Remove the entry from the cache, but do _not_ free the associated
 * entry data. The caller takes ownership of the "data" buffer, and
 * should copy out any fields it wants before detaching.
 */
static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
2257
{
J
Jeff King 已提交
2258
	hashmap_remove(&delta_base_cache, ent, &ent->key);
J
Jeff King 已提交
2259
	list_del(&ent->lru);
2260
	delta_base_cached -= ent->size;
J
Jeff King 已提交
2261
	free(ent);
2262 2263
}

2264
static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
2265
	unsigned long *base_size, enum object_type *type)
2266
{
2267
	struct delta_base_cache_entry *ent;
2268

2269
	ent = get_delta_base_cache_entry(p, base_offset);
J
Jeff King 已提交
2270
	if (!ent)
2271
		return unpack_entry(p, base_offset, type, base_size);
2272 2273 2274

	*type = ent->type;
	*base_size = ent->size;
2275
	return xmemdupz(ent->data, ent->size);
2276 2277
}

2278 2279
static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
{
J
Jeff King 已提交
2280 2281
	free(ent->data);
	detach_delta_base_cache_entry(ent);
2282 2283
}

2284 2285
void clear_delta_base_cache(void)
{
J
Jeff King 已提交
2286 2287 2288 2289 2290 2291 2292
	struct hashmap_iter iter;
	struct delta_base_cache_entry *entry;
	for (entry = hashmap_iter_first(&delta_base_cache, &iter);
	     entry;
	     entry = hashmap_iter_next(&iter)) {
		release_delta_base_cache(entry);
	}
2293 2294
}

2295 2296 2297
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
	void *base, unsigned long base_size, enum object_type type)
{
J
Jeff King 已提交
2298
	struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
2299
	struct list_head *lru, *tmp;
2300

2301
	delta_base_cached += base_size;
2302

2303
	list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
J
Jeff King 已提交
2304 2305 2306 2307
		struct delta_base_cache_entry *f =
			list_entry(lru, struct delta_base_cache_entry, lru);
		if (delta_base_cached <= delta_base_cache_limit)
			break;
2308 2309
		release_delta_base_cache(f);
	}
2310

J
Jeff King 已提交
2311 2312
	ent->key.p = p;
	ent->key.base_offset = base_offset;
2313 2314 2315
	ent->type = type;
	ent->data = base;
	ent->size = base_size;
J
Jeff King 已提交
2316
	list_add_tail(&ent->lru, &delta_base_cache_lru);
J
Jeff King 已提交
2317 2318 2319 2320 2321

	if (!delta_base_cache.cmpfn)
		hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, 0);
	hashmap_entry_init(ent, pack_entry_hash(p, base_offset));
	hashmap_add(&delta_base_cache, ent);
2322 2323
}

2324 2325 2326
/* Forward declaration; the definition appears further down in this file. */
static void *read_object(const unsigned char *sha1, enum object_type *type,
			 unsigned long *size);

2327 2328
static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
{
2329 2330 2331
	static struct trace_key pack_access = TRACE_KEY_INIT(PACK_ACCESS);
	trace_printf_key(&pack_access, "%s %"PRIuMAX"\n",
			 p->pack_name, (uintmax_t)obj_offset);
2332 2333
}

2334 2335
/*
 * When non-zero, unpack_entry() checks the CRC of each pack entry it
 * reads, for packs whose index_version is greater than 1.
 */
int do_check_packed_object_crc;

2336 2337 2338 2339 2340 2341 2342
/*
 * Number of delta stack slots unpack_entry() keeps in a local array
 * before it switches to a heap-allocated stack.
 */
#define UNPACK_ENTRY_STACK_PREALLOC 64
/* One delta that unpack_entry() has put aside until its base is available. */
struct unpack_entry_stack_ent {
	off_t obj_offset;	/* offset of the delta's entry in the pack */
	off_t curpos;		/* position later handed to unpack_compressed_entry() */
	unsigned long size;	/* size taken from the entry header */
};

2343
void *unpack_entry(struct packed_git *p, off_t obj_offset,
2344
		   enum object_type *final_type, unsigned long *final_size)
2345
{
2346
	struct pack_window *w_curs = NULL;
2347
	off_t curpos = obj_offset;
2348 2349 2350 2351 2352 2353 2354
	void *data = NULL;
	unsigned long size;
	enum object_type type;
	struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC];
	struct unpack_entry_stack_ent *delta_stack = small_delta_stack;
	int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC;
	int base_from_cache = 0;
2355

2356
	write_pack_access_log(p, obj_offset);
2357

2358 2359 2360 2361 2362 2363
	/* PHASE 1: drill down to the innermost base object */
	for (;;) {
		off_t base_offset;
		int i;
		struct delta_base_cache_entry *ent;

2364
		ent = get_delta_base_cache_entry(p, curpos);
J
Jeff King 已提交
2365
		if (ent) {
2366 2367 2368
			type = ent->type;
			data = ent->data;
			size = ent->size;
2369
			detach_delta_base_cache_entry(ent);
2370 2371 2372 2373
			base_from_cache = 1;
			break;
		}

2374 2375
		if (do_check_packed_object_crc && p->index_version > 1) {
			struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
2376
			off_t len = revidx[1].offset - obj_offset;
2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405
			if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
				const unsigned char *sha1 =
					nth_packed_object_sha1(p, revidx->nr);
				error("bad packed object CRC for %s",
				      sha1_to_hex(sha1));
				mark_bad_packed_object(p, sha1);
				unuse_pack(&w_curs);
				return NULL;
			}
		}

		type = unpack_object_header(p, &w_curs, &curpos, &size);
		if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA)
			break;

		base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
		if (!base_offset) {
			error("failed to validate delta base reference "
			      "at offset %"PRIuMAX" from %s",
			      (uintmax_t)curpos, p->pack_name);
			/* bail to phase 2, in hopes of recovery */
			data = NULL;
			break;
		}

		/* push object, proceed to base */
		if (delta_stack_nr >= delta_stack_alloc
		    && delta_stack == small_delta_stack) {
			delta_stack_alloc = alloc_nr(delta_stack_nr);
J
Jeff King 已提交
2406
			ALLOC_ARRAY(delta_stack, delta_stack_alloc);
2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417
			memcpy(delta_stack, small_delta_stack,
			       sizeof(*delta_stack)*delta_stack_nr);
		} else {
			ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc);
		}
		i = delta_stack_nr++;
		delta_stack[i].obj_offset = obj_offset;
		delta_stack[i].curpos = curpos;
		delta_stack[i].size = size;

		curpos = obj_offset = base_offset;
2418 2419
	}

2420 2421
	/* PHASE 2: handle the base */
	switch (type) {
2422 2423
	case OBJ_OFS_DELTA:
	case OBJ_REF_DELTA:
2424
		if (data)
2425
			die("BUG: unpack_entry: left loop at a valid delta");
2426
		break;
2427 2428 2429 2430
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_BLOB:
	case OBJ_TAG:
2431 2432
		if (!base_from_cache)
			data = unpack_compressed_entry(p, &w_curs, curpos, size);
2433
		break;
2434
	default:
2435 2436
		data = NULL;
		error("unknown object type %i at offset %"PRIuMAX" in %s",
2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497
		      type, (uintmax_t)obj_offset, p->pack_name);
	}

	/* PHASE 3: apply deltas in order */

	/* invariants:
	 *   'data' holds the base data, or NULL if there was corruption
	 */
	while (delta_stack_nr) {
		void *delta_data;
		void *base = data;
		unsigned long delta_size, base_size = size;
		int i;

		data = NULL;

		if (base)
			add_delta_base_cache(p, obj_offset, base, base_size, type);

		if (!base) {
			/*
			 * We're probably in deep shit, but let's try to fetch
			 * the required base anyway from another pack or loose.
			 * This is costly but should happen only in the presence
			 * of a corrupted pack, and is better than failing outright.
			 */
			struct revindex_entry *revidx;
			const unsigned char *base_sha1;
			revidx = find_pack_revindex(p, obj_offset);
			if (revidx) {
				base_sha1 = nth_packed_object_sha1(p, revidx->nr);
				error("failed to read delta base object %s"
				      " at offset %"PRIuMAX" from %s",
				      sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
				      p->pack_name);
				mark_bad_packed_object(p, base_sha1);
				base = read_object(base_sha1, &type, &base_size);
			}
		}

		i = --delta_stack_nr;
		obj_offset = delta_stack[i].obj_offset;
		curpos = delta_stack[i].curpos;
		delta_size = delta_stack[i].size;

		if (!base)
			continue;

		delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size);

		if (!delta_data) {
			error("failed to unpack compressed delta "
			      "at offset %"PRIuMAX" from %s",
			      (uintmax_t)curpos, p->pack_name);
			data = NULL;
			continue;
		}

		data = patch_delta(base, base_size,
				   delta_data, delta_size,
				   &size);
2498 2499 2500 2501 2502 2503 2504 2505 2506

		/*
		 * We could not apply the delta; warn the user, but keep going.
		 * Our failure will be noticed either in the next iteration of
		 * the loop, or if this is the final delta, in the caller when
		 * we return NULL. Those code paths will take care of making
		 * a more explicit warning and retrying with another copy of
		 * the object.
		 */
2507
		if (!data)
2508
			error("failed to apply delta");
2509

2510
		free(delta_data);
2511
	}
2512 2513 2514 2515

	*final_type = type;
	*final_size = size;

2516
	unuse_pack(&w_curs);
2517 2518 2519 2520

	if (delta_stack != small_delta_stack)
		free(delta_stack);

2521
	return data;
2522 2523
}

2524
const unsigned char *nth_packed_object_sha1(struct packed_git *p,
2525
					    uint32_t n)
2526
{
2527
	const unsigned char *index = p->index_data;
2528 2529 2530 2531 2532
	if (!index) {
		if (open_pack_index(p))
			return NULL;
		index = p->index_data;
	}
N
Nicolas Pitre 已提交
2533
	if (n >= p->num_objects)
2534
		return NULL;
2535 2536 2537 2538 2539 2540 2541 2542 2543
	index += 4 * 256;
	if (p->index_version == 1) {
		return index + 24 * n + 4;
	} else {
		index += 8;
		return index + 20 * n;
	}
}

2544 2545 2546 2547 2548 2549
/*
 * Die unless vptr lies inside the index data of pack p with more than
 * eight bytes remaining after it.
 */
void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
{
	const unsigned char *ptr = vptr;
	const unsigned char *start = p->index_data;
	const unsigned char *end = start + p->index_size;
	if (ptr < start)
		die(_("offset before start of pack index for %s (corrupt index?)"),
		    p->pack_name);
	/* No need to check for underflow; .idx files must be at least 8 bytes */
	if (ptr >= end - 8)
		die(_("offset beyond end of pack index for %s (truncated index?)"),
		    p->pack_name);
}

N
Nicolas Pitre 已提交
2558
off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570
{
	const unsigned char *index = p->index_data;
	index += 4 * 256;
	if (p->index_version == 1) {
		return ntohl(*((uint32_t *)(index + 24 * n)));
	} else {
		uint32_t off;
		index += 8 + p->num_objects * (20 + 4);
		off = ntohl(*((uint32_t *)(index + 4 * n)));
		if (!(off & 0x80000000))
			return off;
		index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
2571
		check_pack_index_ptr(p, index);
2572 2573 2574
		return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
				   ntohl(*((uint32_t *)(index + 4)));
	}
2575 2576
}

2577
off_t find_pack_entry_one(const unsigned char *sha1,
N
Nicolas Pitre 已提交
2578
				  struct packed_git *p)
2579
{
2580 2581
	const uint32_t *level1_ofs = p->index_data;
	const unsigned char *index = p->index_data;
2582 2583 2584 2585 2586 2587
	unsigned hi, lo, stride;
	static int use_lookup = -1;
	static int debug_lookup = -1;

	if (debug_lookup < 0)
		debug_lookup = !!getenv("GIT_DEBUG_LOOKUP");
2588

2589 2590 2591 2592 2593 2594
	if (!index) {
		if (open_pack_index(p))
			return 0;
		level1_ofs = p->index_data;
		index = p->index_data;
	}
2595 2596 2597 2598
	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}
2599
	index += 4 * 256;
2600 2601
	hi = ntohl(level1_ofs[*sha1]);
	lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1]));
2602 2603 2604 2605 2606 2607 2608 2609
	if (p->index_version > 1) {
		stride = 20;
	} else {
		stride = 24;
		index += 4;
	}

	if (debug_lookup)
2610
		printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n",
2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621
		       sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects);

	if (use_lookup < 0)
		use_lookup = !!getenv("GIT_USE_LOOKUP");
	if (use_lookup) {
		int pos = sha1_entry_pos(index, stride, 0,
					 lo, hi, p->num_objects, sha1);
		if (pos < 0)
			return 0;
		return nth_packed_object_offset(p, pos);
	}
2622 2623

	do {
2624
		unsigned mi = (lo + hi) / 2;
2625 2626 2627 2628 2629
		int cmp = hashcmp(index + mi * stride, sha1);

		if (debug_lookup)
			printf("lo %u hi %u rg %u mi %u\n",
			       lo, hi, hi - lo, mi);
N
Nicolas Pitre 已提交
2630
		if (!cmp)
2631
			return nth_packed_object_offset(p, mi);
2632 2633 2634 2635 2636 2637 2638 2639
		if (cmp > 0)
			hi = mi;
		else
			lo = mi+1;
	} while (lo < hi);
	return 0;
}

2640
int is_pack_valid(struct packed_git *p)
2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660
{
	/* An already open pack is known to be valid. */
	if (p->pack_fd != -1)
		return 1;

	/* If the pack has one window completely covering the
	 * file size, the pack is known to be valid even if
	 * the descriptor is not currently open.
	 */
	if (p->windows) {
		struct pack_window *w = p->windows;

		if (!w->offset && w->len == p->pack_size)
			return 1;
	}

	/* Force the pack to open to prove its valid. */
	return !open_packed_git(p);
}

2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684
/*
 * If pack p holds a usable copy of the object named by sha1, record
 * its location in "e" and return 1; otherwise return 0.  Objects
 * listed in the pack's bad_object_sha1 array are treated as absent.
 */
static int fill_pack_entry(const unsigned char *sha1,
			   struct pack_entry *e,
			   struct packed_git *p)
{
	off_t offset;

	if (p->num_bad_objects) {
		unsigned i;
		for (i = 0; i < p->num_bad_objects; i++)
			if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
				return 0;
	}

	offset = find_pack_entry_one(sha1, p);
	if (!offset)
		return 0;

	/*
	 * We are about to tell the caller where they can locate the
	 * requested object.  We better make sure the packfile is
	 * still here and can be accessed before supplying that
	 * answer, as it may have been deleted since the index was
	 * loaded!
	 */
	if (!is_pack_valid(p))
		return 0;
	e->offset = offset;
	e->p = p;
	hashcpy(e->sha1, sha1);
	return 1;
}

2693 2694 2695 2696
/*
 * Iff a pack file contains the object named by sha1, return true and
 * store its location to e.
 */
2697
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
2698
{
2699
	struct mru_entry *p;
N
Nicolas Pitre 已提交
2700

2701
	prepare_packed_git();
2702 2703
	if (!packed_git)
		return 0;
2704

2705 2706 2707
	for (p = packed_git_mru->head; p; p = p->next) {
		if (fill_pack_entry(sha1, e, p->item)) {
			mru_mark(packed_git_mru, p);
2708 2709
			return 1;
		}
2710
	}
2711 2712 2713
	return 0;
}

J
Junio C Hamano 已提交
2714
struct packed_git *find_sha1_pack(const unsigned char *sha1,
2715 2716 2717 2718 2719
				  struct packed_git *packs)
{
	struct packed_git *p;

	for (p = packs; p; p = p->next) {
N
Nicolas Pitre 已提交
2720
		if (find_pack_entry_one(sha1, p))
2721 2722 2723
			return p;
	}
	return NULL;
2724

2725 2726
}

2727
static int sha1_loose_object_info(const unsigned char *sha1,
2728 2729
				  struct object_info *oi,
				  int flags)
2730
{
2731 2732
	int status = 0;
	unsigned long mapsize;
2733
	void *map;
2734
	git_zstream stream;
N
Nicolas Pitre 已提交
2735
	char hdr[32];
2736
	struct strbuf hdrbuf = STRBUF_INIT;
2737

2738 2739 2740
	if (oi->delta_base_sha1)
		hashclr(oi->delta_base_sha1);

2741 2742
	/*
	 * If we don't care about type or size, then we don't
2743 2744 2745 2746 2747
	 * need to look inside the object at all. Note that we
	 * do not optimize out the stat call, even if the
	 * caller doesn't care about the disk-size, since our
	 * return value implicitly indicates whether the
	 * object even exists.
2748
	 */
2749
	if (!oi->typep && !oi->typename && !oi->sizep) {
2750 2751 2752 2753
		struct stat st;
		if (stat_sha1_file(sha1, &st) < 0)
			return -1;
		if (oi->disk_sizep)
2754
			*oi->disk_sizep = st.st_size;
2755 2756 2757
		return 0;
	}

2758
	map = map_sha1_file(sha1, &mapsize);
2759
	if (!map)
2760
		return -1;
2761 2762
	if (oi->disk_sizep)
		*oi->disk_sizep = mapsize;
2763 2764 2765 2766 2767
	if ((flags & LOOKUP_UNKNOWN_OBJECT)) {
		if (unpack_sha1_header_to_strbuf(&stream, map, mapsize, hdr, sizeof(hdr), &hdrbuf) < 0)
			status = error("unable to unpack %s header with --allow-unknown-type",
				       sha1_to_hex(sha1));
	} else if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2768 2769
		status = error("unable to unpack %s header",
			       sha1_to_hex(sha1));
2770 2771 2772 2773 2774 2775 2776
	if (status < 0)
		; /* Do nothing */
	else if (hdrbuf.len) {
		if ((status = parse_sha1_header_extended(hdrbuf.buf, oi, flags)) < 0)
			status = error("unable to parse %s header with --allow-unknown-type",
				       sha1_to_hex(sha1));
	} else if ((status = parse_sha1_header_extended(hdr, oi, flags)) < 0)
2777
		status = error("unable to parse %s header", sha1_to_hex(sha1));
2778
	git_inflate_end(&stream);
2779
	munmap(map, mapsize);
2780
	if (status && oi->typep)
2781
		*oi->typep = status;
2782
	strbuf_release(&hdrbuf);
2783
	return 0;
2784 2785
}

2786
int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi, unsigned flags)
2787
{
2788
	struct cached_object *co;
2789
	struct pack_entry e;
2790
	int rtype;
2791
	enum object_type real_type;
2792
	const unsigned char *real = lookup_replace_object_extended(sha1, flags);
2793

2794
	co = find_cached_object(real);
2795
	if (co) {
2796 2797
		if (oi->typep)
			*(oi->typep) = co->type;
2798 2799
		if (oi->sizep)
			*(oi->sizep) = co->size;
2800 2801
		if (oi->disk_sizep)
			*(oi->disk_sizep) = 0;
2802 2803
		if (oi->delta_base_sha1)
			hashclr(oi->delta_base_sha1);
2804 2805
		if (oi->typename)
			strbuf_addstr(oi->typename, typename(co->type));
2806
		oi->whence = OI_CACHED;
2807
		return 0;
2808 2809
	}

2810
	if (!find_pack_entry(real, &e)) {
2811
		/* Most likely it's a loose object. */
2812
		if (!sha1_loose_object_info(real, oi, flags)) {
2813
			oi->whence = OI_LOOSE;
2814
			return 0;
2815
		}
2816 2817

		/* Not a loose object; someone else may have just packed it. */
2818
		reprepare_packed_git();
2819
		if (!find_pack_entry(real, &e))
2820
			return -1;
2821
	}
2822

2823 2824 2825 2826 2827 2828 2829
	/*
	 * packed_object_info() does not follow the delta chain to
	 * find out the real type, unless it is given oi->typep.
	 */
	if (oi->typename && !oi->typep)
		oi->typep = &real_type;

2830
	rtype = packed_object_info(e.p, e.offset, oi);
2831
	if (rtype < 0) {
2832
		mark_bad_packed_object(e.p, real);
2833 2834
		if (oi->typep == &real_type)
			oi->typep = NULL;
2835
		return sha1_object_info_extended(real, oi, 0);
2836 2837
	} else if (in_delta_base_cache(e.p, e.offset)) {
		oi->whence = OI_DBCACHED;
2838 2839 2840 2841 2842 2843
	} else {
		oi->whence = OI_PACKED;
		oi->u.packed.offset = e.offset;
		oi->u.packed.pack = e.p;
		oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA ||
					 rtype == OBJ_OFS_DELTA);
2844
	}
2845 2846 2847 2848
	if (oi->typename)
		strbuf_addstr(oi->typename, typename(*oi->typep));
	if (oi->typep == &real_type)
		oi->typep = NULL;
2849

2850
	return 0;
2851 2852
}

2853
/* returns enum object_type or negative */
2854 2855
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
{
2856
	enum object_type type;
R
Ramsay Jones 已提交
2857
	struct object_info oi = {NULL};
2858

2859
	oi.typep = &type;
2860
	oi.sizep = sizep;
2861
	if (sha1_object_info_extended(sha1, &oi, LOOKUP_REPLACE_OBJECT) < 0)
2862 2863
		return -1;
	return type;
2864 2865
}

2866 2867
static void *read_packed_sha1(const unsigned char *sha1,
			      enum object_type *type, unsigned long *size)
2868 2869
{
	struct pack_entry e;
2870
	void *data;
2871

2872
	if (!find_pack_entry(sha1, &e))
2873
		return NULL;
2874
	data = cache_or_unpack_entry(e.p, e.offset, size, type);
2875 2876 2877 2878 2879 2880 2881 2882 2883 2884
	if (!data) {
		/*
		 * We're probably in deep shit, but let's try to fetch
		 * the required object anyway from another pack or loose.
		 * This should happen only in the presence of a corrupted
		 * pack, and is better than failing outright.
		 */
		error("failed to read object %s at offset %"PRIuMAX" from %s",
		      sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
		mark_bad_packed_object(e.p, sha1);
2885
		data = read_object(sha1, type, size);
2886 2887
	}
	return data;
2888 2889
}

2890 2891
int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
		      unsigned char *sha1)
2892 2893 2894
{
	struct cached_object *co;

2895
	hash_sha1_file(buf, len, typename(type), sha1);
2896 2897
	if (has_sha1_file(sha1) || find_cached_object(sha1))
		return 0;
2898
	ALLOC_GROW(cached_objects, cached_object_nr + 1, cached_object_alloc);
2899 2900
	co = &cached_objects[cached_object_nr++];
	co->size = len;
2901
	co->type = type;
J
Junio C Hamano 已提交
2902 2903
	co->buf = xmalloc(len);
	memcpy(co->buf, buf, len);
2904 2905 2906 2907
	hashcpy(co->sha1, sha1);
	return 0;
}

2908 2909
static void *read_object(const unsigned char *sha1, enum object_type *type,
			 unsigned long *size)
2910 2911 2912
{
	unsigned long mapsize;
	void *map, *buf;
2913 2914 2915 2916
	struct cached_object *co;

	co = find_cached_object(sha1);
	if (co) {
2917
		*type = co->type;
2918
		*size = co->size;
P
Pierre Habouzit 已提交
2919
		return xmemdupz(co->buf, co->size);
2920
	}
2921

2922 2923 2924
	buf = read_packed_sha1(sha1, type, size);
	if (buf)
		return buf;
2925
	map = map_sha1_file(sha1, &mapsize);
2926
	if (map) {
2927
		buf = unpack_sha1_file(map, mapsize, type, size, sha1);
2928 2929 2930
		munmap(map, mapsize);
		return buf;
	}
2931
	reprepare_packed_git();
2932
	return read_packed_sha1(sha1, type, size);
2933 2934
}

2935 2936 2937 2938 2939
/*
 * This function dies on corrupt objects; the callers who want to
 * deal with them should arrange to call read_object() and give error
 * messages themselves.
 */
2940 2941 2942 2943
void *read_sha1_file_extended(const unsigned char *sha1,
			      enum object_type *type,
			      unsigned long *size,
			      unsigned flag)
2944
{
2945
	void *data;
2946
	const struct packed_git *p;
2947
	const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
2948

2949 2950
	errno = 0;
	data = read_object(repl, type, size);
2951
	if (data)
2952
		return data;
2953

2954
	if (errno && errno != ENOENT)
2955 2956
		die_errno("failed to read object %s", sha1_to_hex(sha1));

2957
	/* die if we replaced an object with one that does not exist */
2958
	if (repl != sha1)
2959 2960 2961
		die("replacement %s not found for %s",
		    sha1_to_hex(repl), sha1_to_hex(sha1));

2962
	if (has_loose_object(repl)) {
2963 2964
		const char *path = sha1_file_name(sha1);

2965 2966
		die("loose object %s (stored in %s) is corrupt",
		    sha1_to_hex(repl), path);
2967
	}
2968

2969 2970 2971
	if ((p = has_packed_and_bad(repl)) != NULL)
		die("packed object %s (stored in %s) is corrupt",
		    sha1_to_hex(repl), p->pack_name);
2972

2973
	return NULL;
2974 2975
}

2976
void *read_object_with_reference(const unsigned char *sha1,
2977
				 const char *required_type_name,
2978 2979
				 unsigned long *size,
				 unsigned char *actual_sha1_return)
2980
{
2981
	enum object_type type, required_type;
2982 2983
	void *buffer;
	unsigned long isize;
2984
	unsigned char actual_sha1[20];
2985

2986
	required_type = type_from_string(required_type_name);
2987
	hashcpy(actual_sha1, sha1);
2988 2989 2990
	while (1) {
		int ref_length = -1;
		const char *ref_type = NULL;
2991

2992
		buffer = read_sha1_file(actual_sha1, &type, &isize);
2993 2994
		if (!buffer)
			return NULL;
2995
		if (type == required_type) {
2996 2997
			*size = isize;
			if (actual_sha1_return)
2998
				hashcpy(actual_sha1_return, actual_sha1);
2999 3000 3001
			return buffer;
		}
		/* Handle references */
3002
		else if (type == OBJ_COMMIT)
3003
			ref_type = "tree ";
3004
		else if (type == OBJ_TAG)
3005 3006 3007 3008 3009 3010
			ref_type = "object ";
		else {
			free(buffer);
			return NULL;
		}
		ref_length = strlen(ref_type);
3011

3012 3013
		if (ref_length + 40 > isize ||
		    memcmp(buffer, ref_type, ref_length) ||
3014
		    get_sha1_hex((char *) buffer + ref_length, actual_sha1)) {
3015 3016 3017
			free(buffer);
			return NULL;
		}
3018
		free(buffer);
3019 3020
		/* Now we have the ID of the referred-to object in
		 * actual_sha1.  Check again. */
3021 3022 3023
	}
}

N
Nicolas Pitre 已提交
3024
static void write_sha1_file_prepare(const void *buf, unsigned long len,
R
Rene Scharfe 已提交
3025
                                    const char *type, unsigned char *sha1,
N
Nicolas Pitre 已提交
3026
                                    char *hdr, int *hdrlen)
3027
{
3028
	git_SHA_CTX c;
3029 3030

	/* Generate the header */
3031
	*hdrlen = xsnprintf(hdr, *hdrlen, "%s %lu", type, len)+1;
3032 3033

	/* Sha1.. */
3034 3035 3036 3037
	git_SHA1_Init(&c);
	git_SHA1_Update(&c, hdr, *hdrlen);
	git_SHA1_Update(&c, buf, len);
	git_SHA1_Final(sha1, &c);
3038 3039
}

3040
/*
3041
 * Move the just written object into its final resting place.
3042
 */
3043
int finalize_object_file(const char *tmpfile, const char *filename)
3044
{
3045
	int ret = 0;
3046

3047
	if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
3048 3049
		goto try_rename;
	else if (link(tmpfile, filename))
3050
		ret = errno;
3051 3052 3053 3054 3055 3056 3057 3058 3059

	/*
	 * Coda hack - coda doesn't like cross-directory links,
	 * so we fall back to a rename, which will mean that it
	 * won't be able to check collisions, but that's not a
	 * big deal.
	 *
	 * The same holds for FAT formatted media.
	 *
3060
	 * When this succeeds, we just return.  We have nothing
3061 3062 3063
	 * left to unlink.
	 */
	if (ret && ret != EEXIST) {
3064
	try_rename:
3065
		if (!rename(tmpfile, filename))
3066
			goto out;
3067
		ret = errno;
3068
	}
3069
	unlink_or_warn(tmpfile);
3070 3071
	if (ret) {
		if (ret != EEXIST) {
3072
			return error_errno("unable to write sha1 filename %s", filename);
3073 3074 3075 3076
		}
		/* FIXME!!! Collision check here ? */
	}

3077
out:
3078
	if (adjust_shared_perm(filename))
3079
		return error("unable to set permission to '%s'", filename);
3080 3081 3082
	return 0;
}

L
Linus Torvalds 已提交
3083 3084
/*
 * Write all "len" bytes of buf to fd.  Returns 0 on success, or the
 * result of error_errno() if the write fails.
 */
static int write_buffer(int fd, const void *buf, size_t len)
{
	if (write_in_full(fd, buf, len) < 0)
		return error_errno("file write error");
	return 0;
}

N
Nicolas Pitre 已提交
3090
/*
 * Compute the object name that "buf" (len bytes) would have as an
 * object of the given type and store it in sha1.  Nothing is written
 * to disk.  Always returns 0.
 */
int hash_sha1_file(const void *buf, unsigned long len, const char *type,
                   unsigned char *sha1)
{
	char hdr[32];
	int hdrlen = sizeof(hdr);
	write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);
	return 0;
}

3099 3100 3101
/* Finalize a file on disk, and close it. */
static void close_sha1_file(int fd)
{
3102 3103
	if (fsync_object_files)
		fsync_or_die(fd, "sha1 file");
3104
	if (close(fd) != 0)
3105
		die_errno("error when closing sha1 file");
3106 3107
}

3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123
/*
 * Length of the leading directory part of filename, counting the
 * final '/'; 0 when filename contains no '/' at all.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? last_slash - filename + 1 : 0;
}

/*
 * This creates a temporary file in the same directory as the final
 * 'filename'
 *
 * We want to avoid cross-directory filename renames, because those
 * can have problems on various filesystems (FAT, NFS, Coda).
 */
J
Jeff King 已提交
3124
static int create_tmpfile(struct strbuf *tmp, const char *filename)
3125 3126 3127
{
	int fd, dirlen = directory_size(filename);

J
Jeff King 已提交
3128 3129 3130 3131
	strbuf_reset(tmp);
	strbuf_add(tmp, filename, dirlen);
	strbuf_addstr(tmp, "tmp_obj_XXXXXX");
	fd = git_mkstemp_mode(tmp->buf, 0444);
3132
	if (fd < 0 && dirlen && errno == ENOENT) {
J
Jeff King 已提交
3133 3134 3135 3136 3137 3138 3139 3140 3141
		/*
		 * Make sure the directory exists; note that the contents
		 * of the buffer are undefined after mkstemp returns an
		 * error, so we have to rewrite the whole buffer from
		 * scratch.
		 */
		strbuf_reset(tmp);
		strbuf_add(tmp, filename, dirlen - 1);
		if (mkdir(tmp->buf, 0777) && errno != EEXIST)
3142
			return -1;
J
Jeff King 已提交
3143
		if (adjust_shared_perm(tmp->buf))
3144 3145 3146
			return -1;

		/* Try again */
J
Jeff King 已提交
3147 3148
		strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
		fd = git_mkstemp_mode(tmp->buf, 0444);
3149 3150 3151 3152
	}
	return fd;
}

3153
static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
3154
			      const void *buf, unsigned long len, time_t mtime)
3155
{
3156
	int fd, ret;
3157
	unsigned char compressed[4096];
3158
	git_zstream stream;
3159 3160
	git_SHA_CTX c;
	unsigned char parano_sha1[20];
J
Jeff King 已提交
3161
	static struct strbuf tmp_file = STRBUF_INIT;
3162
	const char *filename = sha1_file_name(sha1);
3163

J
Jeff King 已提交
3164
	fd = create_tmpfile(&tmp_file, filename);
3165
	if (fd < 0) {
3166
		if (errno == EACCES)
3167
			return error("insufficient permission for adding an object to repository database %s", get_object_directory());
3168
		else
3169
			return error_errno("unable to create temporary file");
3170 3171
	}

3172
	/* Set it up */
3173
	git_deflate_init(&stream, zlib_compression_level);
3174
	stream.next_out = compressed;
3175
	stream.avail_out = sizeof(compressed);
3176
	git_SHA1_Init(&c);
3177 3178

	/* First header.. */
N
Nicolas Pitre 已提交
3179
	stream.next_in = (unsigned char *)hdr;
3180
	stream.avail_in = hdrlen;
3181 3182
	while (git_deflate(&stream, 0) == Z_OK)
		; /* nothing */
3183
	git_SHA1_Update(&c, hdr, hdrlen);
3184 3185

	/* Then the data itself.. */
3186
	stream.next_in = (void *)buf;
3187
	stream.avail_in = len;
3188
	do {
3189
		unsigned char *in0 = stream.next_in;
3190
		ret = git_deflate(&stream, Z_FINISH);
3191
		git_SHA1_Update(&c, in0, stream.next_in - in0);
3192 3193 3194 3195 3196 3197
		if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
			die("unable to write sha1 file");
		stream.next_out = compressed;
		stream.avail_out = sizeof(compressed);
	} while (ret == Z_OK);

3198 3199
	if (ret != Z_STREAM_END)
		die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
3200
	ret = git_deflate_end_gently(&stream);
3201 3202
	if (ret != Z_OK)
		die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
3203 3204 3205
	git_SHA1_Final(parano_sha1, &c);
	if (hashcmp(sha1, parano_sha1) != 0)
		die("confused by unstable object source data for %s", sha1_to_hex(sha1));
3206

3207
	close_sha1_file(fd);
3208

3209 3210 3211 3212
	if (mtime) {
		struct utimbuf utb;
		utb.actime = mtime;
		utb.modtime = mtime;
J
Jeff King 已提交
3213
		if (utime(tmp_file.buf, &utb) < 0)
3214
			warning_errno("failed utime() on %s", tmp_file.buf);
3215 3216
	}

J
Jeff King 已提交
3217
	return finalize_object_file(tmp_file.buf, filename);
3218
}
3219

3220 3221 3222 3223 3224 3225 3226 3227
/*
 * Forward to check_and_freshen() with its second argument set to 1 and
 * hand back whatever it returns.
 */
static int freshen_loose_object(const unsigned char *sha1)
{
	int rc = check_and_freshen(sha1, 1);

	return rc;
}

static int freshen_packed_object(const unsigned char *sha1)
{
	struct pack_entry e;
3228 3229 3230 3231 3232 3233 3234 3235
	if (!find_pack_entry(sha1, &e))
		return 0;
	if (e.p->freshened)
		return 1;
	if (!freshen_file(e.p->pack_name))
		return 0;
	e.p->freshened = 1;
	return 1;
3236 3237
}

3238
/*
 * Compute the object name of 'buf' (of the given 'type') into 'sha1'
 * and make sure the object exists in the object store.
 *
 * If the object is already present, packed or loose, it is only
 * freshened and nothing is written out as a
 * .git/objects/??/?{38} file.  Otherwise a new loose object is written.
 *
 * Returns 0 when the object already existed, else the result of
 * write_loose_object().
 */
int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1)
{
	char hdr[32];
	int hdrlen = sizeof(hdr);

	write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen);

	if (freshen_packed_object(sha1))
		return 0;
	if (freshen_loose_object(sha1))
		return 0;

	return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
}

3252 3253 3254 3255 3256 3257 3258
/*
 * Hash 'buf' as an object whose type is the caller-supplied string
 * 'type', storing the name in 'sha1'.  The header buffer is sized from
 * strlen(type) instead of being a fixed array.
 *
 * When HASH_WRITE_OBJECT is set in 'flags' and the object is not
 * already present (packed or loose), it is also written out as a loose
 * object.
 *
 * Returns 0, or the result of write_loose_object() if a write was
 * attempted.
 */
int hash_sha1_file_literally(const void *buf, unsigned long len, const char *type,
			     unsigned char *sha1, unsigned flags)
{
	/* type string, SP, %lu of the length plus NUL must fit this */
	int hdrlen = strlen(type) + 32;
	char *header = xmalloc(hdrlen);
	int status = 0;

	write_sha1_file_prepare(buf, len, type, sha1, header, &hdrlen);

	if ((flags & HASH_WRITE_OBJECT) &&
	    !freshen_packed_object(sha1) &&
	    !freshen_loose_object(sha1))
		status = write_loose_object(sha1, header, hdrlen, buf, len, 0);

	free(header);
	return status;
}

3274 3275 3276 3277 3278 3279 3280
/*
 * Ensure that 'sha1' exists as a loose object.
 *
 * Nothing is done when a loose copy is already there.  Otherwise the
 * object is read with read_packed_sha1() and written out via
 * write_loose_object(), which receives 'mtime' unchanged.
 *
 * Returns 0 if a loose copy already existed, the result of error() if
 * the packed object cannot be read, else the result of
 * write_loose_object().
 */
int force_object_loose(const unsigned char *sha1, time_t mtime)
{
	enum object_type type;
	unsigned long len;
	char hdr[32];
	int hdrlen;
	int status;
	void *data;

	if (has_loose_object(sha1))
		return 0;

	data = read_packed_sha1(sha1, &type, &len);
	if (!data)
		return error("cannot read sha1_file for %s", sha1_to_hex(sha1));

	/* presumably the +1 accounts for the terminating NUL of the header */
	hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %lu", typename(type), len) + 1;
	status = write_loose_object(sha1, hdr, hdrlen, data, len, mtime);
	free(data);

	return status;
}

3295 3296 3297 3298 3299 3300 3301 3302
/*
 * Returns 1 when stat() succeeds on the path given by
 * sha1_pack_index_name(sha1), 0 otherwise.
 */
int has_pack_index(const unsigned char *sha1)
{
	struct stat st;

	return !stat(sha1_pack_index_name(sha1), &st);
}

3303 3304 3305 3306 3307 3308
int has_sha1_pack(const unsigned char *sha1)
{
	struct pack_entry e;
	return find_pack_entry(sha1, &e);
}

3309
int has_sha1_file_with_flags(const unsigned char *sha1, int flags)
3310
{
3311 3312
	struct pack_entry e;

3313
	if (find_pack_entry(sha1, &e))
3314
		return 1;
3315 3316
	if (has_loose_object(sha1))
		return 1;
3317 3318
	if (flags & HAS_SHA1_QUICK)
		return 0;
3319 3320
	reprepare_packed_git();
	return find_pack_entry(sha1, &e);
3321 3322 3323 3324 3325
}

/* struct object_id flavour of has_sha1_file(): passes along oid->hash. */
int has_object_file(const struct object_id *oid)
{
	const unsigned char *sha1 = oid->hash;

	return has_sha1_file(sha1);
}
J
Junio C Hamano 已提交
3327

3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355
/*
 * Walk every entry of the tree object in 'buf'.  Nothing is done with
 * the entries; the walk exists because tree_entry() will die() on
 * malformed entries.
 */
static void check_tree(const void *buf, size_t size)
{
	struct tree_desc desc;
	struct name_entry entry;

	init_tree_desc(&desc, buf, size);
	while (tree_entry(&desc, &entry))
		continue;
}

static void check_commit(const void *buf, size_t size)
{
	struct commit c;
	memset(&c, 0, sizeof(c));
	if (parse_commit_buffer(&c, buf, size))
		die("corrupt commit");
}

static void check_tag(const void *buf, size_t size)
{
	struct tag t;
	memset(&t, 0, sizeof(t));
	if (parse_tag_buffer(&t, buf, size))
		die("corrupt tag");
}

3356
static int index_mem(unsigned char *sha1, void *buf, size_t size,
3357 3358
		     enum object_type type,
		     const char *path, unsigned flags)
3359
{
L
Linus Torvalds 已提交
3360
	int ret, re_allocated = 0;
3361
	int write_object = flags & HASH_WRITE_OBJECT;
J
Junio C Hamano 已提交
3362

3363
	if (!type)
3364
		type = OBJ_BLOB;
L
Linus Torvalds 已提交
3365 3366 3367 3368

	/*
	 * Convert blobs to git internal format
	 */
3369
	if ((type == OBJ_BLOB) && path) {
3370
		struct strbuf nbuf = STRBUF_INIT;
3371
		if (convert_to_git(path, buf, size, &nbuf,
3372
				   write_object ? safe_crlf : SAFE_CRLF_FALSE)) {
3373
			buf = strbuf_detach(&nbuf, &size);
L
Linus Torvalds 已提交
3374 3375 3376
			re_allocated = 1;
		}
	}
3377
	if (flags & HASH_FORMAT_CHECK) {
3378 3379 3380 3381 3382 3383 3384
		if (type == OBJ_TREE)
			check_tree(buf, size);
		if (type == OBJ_COMMIT)
			check_commit(buf, size);
		if (type == OBJ_TAG)
			check_tag(buf, size);
	}
L
Linus Torvalds 已提交
3385

3386
	if (write_object)
3387
		ret = write_sha1_file(buf, size, typename(type), sha1);
R
Rene Scharfe 已提交
3388
	else
3389
		ret = hash_sha1_file(buf, size, typename(type), sha1);
3390
	if (re_allocated)
L
Linus Torvalds 已提交
3391
		free(buf);
3392 3393 3394
	return ret;
}

3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417
static int index_stream_convert_blob(unsigned char *sha1, int fd,
				     const char *path, unsigned flags)
{
	int ret;
	const int write_object = flags & HASH_WRITE_OBJECT;
	struct strbuf sbuf = STRBUF_INIT;

	assert(path);
	assert(would_convert_to_git_filter_fd(path));

	convert_to_git_filter_fd(path, fd, &sbuf,
				 write_object ? safe_crlf : SAFE_CRLF_FALSE);

	if (write_object)
		ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
				      sha1);
	else
		ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
				     sha1);
	strbuf_release(&sbuf);
	return ret;
}

3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
		      const char *path, unsigned flags)
{
	struct strbuf sbuf = STRBUF_INIT;
	int ret;

	if (strbuf_read(&sbuf, fd, 4096) >= 0)
		ret = index_mem(sha1, sbuf.buf, sbuf.len, type,	path, flags);
	else
		ret = -1;
	strbuf_release(&sbuf);
	return ret;
}

3432 3433
#define SMALL_FILE_SIZE (32*1024)

3434 3435 3436
static int index_core(unsigned char *sha1, int fd, size_t size,
		      enum object_type type, const char *path,
		      unsigned flags)
3437 3438 3439
{
	int ret;

3440
	if (!size) {
3441
		ret = index_mem(sha1, "", size, type, path, flags);
3442 3443 3444
	} else if (size <= SMALL_FILE_SIZE) {
		char *buf = xmalloc(size);
		if (size == read_in_full(fd, buf, size))
3445
			ret = index_mem(sha1, buf, size, type, path, flags);
3446
		else
3447
			ret = error_errno("short read");
3448
		free(buf);
3449
	} else {
3450
		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
3451
		ret = index_mem(sha1, buf, size, type, path, flags);
3452
		munmap(buf, size);
3453
	}
3454 3455 3456
	return ret;
}

3457
/*
 * Index a large blob by streaming it through the bulk-checkin
 * machinery.  This creates one packfile per large blob unless
 * bulk-checkin is "plugged".
 *
 * The usual "convert-to-git" step is bypassed here, on purpose.  A
 * streaming version of the conversion functions could be written and
 * inserted before the data is fed to fast-import (or an equivalent
 * in-core API), but that is somewhat complicated: the size of the
 * filter result is not known in advance, yet it must be known before a
 * git object can be written.  The main reason to stream from the
 * working tree file, rather than mmap it in core, is to cope with large
 * binary blobs, which generally do not want any conversion; callers
 * should avoid this code path when filters are requested.
 */
static int index_stream(unsigned char *sha1, int fd, size_t size,
			enum object_type type, const char *path,
			unsigned flags)
{
	int ret = index_bulk_checkin(sha1, fd, size, type, path, flags);

	return ret;
}

3479 3480 3481 3482 3483
int index_fd(unsigned char *sha1, int fd, struct stat *st,
	     enum object_type type, const char *path, unsigned flags)
{
	int ret;

3484 3485 3486 3487
	/*
	 * Call xsize_t() only when needed to avoid potentially unnecessary
	 * die() for large files.
	 */
3488 3489 3490
	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
		ret = index_stream_convert_blob(sha1, fd, path, flags);
	else if (!S_ISREG(st->st_mode))
3491
		ret = index_pipe(sha1, fd, type, path, flags);
3492
	else if (st->st_size <= big_file_threshold || type != OBJ_BLOB ||
3493
		 (path && would_convert_to_git(path)))
3494 3495
		ret = index_core(sha1, fd, xsize_t(st->st_size), type, path,
				 flags);
3496
	else
3497 3498
		ret = index_stream(sha1, fd, xsize_t(st->st_size), type, path,
				   flags);
3499
	close(fd);
3500
	return ret;
J
Junio C Hamano 已提交
3501
}
3502

3503
int index_path(unsigned char *sha1, const char *path, struct stat *st, unsigned flags)
3504 3505
{
	int fd;
3506
	struct strbuf sb = STRBUF_INIT;
3507 3508 3509 3510 3511

	switch (st->st_mode & S_IFMT) {
	case S_IFREG:
		fd = open(path, O_RDONLY);
		if (fd < 0)
3512
			return error_errno("open(\"%s\")", path);
3513
		if (index_fd(sha1, fd, st, OBJ_BLOB, path, flags) < 0)
3514 3515 3516 3517
			return error("%s: failed to insert into database",
				     path);
		break;
	case S_IFLNK:
3518 3519
		if (strbuf_readlink(&sb, path, st->st_size))
			return error_errno("readlink(\"%s\")", path);
3520
		if (!(flags & HASH_WRITE_OBJECT))
3521 3522
			hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
		else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
3523 3524
			return error("%s: failed to insert into database",
				     path);
3525
		strbuf_release(&sb);
3526
		break;
3527 3528
	case S_IFDIR:
		return resolve_gitlink_ref(path, "HEAD", sha1);
3529 3530 3531 3532 3533
	default:
		return error("%s: unsupported file type", path);
	}
	return 0;
}
3534 3535 3536

int read_pack_header(int fd, struct pack_header *header)
{
3537 3538 3539 3540
	if (read_in_full(fd, header, sizeof(*header)) < sizeof(*header))
		/* "eof before pack header was fully read" */
		return PH_ERROR_EOF;

3541 3542 3543 3544 3545 3546 3547 3548
	if (header->hdr_signature != htonl(PACK_SIGNATURE))
		/* "protocol error (pack signature mismatch detected)" */
		return PH_ERROR_PACK_SIGNATURE;
	if (!pack_version_ok(header->hdr_version))
		/* "protocol error (pack version unsupported)" */
		return PH_ERROR_PROTOCOL;
	return 0;
}
J
Jeff King 已提交
3549 3550 3551 3552 3553 3554 3555 3556 3557 3558

void assert_sha1_type(const unsigned char *sha1, enum object_type expect)
{
	enum object_type type = sha1_object_info(sha1, NULL);
	if (type < 0)
		die("%s is not a valid object", sha1_to_hex(sha1));
	if (type != expect)
		die("%s is not a valid '%s' object", sha1_to_hex(sha1),
		    typename(expect));
}
3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574

/*
 * Iterate over the entries of one object subdirectory, whose path is in
 * 'path' on entry.
 *
 * An entry with a 38-character name that, prefixed with the two hex
 * digits of 'subdir_nr', parses as a SHA-1 is passed to 'obj_cb'.
 * Every other entry (except "." and "..") is passed to 'cruft_cb'.
 * Once the directory has been read without a callback returning
 * non-zero, 'subdir_cb' is called for the subdirectory itself.  Any
 * callback may be NULL.
 *
 * 'path' is extended with each entry name while iterating and is cut
 * back to its original length before 'subdir_cb' runs and on return.
 *
 * Returns 0 if the directory does not exist, the result of
 * error_errno() if it cannot be opened for another reason, else the
 * first non-zero callback result (or 0).
 */
static int for_each_file_in_obj_subdir(int subdir_nr,
				       struct strbuf *path,
				       each_loose_object_fn obj_cb,
				       each_loose_cruft_fn cruft_cb,
				       each_loose_subdir_fn subdir_cb,
				       void *data)
{
	size_t baselen = path->len;
	struct dirent *de;
	int r = 0;
	DIR *dir;

	dir = opendir(path->buf);
	if (!dir) {
		if (errno == ENOENT)
			return 0;
		return error_errno("unable to open %s", path->buf);
	}

	while ((de = readdir(dir))) {
		const char *name = de->d_name;
		unsigned char sha1[20];
		int is_object = 0;

		if (is_dot_or_dotdot(name))
			continue;

		strbuf_setlen(path, baselen);
		strbuf_addf(path, "/%s", name);

		if (strlen(name) == 38) {
			char hex[41];

			snprintf(hex, sizeof(hex), "%02x%s",
				 subdir_nr, name);
			is_object = !get_sha1_hex(hex, sha1);
		}

		if (is_object) {
			if (obj_cb)
				r = obj_cb(sha1, path->buf, data);
		} else if (cruft_cb) {
			r = cruft_cb(name, path->buf, data);
		}

		/* r is still 0 unless a callback just set it. */
		if (r)
			break;
	}
	closedir(dir);

	strbuf_setlen(path, baselen);
	if (!r && subdir_cb)
		r = subdir_cb(subdir_nr, path->buf, data);

	return r;
}

3616
int for_each_loose_file_in_objdir_buf(struct strbuf *path,
3617 3618 3619 3620 3621
			    each_loose_object_fn obj_cb,
			    each_loose_cruft_fn cruft_cb,
			    each_loose_subdir_fn subdir_cb,
			    void *data)
{
3622
	size_t baselen = path->len;
3623 3624 3625 3626
	int r = 0;
	int i;

	for (i = 0; i < 256; i++) {
3627 3628
		strbuf_addf(path, "/%02x", i);
		r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
3629
						subdir_cb, data);
3630
		strbuf_setlen(path, baselen);
3631 3632 3633 3634
		if (r)
			break;
	}

3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649
	return r;
}

/*
 * Same as for_each_loose_file_in_objdir_buf(), but takes the object
 * directory as a plain string; a temporary strbuf is set up and
 * released internally.
 */
int for_each_loose_file_in_objdir(const char *path,
				  each_loose_object_fn obj_cb,
				  each_loose_cruft_fn cruft_cb,
				  each_loose_subdir_fn subdir_cb,
				  void *data)
{
	struct strbuf objdir = STRBUF_INIT;
	int result;

	strbuf_addstr(&objdir, path);
	result = for_each_loose_file_in_objdir_buf(&objdir, obj_cb, cruft_cb,
						   subdir_cb, data);
	strbuf_release(&objdir);

	return result;
}
3654 3655 3656 3657 3658 3659 3660 3661 3662 3663

/*
 * Bundle handed through foreach_alt_odb() to loose_from_alt_odb() so
 * that the per-object callback and its argument reach each alternate.
 */
struct loose_alt_odb_data {
	each_loose_object_fn *cb;	/* callback invoked per loose object */
	void *data;			/* opaque argument passed to 'cb' */
};

static int loose_from_alt_odb(struct alternate_object_database *alt,
			      void *vdata)
{
	struct loose_alt_odb_data *data = vdata;
3664 3665 3666 3667 3668 3669 3670 3671 3672 3673
	struct strbuf buf = STRBUF_INIT;
	int r;

	/* copy base not including trailing '/' */
	strbuf_add(&buf, alt->base, alt->name - alt->base - 1);
	r = for_each_loose_file_in_objdir_buf(&buf,
					      data->cb, NULL, NULL,
					      data->data);
	strbuf_release(&buf);
	return r;
3674 3675
}

3676
/*
 * Call 'cb' for every loose object in the object directory returned by
 * get_object_directory() and, unless FOR_EACH_OBJECT_LOCAL_ONLY is set
 * in 'flags', in every alternate object database as well.
 *
 * Returns the first non-zero callback result, or 0.
 */
int for_each_loose_object(each_loose_object_fn cb, void *data, unsigned flags)
{
	struct loose_alt_odb_data alt;
	int r = for_each_loose_file_in_objdir(get_object_directory(),
					      cb, NULL, NULL, data);

	/* Done on failure, or (with r == 0) when alternates are excluded. */
	if (r || (flags & FOR_EACH_OBJECT_LOCAL_ONLY))
		return r;

	alt.cb = cb;
	alt.data = data;
	return foreach_alt_odb(loose_from_alt_odb, &alt);
}

/*
 * Call 'cb' for each of the p->num_objects objects in pack 'p', in
 * index-position order.
 *
 * Returns the first non-zero callback result, the result of error() if
 * an object name cannot be looked up, or 0.
 */
static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data)
{
	uint32_t pos;
	int r = 0;

	for (pos = 0; !r && pos < p->num_objects; pos++) {
		const unsigned char *sha1 = nth_packed_object_sha1(p, pos);

		if (!sha1)
			return error("unable to get sha1 of object %u in %s",
				     pos, p->pack_name);

		r = cb(sha1, p, pos, data);
	}
	return r;
}

3713
/*
 * Call 'cb' for every object in every pack on the packed_git list.
 * With FOR_EACH_OBJECT_LOCAL_ONLY set in 'flags', packs whose
 * 'pack_local' field is zero are skipped.
 *
 * A pack whose index cannot be opened is skipped and remembered;
 * iteration continues with the remaining packs.
 *
 * Returns the first non-zero result from for_each_object_in_pack(); if
 * there was none, 1 when some pack index failed to open, else 0.
 */
int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
{
	const int local_only = flags & FOR_EACH_OBJECT_LOCAL_ONLY;
	struct packed_git *p;
	int pack_errors = 0;
	int r = 0;

	prepare_packed_git();
	for (p = packed_git; p; p = p->next) {
		if (local_only && !p->pack_local)
			continue;
		if (open_pack_index(p)) {
			pack_errors = 1;
			continue;
		}
		r = for_each_object_in_pack(p, cb, data);
		if (r)
			break;
	}

	if (r)
		return r;
	return pack_errors;
}