提交 cf2999eb 编写于 作者: J Junio C Hamano

Merge branch 'sp/mmap'

* sp/mmap: (27 commits)
  Spell default packedgitlimit slightly differently
  Increase packedGit{Limit,WindowSize} on 64 bit systems.
  Update packedGit config option documentation.
  mmap: set FD_CLOEXEC for file descriptors we keep open for mmap()
  pack-objects: fix use of use_pack().
  Fix random segfaults in pack-objects.
  Cleanup read_cache_from error handling.
  Replace mmap with xmmap, better handling MAP_FAILED.
  Release pack windows before reporting out of memory.
  Default core.packdGitWindowSize to 1 MiB if NO_MMAP.
  Test suite for sliding window mmap implementation.
  Create pack_report() as a debugging aid.
  Support unmapping windows on 'temporary' packfiles.
  Improve error message when packfile mmap fails.
  Ensure core.packedGitWindowSize cannot be less than 2 pages.
  Load core configuration in git-verify-pack.
  Fully activate the sliding window pack access.
  Unmap individual windows rather than entire files.
  Document why header parsing won't exceed a window.
  Loop over pack_windows when inflating/accessing data.
  ...

Conflicts:

	cache.h
	pack-check.c
......@@ -118,6 +118,34 @@ core.legacyheaders::
database directly (where the "http://" and "rsync://" protocols
count as direct access).
core.packedGitWindowSize::
Number of bytes of a pack file to map into memory in a
single mapping operation. Larger window sizes may allow
your system to process a smaller number of large pack files
more quickly. Smaller window sizes will negatively affect
performance due to increased calls to the operating system's
memory manager, but may improve performance when accessing
a large number of large pack files.
+
Default is 1 MiB if NO_MMAP was set at compile time, otherwise 32
MiB on 32 bit platforms and 1 GiB on 64 bit platforms. This should
be reasonable for all users/operating systems. You probably do
not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.
core.packedGitLimit::
Maximum number of bytes to map simultaneously into memory
from pack files. If Git needs to access more than this many
bytes at once to complete an operation it will unmap existing
regions to reclaim virtual address space within the process.
+
Default is 256 MiB on 32 bit platforms and 8 GiB on 64 bit platforms.
This should be reasonable for all users/operating systems, except on
the largest projects. You probably do not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.
alias.*::
Command aliases for the gitlink:git[1] command wrapper - e.g.
after defining "alias.last = cat-file commit HEAD", the invocation
......
......@@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
* we are going to reuse the existing object data as is. make
* sure it is not corrupt.
*/
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
static int check_pack_inflate(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len,
unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096], *in;
int st;
memset(&stream, 0, sizeof(stream));
inflateInit(&stream);
do {
in = use_pack(p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
stream.next_out = fakebuf;
stream.avail_out = sizeof(fakebuf);
st = inflate(&stream, Z_FINISH);
offset += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
inflateEnd(&stream);
return (st == Z_STREAM_END &&
stream.total_out == expect &&
stream.total_in == len) ? 0 : -1;
}
static void copy_pack_data(struct sha1file *f,
struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len)
{
unsigned char *in;
unsigned int avail;
while (len) {
in = use_pack(p, w_curs, offset, &avail);
if (avail > len)
avail = len;
sha1write(f, in, avail);
offset += avail;
len -= avail;
}
}
static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096];
......@@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry,
return -1;
map += used;
mapsize -= used;
return check_inflate(map, mapsize, size);
return check_loose_inflate(map, mapsize, size);
}
static unsigned long write_object(struct sha1file *f,
......@@ -416,6 +461,8 @@ static unsigned long write_object(struct sha1file *f,
}
else {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long offset;
if (entry->delta) {
obj_type = (allow_ofs_delta && entry->delta->offset) ?
......@@ -437,16 +484,14 @@ static unsigned long write_object(struct sha1file *f,
hdrlen += 20;
}
use_packed_git(p);
buf = (char *) p->pack_base
+ entry->in_pack_offset
+ entry->in_pack_header_size;
offset = entry->in_pack_offset + entry->in_pack_header_size;
datalen = find_packed_object_size(p, entry->in_pack_offset)
- entry->in_pack_header_size;
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
offset, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
sha1write(f, buf, datalen);
unuse_packed_git(p);
copy_pack_data(f, p, &w_curs, offset, datalen);
unuse_pack(&w_curs);
reused++;
}
if (entry->delta)
......@@ -937,22 +982,19 @@ static void check_object(struct object_entry *entry)
if (entry->in_pack && !entry->preferred_base) {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long left = p->pack_size - entry->in_pack_offset;
unsigned long size, used;
unsigned char *buf;
struct object_entry *base_entry = NULL;
use_packed_git(p);
buf = p->pack_base;
buf += entry->in_pack_offset;
buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL);
/* We want in_pack_type even if we do not reuse delta.
* There is no point not reusing non-delta representations.
*/
used = unpack_object_header_gently(buf, left,
&entry->in_pack_type, &size);
if (!used || left - used <= 20)
die("corrupt pack for %s", sha1_to_hex(entry->sha1));
/* Check if it is delta, and the base is also an object
* we are going to pack. If so we will reuse the existing
......@@ -961,21 +1003,26 @@ static void check_object(struct object_entry *entry)
if (!no_reuse_delta) {
unsigned char c, *base_name;
unsigned long ofs;
unsigned long used_0;
/* there is at least 20 bytes left in the pack */
switch (entry->in_pack_type) {
case OBJ_REF_DELTA:
base_name = buf + used;
base_name = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used += 20;
break;
case OBJ_OFS_DELTA:
c = buf[used++];
buf = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used_0 = 0;
c = buf[used_0++];
ofs = c & 127;
while (c & 128) {
ofs += 1;
if (!ofs || ofs & ~(~0UL >> 7))
die("delta base offset overflow in pack for %s",
sha1_to_hex(entry->sha1));
c = buf[used++];
c = buf[used_0++];
ofs = (ofs << 7) + (c & 127);
}
if (ofs >= entry->in_pack_offset)
......@@ -983,6 +1030,7 @@ static void check_object(struct object_entry *entry)
sha1_to_hex(entry->sha1));
ofs = entry->in_pack_offset - ofs;
base_name = find_packed_object_name(p, ofs);
used += used_0;
break;
default:
base_name = NULL;
......@@ -990,7 +1038,7 @@ static void check_object(struct object_entry *entry)
if (base_name)
base_entry = locate_object_entry(base_name);
}
unuse_packed_git(p);
unuse_pack(&w_curs);
entry->in_pack_header_size = used;
if (base_entry) {
......
......@@ -55,6 +55,7 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix)
int no_more_options = 0;
int nothing_done = 1;
git_config(git_default_config);
while (1 < argc) {
if (!no_more_options && argv[1][0] == '-') {
if (!strcmp("-v", argv[1]))
......
......@@ -197,6 +197,8 @@ extern int warn_ambiguous_refs;
extern int shared_repository;
extern const char *apply_default_whitespace;
extern int zlib_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
#define GIT_REPO_VERSION 0
extern int repository_format_version;
......@@ -336,14 +338,22 @@ extern struct alternate_object_database {
} *alt_odb_list;
extern void prepare_alt_odb(void);
struct pack_window {
struct pack_window *next;
unsigned char *base;
off_t offset;
size_t len;
unsigned int last_used;
unsigned int inuse_cnt;
};
extern struct packed_git {
struct packed_git *next;
unsigned long index_size;
unsigned long pack_size;
struct pack_window *windows;
unsigned int *index_base;
void *pack_base;
unsigned int pack_last_used;
unsigned int pack_use_cnt;
off_t index_size;
off_t pack_size;
int pack_fd;
int pack_local;
unsigned char sha1[20];
/* something like ".git/objects/pack/xxxxx.pack" */
......@@ -389,13 +399,14 @@ extern void install_packed_git(struct packed_git *pack);
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs);
extern int use_packed_git(struct packed_git *);
extern void unuse_packed_git(struct packed_git *);
extern void pack_report();
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *);
extern void unuse_pack(struct pack_window **);
extern struct packed_git *add_packed_git(char *, int, int);
extern int num_packed_objects(const struct packed_git *p);
extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *);
extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *);
extern void *unpack_entry(struct packed_git *, unsigned long, char *, unsigned long *);
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *);
......@@ -421,6 +432,7 @@ extern char *git_commit_encoding;
extern char *git_log_output_encoding;
extern int copy_fd(int ifd, int ofd);
extern void read_or_die(int fd, void *buf, size_t count);
extern int write_in_full(int fd, const void *buf, size_t count, const char *);
extern void write_or_die(int fd, const void *buf, size_t count);
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
......
......@@ -304,6 +304,21 @@ int git_default_config(const char *var, const char *value)
return 0;
}
if (!strcmp(var, "core.packedgitwindowsize")) {
int pgsz = getpagesize();
packed_git_window_size = git_config_int(var, value);
packed_git_window_size /= pgsz;
if (packed_git_window_size < 2)
packed_git_window_size = 2;
packed_git_window_size *= pgsz;
return 0;
}
if (!strcmp(var, "core.packedgitlimit")) {
packed_git_limit = git_config_int(var, value);
return 0;
}
if (!strcmp(var, "user.name")) {
strlcpy(git_default_name, value, sizeof(git_default_name));
return 0;
......@@ -695,7 +710,7 @@ int git_config_set_multivar(const char* key, const char* value,
}
fstat(in_fd, &st);
contents = mmap(NULL, st.st_size, PROT_READ,
contents = xmmap(NULL, st.st_size, PROT_READ,
MAP_PRIVATE, in_fd, 0);
close(in_fd);
......
......@@ -1341,10 +1341,8 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (s->data == MAP_FAILED)
goto err_empty;
s->should_munmap = 1;
}
else {
......
......@@ -23,6 +23,8 @@ char *git_log_output_encoding;
int shared_repository = PERM_UMASK;
const char *apply_default_whitespace;
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
int pager_in_use;
int pager_use_color = 1;
......
......@@ -92,12 +92,21 @@ extern void set_warn_routine(void (*routine)(const char *warn, va_list params));
extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset);
extern int git_munmap(void *start, size_t length);
#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024)
#else /* NO_MMAP */
#include <sys/mman.h>
#define DEFAULT_PACKED_GIT_WINDOW_SIZE \
(sizeof(void*) >= 8 \
? 1 * 1024 * 1024 * 1024 \
: 32 * 1024 * 1024)
#endif /* NO_MMAP */
#define DEFAULT_PACKED_GIT_LIMIT \
((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256))
#ifdef NO_SETENV
#define setenv gitsetenv
extern int gitsetenv(const char *, const char *, int);
......@@ -118,11 +127,17 @@ extern char *gitstrcasestr(const char *haystack, const char *needle);
extern size_t gitstrlcpy(char *, const char *, size_t);
#endif
extern void release_pack_memory(size_t);
static inline char* xstrdup(const char *str)
{
char *ret = strdup(str);
if (!ret)
die("Out of memory, strdup failed");
if (!ret) {
release_pack_memory(strlen(str) + 1);
ret = strdup(str);
if (!ret)
die("Out of memory, strdup failed");
}
return ret;
}
......@@ -131,8 +146,14 @@ static inline void *xmalloc(size_t size)
void *ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
die("Out of memory, malloc failed");
if (!ret) {
release_pack_memory(size);
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
die("Out of memory, malloc failed");
}
#ifdef XMALLOC_POISON
memset(ret, 0xA5, size);
#endif
......@@ -144,8 +165,14 @@ static inline void *xrealloc(void *ptr, size_t size)
void *ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
if (!ret)
die("Out of memory, realloc failed");
if (!ret) {
release_pack_memory(size);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
if (!ret)
die("Out of memory, realloc failed");
}
return ret;
}
......@@ -154,8 +181,27 @@ static inline void *xcalloc(size_t nmemb, size_t size)
void *ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
if (!ret)
die("Out of memory, calloc failed");
if (!ret) {
release_pack_memory(nmemb * size);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
if (!ret)
die("Out of memory, calloc failed");
}
return ret;
}
static inline void *xmmap(void *start, size_t length,
int prot, int flags, int fd, off_t offset)
{
void *ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED) {
release_pack_memory(length);
ret = mmap(start, length, prot, flags, fd, offset);
if (ret == MAP_FAILED)
die("Out of memory? mmap failed: %s", strerror(errno));
}
return ret;
}
......
#include "cache.h"
#include "pack.h"
#define BATCH (1u<<20)
static int verify_packfile(struct packed_git *p)
static int verify_packfile(struct packed_git *p,
struct pack_window **w_curs)
{
unsigned long index_size = p->index_size;
void *index_base = p->index_base;
SHA_CTX ctx;
unsigned char sha1[20];
struct pack_header *hdr;
unsigned long offset = 0, pack_sig = p->pack_size - 20;
int nr_objects, err, i;
unsigned char *packdata;
unsigned long datasize;
/* Header consistency check */
hdr = p->pack_base;
if (hdr->hdr_signature != htonl(PACK_SIGNATURE))
return error("Packfile %s signature mismatch", p->pack_name);
if (!pack_version_ok(hdr->hdr_version))
return error("Packfile version %d unsupported",
ntohl(hdr->hdr_version));
nr_objects = ntohl(hdr->hdr_entries);
if (num_packed_objects(p) != nr_objects)
return error("Packfile claims to have %d objects, "
"while idx size expects %d", nr_objects,
num_packed_objects(p));
/* Check integrity of pack data with its SHA-1 checksum */
/* Note that the pack header checks are actually performed by
* use_pack when it first opens the pack file. If anything
* goes wrong during those checks then the call will die out
* immediately.
*/
SHA1_Init(&ctx);
packdata = p->pack_base;
datasize = p->pack_size - 20;
while (datasize) {
unsigned long batch = (datasize < BATCH) ? datasize : BATCH;
SHA1_Update(&ctx, packdata, batch);
datasize -= batch;
packdata += batch;
while (offset < pack_sig) {
unsigned int remaining;
unsigned char *in = use_pack(p, w_curs, offset, &remaining);
offset += remaining;
if (offset > pack_sig)
remaining -= offset - pack_sig;
SHA1_Update(&ctx, in, remaining);
}
SHA1_Final(sha1, &ctx);
if (hashcmp(sha1, (unsigned char *)(p->pack_base) + p->pack_size - 20))
if (hashcmp(sha1, use_pack(p, w_curs, pack_sig, NULL)))
return error("Packfile %s SHA1 mismatch with itself",
p->pack_name);
if (hashcmp(sha1, (unsigned char *)index_base + index_size - 40))
return error("Packfile %s SHA1 mismatch with idx",
p->pack_name);
unuse_pack(w_curs);
/* Make sure everything reachable from idx is valid. Since we
* have verified that nr_objects matches between idx and pack,
* we do not do scan-streaming check on the pack file.
*/
nr_objects = num_packed_objects(p);
for (i = err = 0; i < nr_objects; i++) {
unsigned char sha1[20];
void *data;
......@@ -61,7 +51,7 @@ static int verify_packfile(struct packed_git *p)
offset = find_pack_entry_one(sha1, p);
if (!offset)
die("internal error pack-check find-pack-entry-one");
data = unpack_entry_gently(p, offset, type, &size);
data = unpack_entry(p, offset, type, &size);
if (!data) {
err = error("cannot unpack %s from %s",
sha1_to_hex(sha1), p->pack_name);
......@@ -84,12 +74,10 @@ static int verify_packfile(struct packed_git *p)
static void show_pack_info(struct packed_git *p)
{
struct pack_header *hdr;
int nr_objects, i;
unsigned int chain_histogram[MAX_CHAIN];
hdr = p->pack_base;
nr_objects = ntohl(hdr->hdr_entries);
nr_objects = num_packed_objects(p);
memset(chain_histogram, 0, sizeof(chain_histogram));
for (i = 0; i < nr_objects; i++) {
......@@ -152,18 +140,16 @@ int verify_pack(struct packed_git *p, int verbose)
if (!ret) {
/* Verify pack file */
use_packed_git(p);
ret = verify_packfile(p);
unuse_packed_git(p);
struct pack_window *w_curs = NULL;
ret = verify_packfile(p, &w_curs);
unuse_pack(&w_curs);
}
if (verbose) {
if (ret)
printf("%s: bad\n", p->pack_name);
else {
use_packed_git(p);
show_pack_info(p);
unuse_packed_git(p);
printf("%s: ok\n", p->pack_name);
}
}
......
......@@ -793,16 +793,16 @@ int read_cache_from(const char *path)
die("index file open failed (%s)", strerror(errno));
}
cache_mmap = MAP_FAILED;
if (!fstat(fd, &st)) {
cache_mmap_size = st.st_size;
errno = EINVAL;
if (cache_mmap_size >= sizeof(struct cache_header) + 20)
cache_mmap = mmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
}
cache_mmap = xmmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
else
die("index file smaller than expected");
} else
die("cannot stat the open index (%s)", strerror(errno));
close(fd);
if (cache_mmap == MAP_FAILED)
die("index file mmap failed (%s)", strerror(errno));
hdr = cache_mmap;
if (verify_hdr(hdr, cache_mmap_size) < 0)
......
......@@ -1025,7 +1025,7 @@ int read_ref_at(const char *ref, unsigned long at_time, int cnt, unsigned char *
fstat(logfd, &st);
if (!st.st_size)
die("Log %s is empty.", logfile);
logdata = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0);
logdata = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0);
close(logfd);
lastrec = NULL;
......
此差异已折叠。
#!/bin/sh
#
# Copyright (c) 2006 Shawn Pearce
#
test_description='mmap sliding window tests'
. ./test-lib.sh
test_expect_success \
'setup' \
'rm -f .git/index*
for i in a b c
do
echo $i >$i &&
dd if=/dev/urandom bs=32k count=1 >>$i &&
git-update-index --add $i || return 1
done &&
echo d >d && cat c >>d && git-update-index --add d &&
tree=`git-write-tree` &&
commit1=`git-commit-tree $tree </dev/null` &&
git-update-ref HEAD $commit1 &&
git-repack -a -d &&
test "`git-count-objects`" = "0 objects, 0 kilobytes" &&
pack1=`ls .git/objects/pack/*.pack` &&
test -f "$pack1"'
test_expect_success \
'verify-pack -v, defaults' \
'git-verify-pack -v "$pack1"'
test_expect_success \
'verify-pack -v, packedGitWindowSize == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-verify-pack -v "$pack1"'
test_expect_success \
'verify-pack -v, packedGit{WindowSize,Limit} == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-repo-config core.packedGitLimit 512 &&
git-verify-pack -v "$pack1"'
test_expect_success \
'repack -a -d, packedGit{WindowSize,Limit} == 1 page' \
'git-repo-config core.packedGitWindowSize 512 &&
git-repo-config core.packedGitLimit 512 &&
commit2=`git-commit-tree $tree -p $commit1 </dev/null` &&
git-update-ref HEAD $commit2 &&
git-repack -a -d &&
test "`git-count-objects`" = "0 objects, 0 kilobytes" &&
pack2=`ls .git/objects/pack/*.pack` &&
test -f "$pack2"
test "$pack1" \!= "$pack2"'
test_expect_success \
'verify-pack -v, defaults' \
'git-repo-config --unset core.packedGitWindowSize &&
git-repo-config --unset core.packedGitLimit &&
git-verify-pack -v "$pack2"'
test_done
#include "cache.h"
void read_or_die(int fd, void *buf, size_t count)
{
char *p = buf;
ssize_t loaded;
while (count > 0) {
loaded = xread(fd, p, count);
if (loaded == 0)
die("unexpected end of file");
else if (loaded < 0)
die("read error (%s)", strerror(errno));
count -= loaded;
p += loaded;
}
}
void write_or_die(int fd, const void *buf, size_t count)
{
const char *p = buf;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册