diff --git a/Documentation/git-verify-pack.txt b/Documentation/git-verify-pack.txt index db019a2b8d1bb283789726bf47ee0f7b9e4a523e..ba2a157299566b4dcc88f8ecb6f0eaed755f95bb 100644 --- a/Documentation/git-verify-pack.txt +++ b/Documentation/git-verify-pack.txt @@ -32,11 +32,11 @@ OUTPUT FORMAT ------------- When specifying the -v option the format used is: - SHA1 type size offset-in-packfile + SHA1 type size size-in-packfile offset-in-packfile for objects that are not deltified in the pack, and - SHA1 type size offset-in-packfile depth base-SHA1 + SHA1 type size size-in-packfile offset-in-packfile depth base-SHA1 for objects that are deltified. diff --git a/Makefile b/Makefile index 71f01d16b1a42d366b04f1fce621db4dac6b1976..ca5aad963ca7b5565338f49ce1705c451216c8be 100644 --- a/Makefile +++ b/Makefile @@ -304,7 +304,8 @@ LIB_H = \ run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \ tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \ utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \ - mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h fsck.h + mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h fsck.h \ + pack-revindex.h DIFF_OBJS = \ diff.o diff-lib.o diffcore-break.o diffcore-order.o \ @@ -328,7 +329,7 @@ LIB_OBJS = \ color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \ convert.o attr.o decorate.o progress.o mailmap.o symlinks.o remote.o \ transport.o bundle.o walker.o parse-options.o ws.o archive.o branch.o \ - alias.o fsck.o + alias.o fsck.o pack-revindex.o BUILTIN_OBJS = \ builtin-add.o \ diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 6c8b662e7835501f7540c8323596d4e04e0b90d1..2799e6833849eea9a4cb35213c1115c44bf702ed 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -8,6 +8,7 @@ #include "tree.h" #include "delta.h" #include "pack.h" +#include "pack-revindex.h" #include "csum-file.h" #include "tree-walk.h" #include "diff.h" @@ -92,158 +93,12 
@@ static unsigned long window_memory_limit = 0; static int *object_ix; static int object_ix_hashsz; -/* - * Pack index for existing packs give us easy access to the offsets into - * corresponding pack file where each object's data starts, but the entries - * do not store the size of the compressed representation (uncompressed - * size is easily available by examining the pack entry header). It is - * also rather expensive to find the sha1 for an object given its offset. - * - * We build a hashtable of existing packs (pack_revindex), and keep reverse - * index here -- pack index file is sorted by object name mapping to offset; - * this pack_revindex[].revindex array is a list of offset/index_nr pairs - * ordered by offset, so if you know the offset of an object, next offset - * is where its packed representation ends and the index_nr can be used to - * get the object sha1 from the main index. - */ -struct revindex_entry { - off_t offset; - unsigned int nr; -}; -struct pack_revindex { - struct packed_git *p; - struct revindex_entry *revindex; -}; -static struct pack_revindex *pack_revindex; -static int pack_revindex_hashsz; - /* * stats */ static uint32_t written, written_delta; static uint32_t reused, reused_delta; -static int pack_revindex_ix(struct packed_git *p) -{ - unsigned long ui = (unsigned long)p; - int i; - - ui = ui ^ (ui >> 16); /* defeat structure alignment */ - i = (int)(ui % pack_revindex_hashsz); - while (pack_revindex[i].p) { - if (pack_revindex[i].p == p) - return i; - if (++i == pack_revindex_hashsz) - i = 0; - } - return -1 - i; -} - -static void prepare_pack_ix(void) -{ - int num; - struct packed_git *p; - for (num = 0, p = packed_git; p; p = p->next) - num++; - if (!num) - return; - pack_revindex_hashsz = num * 11; - pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); - for (p = packed_git; p; p = p->next) { - num = pack_revindex_ix(p); - num = - 1 - num; - pack_revindex[num].p = p; - } - /* revindex elements are lazily 
initialized */ -} - -static int cmp_offset(const void *a_, const void *b_) -{ - const struct revindex_entry *a = a_; - const struct revindex_entry *b = b_; - return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0; -} - -/* - * Ordered list of offsets of objects in the pack. - */ -static void prepare_pack_revindex(struct pack_revindex *rix) -{ - struct packed_git *p = rix->p; - int num_ent = p->num_objects; - int i; - const char *index = p->index_data; - - rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1)); - index += 4 * 256; - - if (p->index_version > 1) { - const uint32_t *off_32 = - (uint32_t *)(index + 8 + p->num_objects * (20 + 4)); - const uint32_t *off_64 = off_32 + p->num_objects; - for (i = 0; i < num_ent; i++) { - uint32_t off = ntohl(*off_32++); - if (!(off & 0x80000000)) { - rix->revindex[i].offset = off; - } else { - rix->revindex[i].offset = - ((uint64_t)ntohl(*off_64++)) << 32; - rix->revindex[i].offset |= - ntohl(*off_64++); - } - rix->revindex[i].nr = i; - } - } else { - for (i = 0; i < num_ent; i++) { - uint32_t hl = *((uint32_t *)(index + 24 * i)); - rix->revindex[i].offset = ntohl(hl); - rix->revindex[i].nr = i; - } - } - - /* This knows the pack format -- the 20-byte trailer - * follows immediately after the last object data. 
- */ - rix->revindex[num_ent].offset = p->pack_size - 20; - rix->revindex[num_ent].nr = -1; - qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset); -} - -static struct revindex_entry * find_packed_object(struct packed_git *p, - off_t ofs) -{ - int num; - int lo, hi; - struct pack_revindex *rix; - struct revindex_entry *revindex; - num = pack_revindex_ix(p); - if (num < 0) - die("internal error: pack revindex uninitialized"); - rix = &pack_revindex[num]; - if (!rix->revindex) - prepare_pack_revindex(rix); - revindex = rix->revindex; - lo = 0; - hi = p->num_objects + 1; - do { - int mi = (lo + hi) / 2; - if (revindex[mi].offset == ofs) { - return revindex + mi; - } - else if (ofs < revindex[mi].offset) - hi = mi; - else - lo = mi + 1; - } while (lo < hi); - die("internal error: pack revindex corrupt"); -} - -static const unsigned char *find_packed_object_name(struct packed_git *p, - off_t ofs) -{ - struct revindex_entry *entry = find_packed_object(p, ofs); - return nth_packed_object_sha1(p, entry->nr); -} static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) { @@ -510,7 +365,7 @@ static unsigned long write_object(struct sha1file *f, } hdrlen = encode_header(obj_type, entry->size, header); offset = entry->in_pack_offset; - revidx = find_packed_object(p, offset); + revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; if (!pack_to_stdout && p->index_version > 1 && check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) @@ -1162,8 +1017,11 @@ static void check_object(struct object_entry *entry) die("delta base offset out of bound for %s", sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; - if (!no_reuse_delta && !entry->preferred_base) - base_ref = find_packed_object_name(p, ofs); + if (!no_reuse_delta && !entry->preferred_base) { + struct revindex_entry *revidx; + revidx = find_pack_revindex(p, ofs); + base_ref = nth_packed_object_sha1(p, revidx->nr); + } entry->in_pack_header_size = 
used + used_0; break; } @@ -1240,9 +1098,11 @@ static void get_object_details(void) sorted_by_offset[i] = objects + i; qsort(sorted_by_offset, nr_objects, sizeof(*sorted_by_offset), pack_offset_sort); - prepare_pack_ix(); + init_pack_revindex(); + for (i = 0; i < nr_objects; i++) check_object(sorted_by_offset[i]); + free(sorted_by_offset); } diff --git a/builtin-verify-pack.c b/builtin-verify-pack.c index 4e31c273f48e3983aaf99dc6525982d34b6fed06..4958bbbf11f5f796feedfa7480b827029f912d01 100644 --- a/builtin-verify-pack.c +++ b/builtin-verify-pack.c @@ -40,8 +40,8 @@ static int verify_one_pack(const char *path, int verbose) if (!pack) return error("packfile %s not found.", arg); + install_packed_git(pack); err = verify_pack(pack, verbose); - free(pack); return err; } diff --git a/contrib/stats/packinfo.pl b/contrib/stats/packinfo.pl index aab501ea08129cc3b8304fb8f76f206578273f85..f4a7b62cd9f1a397118b95792c04c2f70f910f9e 100755 --- a/contrib/stats/packinfo.pl +++ b/contrib/stats/packinfo.pl @@ -93,7 +93,7 @@ my @depths; while (<STDIN>) { - my ($sha1, $type, $size, $offset, $depth, $parent) = split(/\s+/, $_); + my ($sha1, $type, $size, $space, $offset, $depth, $parent) = split(/\s+/, $_); next unless ($sha1 =~ /^[0-9a-f]{40}$/); $depths{$sha1} = $depth || 0; push(@depths, $depth || 0); diff --git a/pack-check.c b/pack-check.c index d7dd62bb8346c4cac8dbd7334e999a450c21c5ab..0f8ad2c00f21d9a0a64b541ed95fff91650d7175 100644 --- a/pack-check.c +++ b/pack-check.c @@ -1,5 +1,6 @@ #include "cache.h" #include "pack.h" +#include "pack-revindex.h" struct idx_entry { @@ -101,8 +102,10 @@ static int verify_packfile(struct packed_git *p, static void show_pack_info(struct packed_git *p) { uint32_t nr_objects, i, chain_histogram[MAX_CHAIN+1]; + nr_objects = p->num_objects; memset(chain_histogram, 0, sizeof(chain_histogram)); + init_pack_revindex(); for (i = 0; i < nr_objects; i++) { const unsigned char *sha1; @@ -125,11 +128,11 @@ static void show_pack_info(struct packed_git *p) 
base_sha1); printf("%s ", sha1_to_hex(sha1)); if (!delta_chain_length) - printf("%-6s %lu %"PRIuMAX"\n", - type, size, (uintmax_t)offset); + printf("%-6s %lu %lu %"PRIuMAX"\n", + type, size, store_size, (uintmax_t)offset); else { - printf("%-6s %lu %"PRIuMAX" %u %s\n", - type, size, (uintmax_t)offset, + printf("%-6s %lu %lu %"PRIuMAX" %u %s\n", + type, size, store_size, (uintmax_t)offset, delta_chain_length, sha1_to_hex(base_sha1)); if (delta_chain_length <= MAX_CHAIN) chain_histogram[delta_chain_length]++; diff --git a/pack-revindex.c b/pack-revindex.c new file mode 100644 index 0000000000000000000000000000000000000000..a8aa2cd6caefe7d37febdf5c3426cec043492b19 --- /dev/null +++ b/pack-revindex.c @@ -0,0 +1,142 @@ +#include "cache.h" +#include "pack-revindex.h" + +/* + * Pack index for existing packs give us easy access to the offsets into + * corresponding pack file where each object's data starts, but the entries + * do not store the size of the compressed representation (uncompressed + * size is easily available by examining the pack entry header). It is + * also rather expensive to find the sha1 for an object given its offset. + * + * We build a hashtable of existing packs (pack_revindex), and keep reverse + * index here -- pack index file is sorted by object name mapping to offset; + * this pack_revindex[].revindex array is a list of offset/index_nr pairs + * ordered by offset, so if you know the offset of an object, next offset + * is where its packed representation ends and the index_nr can be used to + * get the object sha1 from the main index. 
+ */ + +struct pack_revindex { + struct packed_git *p; + struct revindex_entry *revindex; +}; + +static struct pack_revindex *pack_revindex; +static int pack_revindex_hashsz; + +static int pack_revindex_ix(struct packed_git *p) +{ + unsigned long ui = (unsigned long)p; + int i; + + ui = ui ^ (ui >> 16); /* defeat structure alignment */ + i = (int)(ui % pack_revindex_hashsz); + while (pack_revindex[i].p) { + if (pack_revindex[i].p == p) + return i; + if (++i == pack_revindex_hashsz) + i = 0; + } + return -1 - i; +} + +void init_pack_revindex(void) +{ + int num; + struct packed_git *p; + + for (num = 0, p = packed_git; p; p = p->next) + num++; + if (!num) + return; + pack_revindex_hashsz = num * 11; + pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); + for (p = packed_git; p; p = p->next) { + num = pack_revindex_ix(p); + num = - 1 - num; + pack_revindex[num].p = p; + } + /* revindex elements are lazily initialized */ +} + +static int cmp_offset(const void *a_, const void *b_) +{ + const struct revindex_entry *a = a_; + const struct revindex_entry *b = b_; + return (a->offset < b->offset) ? -1 : (a->offset > b->offset) ? 1 : 0; +} + +/* + * Ordered list of offsets of objects in the pack. 
+ */ +static void create_pack_revindex(struct pack_revindex *rix) +{ + struct packed_git *p = rix->p; + int num_ent = p->num_objects; + int i; + const char *index = p->index_data; + + rix->revindex = xmalloc(sizeof(*rix->revindex) * (num_ent + 1)); + index += 4 * 256; + + if (p->index_version > 1) { + const uint32_t *off_32 = + (uint32_t *)(index + 8 + p->num_objects * (20 + 4)); + const uint32_t *off_64 = off_32 + p->num_objects; + for (i = 0; i < num_ent; i++) { + uint32_t off = ntohl(*off_32++); + if (!(off & 0x80000000)) { + rix->revindex[i].offset = off; + } else { + rix->revindex[i].offset = + ((uint64_t)ntohl(*off_64++)) << 32; + rix->revindex[i].offset |= + ntohl(*off_64++); + } + rix->revindex[i].nr = i; + } + } else { + for (i = 0; i < num_ent; i++) { + uint32_t hl = *((uint32_t *)(index + 24 * i)); + rix->revindex[i].offset = ntohl(hl); + rix->revindex[i].nr = i; + } + } + + /* This knows the pack format -- the 20-byte trailer + * follows immediately after the last object data. 
+ */ + rix->revindex[num_ent].offset = p->pack_size - 20; + rix->revindex[num_ent].nr = -1; + qsort(rix->revindex, num_ent, sizeof(*rix->revindex), cmp_offset); +} + +struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs) +{ + int num; + int lo, hi; + struct pack_revindex *rix; + struct revindex_entry *revindex; + + num = pack_revindex_ix(p); + if (num < 0) + die("internal error: pack revindex uninitialized"); + + rix = &pack_revindex[num]; + if (!rix->revindex) + create_pack_revindex(rix); + revindex = rix->revindex; + + lo = 0; + hi = p->num_objects + 1; + do { + int mi = (lo + hi) / 2; + if (revindex[mi].offset == ofs) { + return revindex + mi; + } else if (ofs < revindex[mi].offset) + hi = mi; + else + lo = mi + 1; + } while (lo < hi); + die("internal error: pack revindex corrupt"); +} diff --git a/pack-revindex.h b/pack-revindex.h new file mode 100644 index 0000000000000000000000000000000000000000..c3527a75655470b95ab4ba0900e9c1ad6a15a35f --- /dev/null +++ b/pack-revindex.h @@ -0,0 +1,12 @@ +#ifndef PACK_REVINDEX_H +#define PACK_REVINDEX_H + +struct revindex_entry { + off_t offset; + unsigned int nr; +}; + +void init_pack_revindex(void); +struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs); + +#endif diff --git a/sha1_file.c b/sha1_file.c index 1ddb96bb82b60f4da8489664d13f398d88bf5a15..445a871db31673af20017d36ff22fd106f77f510 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -14,6 +14,7 @@ #include "tag.h" #include "tree.h" #include "refs.h" +#include "pack-revindex.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -1367,11 +1368,15 @@ const char *packed_object_info_detail(struct packed_git *p, unsigned long dummy; unsigned char *next_sha1; enum object_type type; + struct revindex_entry *revidx; *delta_chain_length = 0; curpos = obj_offset; type = unpack_object_header(p, &w_curs, &curpos, size); + revidx = find_pack_revindex(p, obj_offset); + *store_size = revidx[1].offset - 
obj_offset; + for (;;) { switch (type) { default: @@ -1381,14 +1386,13 @@ const char *packed_object_info_detail(struct packed_git *p, case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - *store_size = 0; /* notyet */ unuse_pack(&w_curs); return typename(type); case OBJ_OFS_DELTA: obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); if (*delta_chain_length == 0) { - /* TODO: find base_sha1 as pointed by curpos */ - hashclr(base_sha1); + revidx = find_pack_revindex(p, obj_offset); + hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr)); } break; case OBJ_REF_DELTA: