diff --git a/cache.h b/cache.h index 3a47cdc9d26886db66379c0000bbe80565e2f742..409738ca6b4b6dcefb60937c665bf0f1456a0f5a 100644 --- a/cache.h +++ b/cache.h @@ -3,6 +3,7 @@ #include "git-compat-util.h" #include "strbuf.h" +#include "hash.h" #include SHA1_HEADER #include @@ -109,6 +110,7 @@ struct ondisk_cache_entry { }; struct cache_entry { + struct cache_entry *next; /* next entry chained in the same name_hash bucket */ unsigned int ce_ctime; unsigned int ce_mtime; unsigned int ce_dev; @@ -131,6 +133,7 @@ struct cache_entry { #define CE_UPDATE (0x10000) #define CE_REMOVE (0x20000) #define CE_UPTODATE (0x40000) +#define CE_UNHASHED (0x80000) /* set by remove_hash_entry(); index_name_exists() skips such entries */ static inline unsigned create_ce_flags(size_t len, unsigned stage) { @@ -188,6 +191,7 @@ struct index_state { struct cache_tree *cache_tree; time_t timestamp; void *alloc; + struct hash_table name_hash; /* entry-name hash, filled by set_index_entry() */ }; extern struct index_state the_index; @@ -211,6 +215,7 @@ extern struct index_state the_index; #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL) #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options)) #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) +#define cache_name_exists(name, namelen) index_name_exists(&the_index, (name), (namelen)) #endif enum object_type { @@ -297,6 +302,7 @@ extern int read_index_from(struct index_state *, const char *path); extern int write_index(struct index_state *, int newfd); extern int discard_index(struct index_state *); extern int verify_path(const char *path); +extern int index_name_exists(struct index_state *istate, const char *name, int namelen); extern int index_name_pos(struct index_state *, const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/dir.c b/dir.c index 1b9cc7a8a81002fa2971d71c41a256444e6112fe..6543105b9622212430a9e5ed131a81074e019d9a 100644 --- a/dir.c +++ b/dir.c @@ -346,7 +346,7 @@ static struct dir_entry *dir_entry_new(const char 
*pathname, int len) struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len) { - if (cache_name_pos(pathname, len) >= 0) + if (cache_name_exists(pathname, len)) return NULL; ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc); diff --git a/read-cache.c b/read-cache.c index 07abd5d7ebfc10998a3c6140a7a325d752d70b49..9477c0b398125c3759ae3692de80e75ae62e11e0 100644 --- a/read-cache.c +++ b/read-cache.c @@ -23,6 +23,70 @@ struct index_state the_index; +static unsigned int hash_name(const char *name, int namelen) /* hash namelen bytes of name (seed 0x123, multiplier 101) */ +{ + unsigned int hash = 0x123; + + while (namelen-- > 0) { /* pre-checked: namelen <= 0 returns the seed instead of running past name */ + unsigned char c = *name++; + hash = hash*101 + c; + } + return hash; +} + +static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) /* store ce in cache[nr] and add it to name_hash */ +{ + void **pos; + unsigned int hash = hash_name(ce->name, ce_namelen(ce)); + + istate->cache[nr] = ce; + pos = insert_hash(hash, ce, &istate->name_hash); + /* chain onto the slot's current head if one is returned; otherwise end the list (presumably NULL means ce itself was stored - confirm in hash.h) */ + ce->next = pos ? *pos : NULL; + if (pos) + *pos = ce; +} + +/* + * We don't actually *remove* it, we can just mark it invalid so that + * we won't find it in lookups. + * + * Not only would we have to search the lists (simple enough), but + * we'd also have to rehash other hash buckets in case this makes the + * hash bucket empty (common). So it's much better to just mark + * it. 
+ */ +static void remove_hash_entry(struct index_state *istate, struct cache_entry *ce) +{ + ce->ce_flags |= CE_UNHASHED; +} + +static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) /* put ce in slot nr, unhashing the entry it displaces */ +{ + struct cache_entry *old = istate->cache[nr]; + + if (ce != old) { + remove_hash_entry(istate, old); + set_index_entry(istate, nr, ce); + } + istate->cache_changed = 1; +} + +int index_name_exists(struct index_state *istate, const char *name, int namelen) /* returns 1 if a hashed entry not marked CE_UNHASHED compares equal to name, else 0 */ +{ + unsigned int hash = hash_name(name, namelen); + struct cache_entry *ce = lookup_hash(hash, &istate->name_hash); + + while (ce) { + if (!(ce->ce_flags & CE_UNHASHED)) { + if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags)) /* NOTE(review): ce_flags sits in the length slot; presumably the callee masks out the name length - confirm */ + return 1; + } + ce = ce->next; + } + return 0; +} + /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -327,6 +391,9 @@ int index_name_pos(struct index_state *istate, const char *name, int namelen) /* Remove entry, return true if there are more entries to go.. */ int remove_index_entry_at(struct index_state *istate, int pos) { + struct cache_entry *ce = istate->cache[pos]; + + remove_hash_entry(istate, ce); istate->cache_changed = 1; istate->cache_nr--; if (pos >= istate->cache_nr) @@ -702,8 +769,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e /* existing match? Just replace it. 
*/ if (pos >= 0) { - istate->cache_changed = 1; - istate->cache[pos] = ce; + replace_index_entry(istate, pos, ce); return 0; } pos = -pos-1; @@ -763,7 +829,7 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti memmove(istate->cache + pos + 1, istate->cache + pos, (istate->cache_nr - pos - 1) * sizeof(ce)); - istate->cache[pos] = ce; + set_index_entry(istate, pos, ce); istate->cache_changed = 1; return 0; } @@ -892,11 +958,8 @@ int refresh_index(struct index_state *istate, unsigned int flags, const char **p has_errors = 1; continue; } - istate->cache_changed = 1; - /* You can NOT just free istate->cache[i] here, since it - * might not be necessarily malloc()ed but can also come - * from mmap(). */ - istate->cache[i] = new; + + replace_index_entry(istate, i, new); } return has_errors; } @@ -971,6 +1034,20 @@ static void convert_from_disk(struct ondisk_cache_entry *ondisk, struct cache_en memcpy(ce->name, ondisk->name, len + 1); } +static inline size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) /* bytes to allocate for `entries` in-memory entries read from ondisk_size bytes of index */ +{ + long per_entry; + + per_entry = sizeof(struct cache_entry) - sizeof(struct ondisk_cache_entry); + + /* + * Alignment can cause differences. This should be "alignof", but + * since that's a gcc'ism, just use the size of a pointer. + */ + per_entry += sizeof(void *); + return ondisk_size + entries*per_entry; +} + /* remember to discard_cache() before reading a different cache! 
*/ int read_index_from(struct index_state *istate, const char *path) { @@ -1021,7 +1098,7 @@ int read_index_from(struct index_state *istate, const char *path) * has room for a few more flags, we can allocate using the same * index size */ - istate->alloc = xmalloc(mmap_size); + istate->alloc = xmalloc(estimate_cache_size(mmap_size, istate->cache_nr)); src_offset = sizeof(*hdr); dst_offset = 0; @@ -1032,7 +1109,7 @@ int read_index_from(struct index_state *istate, const char *path) disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset); ce = (struct cache_entry *)((char *)istate->alloc + dst_offset); convert_from_disk(disk_ce, ce); - istate->cache[i] = ce; + set_index_entry(istate, i, ce); src_offset += ondisk_ce_size(ce); dst_offset += ce_size(ce); @@ -1070,6 +1147,7 @@ int discard_index(struct index_state *istate) istate->cache_nr = 0; istate->cache_changed = 0; istate->timestamp = 0; + free_hash(&istate->name_hash); cache_tree_free(&(istate->cache_tree)); free(istate->alloc); istate->alloc = NULL;