提交 6c0f3af7 编写于 作者: S Sage Weil

ceph: add dir_layout to inode

Add a ceph_dir_layout to the inode, and calculate dentry hash values based
on the parent directory's specified dir_hash function.  This is needed
because the old default Linux dcache hash function is extremely weak and
leads to a poor distribution of files among dir fragments.
Signed-off-by: Sage Weil <sage@newdream.net>
上级 3c0eee3f
...@@ -1216,6 +1216,26 @@ void ceph_dentry_lru_del(struct dentry *dn) ...@@ -1216,6 +1216,26 @@ void ceph_dentry_lru_del(struct dentry *dn)
} }
} }
/*
 * Compute the name hash for dentry @dn.
 *
 * The hash function is chosen by the dl_dir_hash id stored in the dir
 * layout of the parent directory's inode (dn->d_parent->d_inode), so the
 * same name may hash differently under different parent directories.
 */
unsigned ceph_dentry_hash(struct dentry *dn)
{
	struct ceph_inode_info *parent_ci = ceph_inode(dn->d_parent->d_inode);
	int hash_id = parent_ci->i_dir_layout.dl_dir_hash;

	/*
	 * An id of 0 is handled like CEPH_STR_HASH_LINUX for backward
	 * compat: reuse the hash already stored in the dentry name.
	 */
	if (hash_id == 0 || hash_id == CEPH_STR_HASH_LINUX)
		return dn->d_name.hash;

	/* Any other id: hash the name with the directory's hash function. */
	return ceph_str_hash(hash_id, dn->d_name.name, dn->d_name.len);
}
const struct file_operations ceph_dir_fops = { const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir, .read = ceph_read_dir,
.readdir = ceph_readdir, .readdir = ceph_readdir,
......
...@@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, ...@@ -59,7 +59,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
dout("encode_fh %p connectable\n", dentry); dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode); cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode);
cfh->parent_name_hash = parent->d_name.hash; cfh->parent_name_hash = ceph_dentry_hash(parent);
*max_len = connected_handle_length; *max_len = connected_handle_length;
type = 2; type = 2;
} else if (*max_len >= handle_length) { } else if (*max_len >= handle_length) {
......
...@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ...@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_release_count = 0; ci->i_release_count = 0;
ci->i_symlink = NULL; ci->i_symlink = NULL;
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
ci->i_fragtree = RB_ROOT; ci->i_fragtree = RB_ROOT;
mutex_init(&ci->i_fragtree_mutex); mutex_init(&ci->i_fragtree_mutex);
......
...@@ -239,6 +239,7 @@ struct ceph_inode_info { ...@@ -239,6 +239,7 @@ struct ceph_inode_info {
unsigned i_ceph_flags; unsigned i_ceph_flags;
unsigned long i_release_count; unsigned long i_release_count;
struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout; struct ceph_file_layout i_layout;
char *i_symlink; char *i_symlink;
...@@ -768,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn); ...@@ -768,6 +769,7 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn); extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry); extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
extern unsigned ceph_dentry_hash(struct dentry *dn);
/* /*
* our d_ops vary depending on whether the inode is live, * our d_ops vary depending on whether the inode is live,
......
...@@ -43,6 +43,10 @@ ...@@ -43,6 +43,10 @@
#define CEPH_FEATURE_NOSRCADDR (1<<1) #define CEPH_FEATURE_NOSRCADDR (1<<1)
#define CEPH_FEATURE_MONCLOCKCHECK (1<<2) #define CEPH_FEATURE_MONCLOCKCHECK (1<<2)
#define CEPH_FEATURE_FLOCK (1<<3) #define CEPH_FEATURE_FLOCK (1<<3)
#define CEPH_FEATURE_SUBSCRIBE2 (1<<4)
#define CEPH_FEATURE_MONNAMES (1<<5)
#define CEPH_FEATURE_RECONNECT_SEQ (1<<6)
#define CEPH_FEATURE_DIRLAYOUTHASH (1<<7)
/* /*
...@@ -55,10 +59,10 @@ struct ceph_file_layout { ...@@ -55,10 +59,10 @@ struct ceph_file_layout {
__le32 fl_stripe_count; /* over this many objects */ __le32 fl_stripe_count; /* over this many objects */
__le32 fl_object_size; /* until objects are this big, then move to __le32 fl_object_size; /* until objects are this big, then move to
new objects */ new objects */
__le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ __le32 fl_cas_hash; /* UNUSED. 0 = none; 1 = sha256 */
/* pg -> disk layout */ /* pg -> disk layout */
__le32 fl_object_stripe_unit; /* for per-object parity, if any */ __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
/* object -> pg layout */ /* object -> pg layout */
__le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
...@@ -69,6 +73,12 @@ struct ceph_file_layout { ...@@ -69,6 +73,12 @@ struct ceph_file_layout {
int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); int ceph_file_layout_is_valid(const struct ceph_file_layout *layout);
/*
 * Directory layout record.
 *
 * dl_dir_hash holds the id of the string hash function used for names in
 * the directory; ceph_dentry_hash() switches on it and treats 0 the same
 * as CEPH_STR_HASH_LINUX for backward compatibility.
 *
 * The struct is declared packed, so the compiler inserts no padding
 * between members. The dl_unused* members are named as unused and are
 * not read by any code visible here.
 * NOTE(review): presumably they reserve space for future fields - confirm.
 */
struct ceph_dir_layout {
__u8 dl_dir_hash; /* hash function id; see ceph_hash.h for ids */
__u8 dl_unused1;
__u16 dl_unused2;
__u32 dl_unused3;
} __attribute__ ((packed));
/* crypto algorithms */ /* crypto algorithms */
#define CEPH_CRYPTO_NONE 0x0 #define CEPH_CRYPTO_NONE 0x0
...@@ -457,7 +467,7 @@ struct ceph_mds_reply_inode { ...@@ -457,7 +467,7 @@ struct ceph_mds_reply_inode {
struct ceph_timespec rctime; struct ceph_timespec rctime;
struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */
} __attribute__ ((packed)); } __attribute__ ((packed));
/* followed by frag array, then symlink string, then xattr blob */ /* followed by frag array, symlink string, dir layout, xattr blob */
/* reply_lease follows dname, and reply_inode */ /* reply_lease follows dname, and reply_inode */
struct ceph_mds_reply_lease { struct ceph_mds_reply_lease {
......
#include <linux/ceph/types.h> #include <linux/ceph/types.h>
#include <linux/module.h>
/* /*
* Robert Jenkin's hash function. * Robert Jenkin's hash function.
...@@ -104,6 +105,7 @@ unsigned ceph_str_hash(int type, const char *s, unsigned len) ...@@ -104,6 +105,7 @@ unsigned ceph_str_hash(int type, const char *s, unsigned len)
return -1; return -1;
} }
} }
EXPORT_SYMBOL(ceph_str_hash);
const char *ceph_str_hash_name(int type) const char *ceph_str_hash_name(int type)
{ {
...@@ -116,3 +118,4 @@ const char *ceph_str_hash_name(int type) ...@@ -116,3 +118,4 @@ const char *ceph_str_hash_name(int type)
return "unknown"; return "unknown";
} }
} }
EXPORT_SYMBOL(ceph_str_hash_name);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册