提交 88ffbf3e 编写于 作者: B Bob Peterson

GFS2: Use resizable hash table for glocks

This patch changes the glock hash table from a normal hash table to
a resizable hash table, which scales better. This also simplifies
a lot of code.
Signed-off-by: NBob Peterson <rpeterso@redhat.com>
Signed-off-by: NAndreas Gruenbacher <agruenba@redhat.com>
Acked-by: NSteven Whitehouse <swhiteho@redhat.com>
上级 15562c43
......@@ -34,6 +34,7 @@
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include "gfs2.h"
#include "incore.h"
......@@ -50,9 +51,8 @@
#include "trace_gfs2.h"
struct gfs2_glock_iter {
int hash; /* hash bucket index */
unsigned nhash; /* Index within current bucket */
struct gfs2_sbd *sdp; /* incore superblock */
struct rhashtable_iter hti; /* rhashtable iterator */
struct gfs2_glock *gl; /* current glock struct */
loff_t last_pos; /* last position */
};
......@@ -70,44 +70,19 @@ static DEFINE_SPINLOCK(lru_lock);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
static struct dentry *gfs2_root;
/**
* gl_hash() - Turn glock number into hash bucket number
* @lock: The glock number
*
* Returns: The number of the corresponding hash bucket
*/
static unsigned int gl_hash(const struct gfs2_sbd *sdp,
const struct lm_lockname *name)
{
unsigned int h;
h = jhash(&name->ln_number, sizeof(u64), 0);
h = jhash(&name->ln_type, sizeof(unsigned int), h);
h = jhash(&sdp, sizeof(struct gfs2_sbd *), h);
h &= GFS2_GL_HASH_MASK;
return h;
}
static inline void spin_lock_bucket(unsigned int hash)
{
hlist_bl_lock(&gl_hash_table[hash]);
}
static struct rhashtable_params ht_parms = {
.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
.key_len = sizeof(struct lm_lockname),
.key_offset = offsetof(struct gfs2_glock, gl_name),
.head_offset = offsetof(struct gfs2_glock, gl_node),
};
static inline void spin_unlock_bucket(unsigned int hash)
{
hlist_bl_unlock(&gl_hash_table[hash]);
}
static struct rhashtable gl_hash_table;
static void gfs2_glock_dealloc(struct rcu_head *rcu)
void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
......@@ -115,13 +90,6 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
}
void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
......@@ -202,39 +170,13 @@ void gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
spin_lock_bucket(gl->gl_hash);
hlist_bl_del_rcu(&gl->gl_list);
spin_unlock_bucket(gl->gl_hash);
rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
/**
* search_bucket() - Find struct gfs2_glock by lock number
* @bucket: the bucket to search
* @name: The lock name
*
* Returns: NULL, or the struct gfs2_glock with the requested number
*/
static struct gfs2_glock *search_bucket(unsigned int hash,
const struct lm_lockname *name)
{
struct gfs2_glock *gl;
struct hlist_bl_node *h;
hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
if (!lm_name_equal(&gl->gl_name, name))
continue;
if (lockref_get_not_dead(&gl->gl_lockref))
return gl;
}
return NULL;
}
/**
* may_grant - check if its ok to grant a new lock
* @gl: The glock
......@@ -704,14 +646,14 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type,
.ln_sbd = sdp };
struct gfs2_glock *gl, *tmp;
unsigned int hash = gl_hash(sdp, &name);
struct gfs2_glock *gl, *tmp = NULL;
struct address_space *mapping;
struct kmem_cache *cachep;
int ret, tries = 0;
rcu_read_lock();
gl = search_bucket(hash, &name);
rcu_read_unlock();
gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (gl && !lockref_get_not_dead(&gl->gl_lockref))
gl = NULL;
*glp = gl;
if (gl)
......@@ -738,13 +680,13 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
}
atomic_inc(&sdp->sd_glock_disposal);
gl->gl_node.next = NULL;
gl->gl_flags = 0;
gl->gl_name = name;
gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
gl->gl_demote_state = LM_ST_EXCLUSIVE;
gl->gl_hash = hash;
gl->gl_ops = glops;
gl->gl_dstamp = ktime_set(0, 0);
preempt_disable();
......@@ -769,22 +711,34 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping->writeback_index = 0;
}
spin_lock_bucket(hash);
tmp = search_bucket(hash, &name);
if (tmp) {
spin_unlock_bucket(hash);
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
gl = tmp;
} else {
hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
spin_unlock_bucket(hash);
again:
ret = rhashtable_lookup_insert_fast(&gl_hash_table, &gl->gl_node,
ht_parms);
if (ret == 0) {
*glp = gl;
return 0;
}
*glp = gl;
if (ret == -EEXIST) {
ret = 0;
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
} else {
WARN_ON_ONCE(ret);
}
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(cachep, gl);
atomic_dec(&sdp->sd_glock_disposal);
*glp = tmp;
return 0;
return ret;
}
/**
......@@ -1460,31 +1414,26 @@ static struct shrinker glock_shrinker = {
*
*/
static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
unsigned int hash)
static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
struct gfs2_glock *gl;
struct hlist_bl_head *head = &gl_hash_table[hash];
struct hlist_bl_node *pos;
struct rhash_head *pos, *next;
const struct bucket_table *tbl;
int i;
rcu_read_lock();
hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
if ((gl->gl_name.ln_sbd == sdp) && lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
tbl = rht_dereference_rcu(gl_hash_table.tbl, &gl_hash_table);
for (i = 0; i < tbl->size; i++) {
rht_for_each_entry_safe(gl, pos, next, tbl, i, gl_node) {
if ((gl->gl_name.ln_sbd == sdp) &&
lockref_get_not_dead(&gl->gl_lockref))
examiner(gl);
}
}
rcu_read_unlock();
cond_resched();
}
static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
unsigned x;
for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
examine_bucket(examiner, sdp, x);
}
/**
* thaw_glock - thaw out a glock which has an unprocessed reply waiting
* @gl: The glock to thaw
......@@ -1802,20 +1751,24 @@ static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
int __init gfs2_glock_init(void)
{
unsigned i;
for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
}
int ret;
ret = rhashtable_init(&gl_hash_table, &ht_parms);
if (ret < 0)
return ret;
glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
WQ_HIGHPRI | WQ_FREEZABLE, 0);
if (!glock_workqueue)
if (!glock_workqueue) {
rhashtable_destroy(&gl_hash_table);
return -ENOMEM;
}
gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
WQ_MEM_RECLAIM | WQ_FREEZABLE,
0);
if (!gfs2_delete_workqueue) {
destroy_workqueue(glock_workqueue);
rhashtable_destroy(&gl_hash_table);
return -ENOMEM;
}
......@@ -1827,72 +1780,41 @@ int __init gfs2_glock_init(void)
void gfs2_glock_exit(void)
{
unregister_shrinker(&glock_shrinker);
rhashtable_destroy(&gl_hash_table);
destroy_workqueue(glock_workqueue);
destroy_workqueue(gfs2_delete_workqueue);
}
static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
struct gfs2_glock, gl_list);
}
static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
{
return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
struct gfs2_glock, gl_list);
}
static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
{
struct gfs2_glock *gl;
do {
gl = gi->gl;
if (gl) {
gi->gl = glock_hash_next(gl);
gi->nhash++;
} else {
if (gi->hash >= GFS2_GL_HASH_SIZE) {
rcu_read_unlock();
return 1;
}
gi->gl = glock_hash_chain(gi->hash);
gi->nhash = 0;
}
while (gi->gl == NULL) {
gi->hash++;
if (gi->hash >= GFS2_GL_HASH_SIZE) {
rcu_read_unlock();
return 1;
}
gi->gl = glock_hash_chain(gi->hash);
gi->nhash = 0;
gi->gl = rhashtable_walk_next(&gi->hti);
if (IS_ERR(gi->gl)) {
if (PTR_ERR(gi->gl) == -EAGAIN)
continue;
gi->gl = NULL;
}
/* Skip entries for other sb and dead entries */
} while (gi->sdp != gi->gl->gl_name.ln_sbd ||
__lockref_is_dead(&gi->gl->gl_lockref));
return 0;
} while ((gi->gl) && ((gi->sdp != gi->gl->gl_name.ln_sbd) ||
__lockref_is_dead(&gi->gl->gl_lockref)));
}
static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
{
struct gfs2_glock_iter *gi = seq->private;
loff_t n = *pos;
int ret;
if (gi->last_pos <= *pos)
n = gi->nhash + (*pos - gi->last_pos);
else
gi->hash = 0;
n = (*pos - gi->last_pos);
gi->nhash = 0;
rcu_read_lock();
ret = rhashtable_walk_start(&gi->hti);
if (ret)
return NULL;
do {
if (gfs2_glock_iter_next(gi))
return NULL;
} while (n--);
gfs2_glock_iter_next(gi);
} while (gi->gl && n--);
gi->last_pos = *pos;
return gi->gl;
......@@ -1905,9 +1827,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
(*pos)++;
gi->last_pos = *pos;
if (gfs2_glock_iter_next(gi))
return NULL;
gfs2_glock_iter_next(gi);
return gi->gl;
}
......@@ -1915,9 +1835,8 @@ static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
{
struct gfs2_glock_iter *gi = seq->private;
if (gi->gl)
rcu_read_unlock();
gi->gl = NULL;
rhashtable_walk_stop(&gi->hti);
}
static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
......@@ -1978,14 +1897,28 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
if (ret == 0) {
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
}
return ret;
}
static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->gl = NULL;
rhashtable_walk_exit(&gi->hti);
return seq_release_private(inode, file);
}
static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
int ret = seq_open_private(file, &gfs2_glstats_seq_ops,
......@@ -1994,9 +1927,12 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
struct seq_file *seq = file->private_data;
struct gfs2_glock_iter *gi = seq->private;
gi->sdp = inode->i_private;
gi->last_pos = 0;
seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
if (seq->buf)
seq->size = GFS2_SEQ_GOODSIZE;
gi->gl = NULL;
ret = rhashtable_walk_init(&gl_hash_table, &gi->hti);
}
return ret;
}
......@@ -2016,7 +1952,7 @@ static const struct file_operations gfs2_glocks_fops = {
.open = gfs2_glocks_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
.release = gfs2_glocks_release,
};
static const struct file_operations gfs2_glstats_fops = {
......@@ -2024,7 +1960,7 @@ static const struct file_operations gfs2_glstats_fops = {
.open = gfs2_glstats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
.release = gfs2_glocks_release,
};
static const struct file_operations gfs2_sbstats_fops = {
......
......@@ -22,6 +22,7 @@
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
......@@ -342,7 +343,6 @@ struct gfs2_glock {
gl_req:2, /* State in last dlm request */
gl_reply:8; /* Last reply from the dlm */
unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */
long gl_hold_time;
struct list_head gl_holders;
......@@ -368,7 +368,7 @@ struct gfs2_glock {
loff_t end;
} gl_vm;
};
struct rcu_head gl_rcu;
struct rhash_head gl_node;
};
#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册