提交 99f80d2f 编写于 作者: M Mike Marciniszyn 提交者: Doug Ledford

IB/hfi1: Optimize lkey validation structures

Profiling shows that the key validation is susceptible
to cache line trading when accessing the lkey table.

Fix by separating out the read mostly fields from the write
fields. In addition, the shift amount, which is a function
of the lkey table size, is precomputed and stored with the
table pointer. Since both the shift and table pointer
are in the same read mostly cacheline, this saves a cache
line in this hot path.
Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
上级 63df8e09
...@@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) ...@@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
lkey_table_size = rdi->dparms.lkey_table_size; lkey_table_size = rdi->dparms.lkey_table_size;
} }
rdi->lkey_table.max = 1 << lkey_table_size; rdi->lkey_table.max = 1 << lkey_table_size;
rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **) rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node); vmalloc_node(lk_tab_size, rdi->dparms.node);
...@@ -774,7 +775,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, ...@@ -774,7 +775,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_mregion *mr; struct rvt_mregion *mr;
unsigned n, m; unsigned n, m;
size_t off; size_t off;
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
/* /*
* We use LKEY == zero for kernel virtual addresses * We use LKEY == zero for kernel virtual addresses
...@@ -782,6 +782,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, ...@@ -782,6 +782,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
*/ */
rcu_read_lock(); rcu_read_lock();
if (sge->lkey == 0) { if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
if (pd->user) if (pd->user)
goto bail; goto bail;
mr = rcu_dereference(dev->dma_mr); mr = rcu_dereference(dev->dma_mr);
...@@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, ...@@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0; isge->n = 0;
goto ok; goto ok;
} }
mr = rcu_dereference( mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail; goto bail;
...@@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, ...@@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
goto ok; goto ok;
} }
mr = rcu_dereference( mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd)) mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail; goto bail;
......
...@@ -90,11 +90,15 @@ struct rvt_mregion { ...@@ -90,11 +90,15 @@ struct rvt_mregion {
#define RVT_MAX_LKEY_TABLE_BITS 23 #define RVT_MAX_LKEY_TABLE_BITS 23
struct rvt_lkey_table { struct rvt_lkey_table {
spinlock_t lock; /* protect changes in this struct */ /* read mostly fields */
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
u32 max; /* size of the table */ u32 max; /* size of the table */
u32 shift; /* lkey/rkey shift */
struct rvt_mregion __rcu **table; struct rvt_mregion __rcu **table;
/* writeable fields */
/* protect changes in this struct */
spinlock_t lock ____cacheline_aligned_in_smp;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
}; };
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册