提交 a6d51b68 编写于 作者: E Eli Cohen 提交者: Leon Romanovsky

net/mlx5: Introduce blue flame register allocator

Here is an implementation of an allocator that allocates blue flame
registers. A blue flame register is used for generating send doorbells.
A blue flame register can be used to generate either a regular doorbell
or a blue flame doorbell where the data to be sent is written to the
device's I/O memory hence saving the need to read the data from memory.
For the blue flame kind of doorbell to succeed, the blue flame register
needs to be mapped as write combining. The user can specify what kind of
send doorbells she wishes to use. If she requested write combining
mapping but that failed, the allocator will fall back to non write
combining mapping and will indicate that to the user.
Subsequent patches in this series will make use of this allocator.
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
上级 0b80c14f
...@@ -231,3 +231,238 @@ void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) ...@@ -231,3 +231,238 @@ void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar)
mlx5_cmd_free_uar(mdev, uar->index); mlx5_cmd_free_uar(mdev, uar->index);
} }
EXPORT_SYMBOL(mlx5_unmap_free_uar); EXPORT_SYMBOL(mlx5_unmap_free_uar);
/* Number of UARs that share one system page: the device-reported
 * num_of_uars_per_page when the uar_4k capability is set, otherwise 1.
 */
static int uars_per_sys_page(struct mlx5_core_dev *mdev)
{
	return MLX5_CAP_GEN(mdev, uar_4k) ?
	       MLX5_CAP_GEN(mdev, num_of_uars_per_page) : 1;
}
/* Translate a UAR index into the page frame number of the system page
 * that holds it, relative to the start of PCI resource 0.
 */
static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
{
	u32 sys_page = index;

	/* With 4K UARs several adapter pages are packed into a system page,
	 * so scale the UAR index down to a system page index.
	 */
	if (MLX5_CAP_GEN(mdev, uar_4k))
		sys_page >>= PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT;

	return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + sys_page;
}
/* kref release callback for a UARs page.
 *
 * Invoked through kref_put() once the last blue flame register taken from
 * the page has been returned. Unlinks the page from its list, tears down
 * the I/O mapping, hands the UAR index back to the device and frees the
 * bookkeeping memory.
 *
 * @kref: the ref_count member embedded in the struct mlx5_uars_page that is
 *        being released.
 */
static void up_rel_func(struct kref *kref)
{
	struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);

	list_del(&up->list);
	/* Undo the ioremap()/ioremap_wc() done in alloc_uars_page(); without
	 * this the mapping would leak every time a page is released.
	 */
	iounmap(up->map);
	/* A failure to free the UAR index is only reported; the page memory
	 * is released regardless.
	 */
	if (mlx5_cmd_free_uar(up->mdev, up->index))
		mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
	kfree(up->reg_bitmap);
	kfree(up->fp_bitmap);
	kfree(up);
}
/* Allocate and map one system page worth of UARs.
 *
 * Builds the two free-register bitmaps (regular and fast path), obtains a
 * UAR index from the device and maps the corresponding page.
 *
 * @mdev:   core device the page belongs to.
 * @map_wc: map the page with ioremap_wc() when true, ioremap() otherwise.
 *
 * Returns the new page with its reference count initialised, or an ERR_PTR:
 * -ENOMEM on memory allocation or ioremap() failure, -EAGAIN when
 * ioremap_wc() fails, or the error returned by mlx5_cmd_alloc_uar().
 */
static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
					      bool map_wc)
{
	struct mlx5_uars_page *page;
	phys_addr_t pfn;
	int nbfregs;
	int err;
	int bit;

	nbfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;

	page = kzalloc(sizeof(*page), GFP_KERNEL);
	if (!page)
		return ERR_PTR(-ENOMEM);

	page->mdev = mdev;

	/* Both bitmap allocations report -ENOMEM on failure */
	err = -ENOMEM;
	page->reg_bitmap = kcalloc(BITS_TO_LONGS(nbfregs), sizeof(unsigned long), GFP_KERNEL);
	if (!page->reg_bitmap)
		goto free_page;

	page->fp_bitmap = kcalloc(BITS_TO_LONGS(nbfregs), sizeof(unsigned long), GFP_KERNEL);
	if (!page->fp_bitmap)
		goto free_page;

	/* Within each UAR the first MLX5_NON_FP_BFREGS_PER_UAR registers are
	 * regular ones and the remainder are fast path; a set bit marks a
	 * free register.
	 */
	for (bit = 0; bit < nbfregs; bit++) {
		if ((bit % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR)
			set_bit(bit, page->fp_bitmap);
		else
			set_bit(bit, page->reg_bitmap);
	}

	page->bfregs = nbfregs;
	page->fp_avail = nbfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
	page->reg_avail = nbfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;

	err = mlx5_cmd_alloc_uar(mdev, &page->index);
	if (err) {
		mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
		goto free_page;
	}

	pfn = uar2pfn(mdev, page->index);
	if (map_wc)
		page->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
	else
		page->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
	if (!page->map) {
		/* -EAGAIN lets the caller retry without write combining */
		err = map_wc ? -EAGAIN : -ENOMEM;
		goto free_uar;
	}

	kref_init(&page->ref_count);
	mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
		      page->index, page->bfregs);
	return page;

free_uar:
	if (mlx5_cmd_free_uar(mdev, page->index))
		mlx5_core_warn(mdev, "failed to free uar index %d\n", page->index);
free_page:
	/* kfree(NULL) is a no-op, so partially built pages are handled too */
	kfree(page->fp_bitmap);
	kfree(page->reg_bitmap);
	kfree(page);
	return ERR_PTR(err);
}
/* Byte offset, from the start of the mapped system page, of the blue flame
 * area belonging to doorbell index @dbi.
 */
static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
{
	int uar_in_page = dbi / MLX5_BFREGS_PER_UAR;
	int reg_in_uar = dbi % MLX5_BFREGS_PER_UAR;

	/* Skip whole adapter pages for the preceding UARs, then the preceding
	 * registers inside this UAR, then the fixed blue flame offset.
	 */
	return uar_in_page * MLX5_ADAPTER_PAGE_SIZE +
	       reg_in_uar * (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) +
	       MLX5_BF_OFFSET;
}
/* Hand out one blue flame register from the requested pool.
 *
 * @mdev:      core device.
 * @bfreg:     filled in with the mapping, owning page, mapping type and
 *             UAR index of the allocated register.
 * @map_wc:    take the register from the write-combining pool when true,
 *             from the regular pool otherwise.
 * @fast_path: take a fast path register when true, a regular one otherwise.
 *
 * Uses the first page on the pool's list, allocating a new page when the
 * list is empty. Every allocated register holds one reference on its page.
 *
 * Returns 0 on success or the error from alloc_uars_page().
 */
static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
		       bool map_wc, bool fast_path)
{
	struct mlx5_bfreg_data *data = &mdev->priv.bfregs;
	struct mlx5_bfreg_head *pool;
	struct mlx5_uars_page *page;
	unsigned long *free_map;
	unsigned int *nfree;
	int ret = 0;
	int slot;

	pool = map_wc ? &data->wc_head : &data->reg_head;

	mutex_lock(&pool->lock);
	if (!list_empty(&pool->list)) {
		page = list_entry(pool->list.next, struct mlx5_uars_page, list);
		kref_get(&page->ref_count);
	} else {
		/* A fresh page starts with its reference count initialised,
		 * which accounts for the register handed out below.
		 */
		page = alloc_uars_page(mdev, map_wc);
		if (IS_ERR(page)) {
			ret = PTR_ERR(page);
			goto unlock;
		}
		list_add(&page->list, &pool->list);
	}

	free_map = fast_path ? page->fp_bitmap : page->reg_bitmap;
	nfree = fast_path ? &page->fp_avail : &page->reg_avail;

	slot = find_first_bit(free_map, page->bfregs);
	clear_bit(slot, free_map);
	/* Take the page off the list once this kind of register runs out */
	if (--(*nfree) == 0)
		list_del(&page->list);

	bfreg->map = page->map + map_offset(mdev, slot);
	bfreg->up = page;
	bfreg->wc = map_wc;
	bfreg->index = page->index + slot / MLX5_BFREGS_PER_UAR;

unlock:
	mutex_unlock(&pool->lock);
	return ret;
}
/* Allocate a blue flame register.
 *
 * @mdev:      core device.
 * @bfreg:     output; describes the allocated register.
 * @map_wc:    request a write-combining mapping.
 * @fast_path: request a fast path register.
 *
 * When a write-combining mapping was requested but could not be set up
 * (-EAGAIN), the allocation is retried with a regular mapping; bfreg->wc
 * then reports the mapping type that was actually used.
 *
 * Returns 0 on success or a negative errno.
 */
int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
		     bool map_wc, bool fast_path)
{
	int err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);

	if (map_wc && err == -EAGAIN)
		err = alloc_bfreg(mdev, bfreg, false, fast_path);

	return err;
}
EXPORT_SYMBOL(mlx5_alloc_bfreg);
static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
struct mlx5_uars_page *up,
struct mlx5_sq_bfreg *bfreg)
{
unsigned int uar_idx;
unsigned int bfreg_idx;
unsigned int bf_reg_size;
bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
}
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
{
struct mlx5_bfreg_data *bfregs;
struct mlx5_uars_page *up;
struct mutex *lock; /* pointer to right mutex */
unsigned int dbi;
bool fp;
unsigned int *avail;
unsigned long *bitmap;
struct list_head *head;
bfregs = &mdev->priv.bfregs;
if (bfreg->wc) {
head = &bfregs->wc_head.list;
lock = &bfregs->wc_head.lock;
} else {
head = &bfregs->reg_head.list;
lock = &bfregs->reg_head.lock;
}
up = bfreg->up;
dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
if (fp) {
avail = &up->fp_avail;
bitmap = up->fp_bitmap;
} else {
avail = &up->reg_avail;
bitmap = up->reg_bitmap;
}
mutex_lock(lock);
(*avail)++;
set_bit(dbi, bitmap);
if (*avail == 1)
list_add_tail(&up->list, head);
kref_put(&up->ref_count, up_rel_func);
mutex_unlock(lock);
}
EXPORT_SYMBOL(mlx5_free_bfreg);
...@@ -215,6 +215,8 @@ enum { ...@@ -215,6 +215,8 @@ enum {
MLX5_BFREGS_PER_UAR = 4, MLX5_BFREGS_PER_UAR = 4,
MLX5_MAX_UARS = 1 << 8, MLX5_MAX_UARS = 1 << 8,
MLX5_NON_FP_BFREGS_PER_UAR = 2, MLX5_NON_FP_BFREGS_PER_UAR = 2,
MLX5_FP_BFREGS_PER_UAR = MLX5_BFREGS_PER_UAR -
MLX5_NON_FP_BFREGS_PER_UAR,
MLX5_MAX_BFREGS = MLX5_MAX_UARS * MLX5_MAX_BFREGS = MLX5_MAX_UARS *
MLX5_NON_FP_BFREGS_PER_UAR, MLX5_NON_FP_BFREGS_PER_UAR,
}; };
......
...@@ -452,6 +452,39 @@ struct mlx5_eq_table { ...@@ -452,6 +452,39 @@ struct mlx5_eq_table {
spinlock_t lock; spinlock_t lock;
}; };
/* One system page of UARs from which blue flame registers are handed out. */
struct mlx5_uars_page {
/* kernel mapping of the page, from ioremap() or ioremap_wc() */
void __iomem *map;
/* NOTE(review): not assigned by the allocator code in this patch;
 * presumably meant to record the mapping type - confirm
 */
bool wc;
/* UAR index returned by mlx5_cmd_alloc_uar() */
u32 index;
/* link in the list of a struct mlx5_bfreg_head */
struct list_head list;
/* total number of blue flame registers in the page */
unsigned int bfregs;
unsigned long *reg_bitmap; /* for non fast path bf regs */
/* for fast path bf regs; in both bitmaps a set bit marks a free register */
unsigned long *fp_bitmap;
/* number of free non fast path registers */
unsigned int reg_avail;
/* number of free fast path registers */
unsigned int fp_avail;
/* one reference per allocated register; the page is released on the last put */
struct kref ref_count;
/* device the page was allocated from */
struct mlx5_core_dev *mdev;
};
/* A pool of UAR pages sharing one mapping type. */
struct mlx5_bfreg_head {
/* protect blue flame registers allocations */
struct mutex lock;
/* struct mlx5_uars_page entries, linked through their list member */
struct list_head list;
};
/* Per-device blue flame register pools, one per mapping type. */
struct mlx5_bfreg_data {
/* pages mapped without write combining */
struct mlx5_bfreg_head reg_head;
/* pages mapped with write combining */
struct mlx5_bfreg_head wc_head;
};
/* A single blue flame register as handed out by mlx5_alloc_bfreg(). */
struct mlx5_sq_bfreg {
/* address of the register's blue flame area inside the page mapping */
void __iomem *map;
/* page the register was taken from */
struct mlx5_uars_page *up;
/* true when the register came from the write-combining pool */
bool wc;
/* index of the UAR containing the register */
u32 index;
/* not written by the allocator itself; NOTE(review): presumably
 * maintained by the users of the register - confirm
 */
unsigned int offset;
};
struct mlx5_uar { struct mlx5_uar {
u32 index; u32 index;
struct list_head bf_list; struct list_head bf_list;
...@@ -645,6 +678,7 @@ struct mlx5_priv { ...@@ -645,6 +678,7 @@ struct mlx5_priv {
void *pfault_ctx; void *pfault_ctx;
struct srcu_struct pfault_srcu; struct srcu_struct pfault_srcu;
#endif #endif
struct mlx5_bfreg_data bfregs;
}; };
enum mlx5_device_state { enum mlx5_device_state {
...@@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); ...@@ -1022,6 +1056,9 @@ void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index);
void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate); void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate);
bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate);
/* Allocate a blue flame register into @bfreg. @map_wc requests a
 * write-combining mapping (falling back to a regular mapping if that cannot
 * be set up; bfreg->wc reports the outcome) and @fast_path selects a fast
 * path register. Returns 0 or a negative errno.
 */
int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
bool map_wc, bool fast_path);
/* Return a register previously obtained with mlx5_alloc_bfreg(). */
void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg);
static inline int fw_initializing(struct mlx5_core_dev *dev) static inline int fw_initializing(struct mlx5_core_dev *dev)
{ {
......
...@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { ...@@ -905,7 +905,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 uc[0x1]; u8 uc[0x1];
u8 rc[0x1]; u8 rc[0x1];
u8 reserved_at_240[0xa]; u8 uar_4k[0x1];
u8 reserved_at_241[0x9];
u8 uar_sz[0x6]; u8 uar_sz[0x6];
u8 reserved_at_250[0x8]; u8 reserved_at_250[0x8];
u8 log_pg_sz[0x8]; u8 log_pg_sz[0x8];
...@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { ...@@ -997,7 +998,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 device_frequency_mhz[0x20]; u8 device_frequency_mhz[0x20];
u8 device_frequency_khz[0x20]; u8 device_frequency_khz[0x20];
u8 reserved_at_500[0x80]; u8 reserved_at_500[0x20];
u8 num_of_uars_per_page[0x20];
u8 reserved_at_540[0x40];
u8 reserved_at_580[0x3f]; u8 reserved_at_580[0x3f];
u8 cqe_compression[0x1]; u8 cqe_compression[0x1];
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册