提交 be1222b5 编写于 作者: B Björn Töpel 提交者: Alexei Starovoitov

i40e: Separate kernel allocated rx_bi rings from AF_XDP rings

Continuing the path to support MEM_TYPE_XSK_BUFF_POOL, the AF_XDP
zero-copy/sk_buff rx_bi rings are now separate. Functions to properly
allocate the different rings are added as well.

v3->v4: Made i40e_fd_handle_status() static. (kbuild test robot)
v4->v5: Fix kdoc for i40e_clean_programming_status(). (Jakub)
Signed-off-by: NBjörn Töpel <bjorn.topel@intel.com>
Signed-off-by: NAlexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-8-bjorn.topel@gmail.com
上级 e1675f97
......@@ -3260,8 +3260,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (ring->vsi->type == I40E_VSI_MAIN)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
kfree(ring->rx_bi);
ring->xsk_umem = i40e_xsk_umem(ring);
if (ring->xsk_umem) {
ret = i40e_alloc_rx_bi_zc(ring);
if (ret)
return ret;
ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
XDP_PACKET_HEADROOM;
/* For AF_XDP ZC, we disallow packets to span on
......@@ -3280,6 +3284,9 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->queue_index);
} else {
ret = i40e_alloc_rx_bi(ring);
if (ret)
return ret;
ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == I40E_VSI_MAIN) {
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
......
......@@ -521,28 +521,29 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
/**
* i40e_fd_handle_status - check the Programming Status for FD
* @rx_ring: the Rx ring for this descriptor
* @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
* @qword0_raw: qword0
* @qword1: qword1 after le_to_cpu
* @prog_id: the id originally used for programming
*
* This is used to verify if the FD programming or invalidation
* requested by SW to the HW is successful or not and take actions accordingly.
**/
void i40e_fd_handle_status(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, u8 prog_id)
static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
u64 qword1, u8 prog_id)
{
struct i40e_pf *pf = rx_ring->vsi->back;
struct pci_dev *pdev = pf->pdev;
struct i40e_32b_rx_wb_qw0 *qw0;
u32 fcnt_prog, fcnt_avail;
u32 error;
u64 qw;
qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
if (qw0->hi_dword.fd_id != 0 ||
(I40E_DEBUG_FD & pf->hw.debug_mask))
dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
pf->fd_inv);
......@@ -560,7 +561,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
/* store the current atr filter count */
pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
if (qw0->hi_dword.fd_id == 0 &&
test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
/* These set_bit() calls aren't atomic with the
* test_bit() here, but worse case we potentially
......@@ -589,7 +590,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
rx_desc->wb.qword0.hi_dword.fd_id);
qw0->hi_dword.fd_id);
}
}
......@@ -1232,29 +1233,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
}
/**
* i40e_rx_is_programming_status - check for programming status descriptor
* @qw: qword representing status_error_len in CPU ordering
*
* The value of in the descriptor length field indicate if this
* is a programming status descriptor for flow director or FCoE
* by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
* it is a packet descriptor.
**/
static inline bool i40e_rx_is_programming_status(u64 qw)
{
/* The Rx filter programming status and SPH bit occupy the same
* spot in the descriptor. Since we don't support packet split we
* can just reuse the bit as an indication that this is a
* programming status descriptor.
*/
return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
}
/**
* i40e_clean_programming_status - try clean the programming status descriptor
* i40e_clean_programming_status - clean the programming status descriptor
* @rx_ring: the rx ring that has this descriptor
* @rx_desc: the rx descriptor written back by HW
* @qw: qword representing status_error_len in CPU ordering
* @qword0_raw: qword0
* @qword1: qword1 representing status_error_len in CPU ordering
*
* Flow director should handle FD_FILTER_STATUS to check its filter programming
* status being successful or not and take actions accordingly. FCoE should
......@@ -1262,34 +1244,16 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
*
* Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
**/
struct i40e_rx_buffer *i40e_clean_programming_status(
struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
u64 qw)
void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
u64 qword1)
{
struct i40e_rx_buffer *rx_buffer;
u32 ntc;
u8 id;
if (!i40e_rx_is_programming_status(qw))
return NULL;
ntc = rx_ring->next_to_clean;
/* fetch, update, and store next to clean */
rx_buffer = i40e_rx_bi(rx_ring, ntc++);
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
i40e_fd_handle_status(rx_ring, rx_desc, id);
return rx_buffer;
i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
}
/**
......@@ -1341,13 +1305,25 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return -ENOMEM;
}
int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
{
unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi ? 0 : -ENOMEM;
}
static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
{
memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
}
/**
* i40e_clean_rx_ring - Free Rx buffers
* @rx_ring: ring to be cleaned
**/
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
unsigned long bi_size;
u16 i;
/* ring already cleared, nothing to do */
......@@ -1393,8 +1369,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
}
skip_free:
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
memset(rx_ring->rx_bi, 0, bi_size);
if (rx_ring->xsk_umem)
i40e_clear_rx_bi_zc(rx_ring);
else
i40e_clear_rx_bi(rx_ring);
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
......@@ -1435,15 +1413,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
int err = -ENOMEM;
int bi_size;
/* warn if we are about to overwrite the pointer */
WARN_ON(rx_ring->rx_bi);
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
if (!rx_ring->rx_bi)
goto err;
int err;
u64_stats_init(&rx_ring->syncp);
......@@ -1456,7 +1426,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
if (!rx_ring->desc) {
dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
rx_ring->size);
goto err;
return -ENOMEM;
}
rx_ring->next_to_alloc = 0;
......@@ -1468,16 +1438,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index);
if (err < 0)
goto err;
return err;
}
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
return 0;
err:
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
return err;
}
/**
......@@ -2387,9 +2353,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
*/
dma_rmb();
rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
qword);
if (unlikely(rx_buffer)) {
if (i40e_rx_is_programming_status(qword)) {
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
i40e_inc_ntc(rx_ring);
i40e_reuse_rx_page(rx_ring, rx_buffer);
cleaned_count++;
continue;
......
......@@ -296,17 +296,15 @@ struct i40e_tx_buffer {
struct i40e_rx_buffer {
dma_addr_t dma;
union {
struct {
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
};
struct {
void *addr;
u64 handle;
};
};
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
};
struct i40e_rx_buffer_zc {
dma_addr_t dma;
void *addr;
u64 handle;
};
struct i40e_queue_stats {
......@@ -358,6 +356,7 @@ struct i40e_ring {
union {
struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi;
struct i40e_rx_buffer_zc *rx_bi_zc;
};
DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
u16 queue_index; /* Queue number of ring */
......@@ -495,6 +494,7 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
/**
* i40e_get_head - Retrieve head from head writeback
......
......@@ -4,13 +4,9 @@
#ifndef I40E_TXRX_COMMON_
#define I40E_TXRX_COMMON_
void i40e_fd_handle_status(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, u8 prog_id);
int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
struct i40e_rx_buffer *i40e_clean_programming_status(
struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
u64 qw);
void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
u64 qword1);
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, struct sk_buff *skb);
void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
......@@ -84,6 +80,38 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
}
}
/**
* i40e_rx_is_programming_status - check for programming status descriptor
* @qword1: qword1 representing status_error_len in CPU ordering
*
* The value of in the descriptor length field indicate if this
* is a programming status descriptor for flow director or FCoE
* by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
* it is a packet descriptor.
**/
static inline bool i40e_rx_is_programming_status(u64 qword1)
{
/* The Rx filter programming status and SPH bit occupy the same
* spot in the descriptor. Since we don't support packet split we
* can just reuse the bit as an indication that this is a
* programming status descriptor.
*/
return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
}
/**
* i40e_inc_ntc: Advance the next_to_clean index
* @rx_ring: Rx ring
**/
static inline void i40e_inc_ntc(struct i40e_ring *rx_ring)
{
u32 ntc = rx_ring->next_to_clean + 1;
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
}
void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
......
......@@ -689,7 +689,7 @@ union i40e_32byte_rx_desc {
__le64 rsvd2;
} read;
struct {
struct {
struct i40e_32b_rx_wb_qw0 {
struct {
union {
__le16 mirroring_status;
......@@ -727,6 +727,9 @@ union i40e_32byte_rx_desc {
} hi_dword;
} qword3;
} wb; /* writeback */
struct {
u64 qword[4];
} raw;
};
enum i40e_rx_desc_status_bits {
......
......@@ -9,9 +9,23 @@
#include "i40e_txrx_common.h"
#include "i40e_xsk.h"
static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
{
return &rx_ring->rx_bi[idx];
unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
}
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
{
memset(rx_ring->rx_bi_zc, 0,
sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
}
static struct i40e_rx_buffer_zc *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
{
return &rx_ring->rx_bi_zc[idx];
}
/**
......@@ -238,7 +252,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
}
/**
* i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer
* i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer_zc
* @rx_ring: Rx ring
* @bi: Rx buffer to populate
*
......@@ -248,7 +262,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
* Returns true for a successful allocation, false otherwise
**/
static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *bi)
struct i40e_rx_buffer_zc *bi)
{
struct xdp_umem *umem = rx_ring->xsk_umem;
void *addr = bi->addr;
......@@ -279,7 +293,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
}
/**
* i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
* i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer_zc
* @rx_ring: Rx ring
* @bi: Rx buffer to populate
*
......@@ -289,7 +303,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
* Returns true for a successful allocation, false otherwise
**/
static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *bi)
struct i40e_rx_buffer_zc *bi)
{
struct xdp_umem *umem = rx_ring->xsk_umem;
u64 handle, hr;
......@@ -318,11 +332,11 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
static __always_inline bool
__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
bool alloc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *bi))
struct i40e_rx_buffer_zc *bi))
{
u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
struct i40e_rx_buffer_zc *bi;
bool ok = true;
rx_desc = I40E_RX_DESC(rx_ring, ntu);
......@@ -402,10 +416,11 @@ static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
*
* Returns the received Rx buffer
**/
static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
const unsigned int size)
static struct i40e_rx_buffer_zc *i40e_get_rx_buffer_zc(
struct i40e_ring *rx_ring,
const unsigned int size)
{
struct i40e_rx_buffer *bi;
struct i40e_rx_buffer_zc *bi;
bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
......@@ -427,10 +442,10 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
* recycle queue (next_to_alloc).
**/
static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *old_bi)
struct i40e_rx_buffer_zc *old_bi)
{
struct i40e_rx_buffer *new_bi = i40e_rx_bi(rx_ring,
rx_ring->next_to_alloc);
struct i40e_rx_buffer_zc *new_bi = i40e_rx_bi(rx_ring,
rx_ring->next_to_alloc);
u16 nta = rx_ring->next_to_alloc;
/* update, and store next to alloc */
......@@ -452,7 +467,7 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
**/
void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
{
struct i40e_rx_buffer *bi;
struct i40e_rx_buffer_zc *bi;
struct i40e_ring *rx_ring;
u64 hr, mask;
u16 nta;
......@@ -490,7 +505,7 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
* Returns the skb, or NULL on failure.
**/
static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *bi,
struct i40e_rx_buffer_zc *bi,
struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
......@@ -513,19 +528,6 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
return skb;
}
/**
* i40e_inc_ntc: Advance the next_to_clean index
* @rx_ring: Rx ring
**/
static void i40e_inc_ntc(struct i40e_ring *rx_ring)
{
u32 ntc = rx_ring->next_to_clean + 1;
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
}
/**
* i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
* @rx_ring: Rx ring
......@@ -547,7 +549,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *bi;
struct i40e_rx_buffer_zc *bi;
union i40e_rx_desc *rx_desc;
unsigned int size;
u64 qword;
......@@ -568,14 +570,18 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
*/
dma_rmb();
bi = i40e_clean_programming_status(rx_ring, rx_desc,
qword);
if (unlikely(bi)) {
if (i40e_rx_is_programming_status(qword)) {
i40e_clean_programming_status(rx_ring,
rx_desc->raw.qword[0],
qword);
bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
i40e_inc_ntc(rx_ring);
i40e_reuse_rx_buffer_zc(rx_ring, bi);
cleaned_count++;
continue;
}
bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
if (!size)
......@@ -832,7 +838,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
u16 i;
for (i = 0; i < rx_ring->count; i++) {
struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
struct i40e_rx_buffer_zc *rx_bi = i40e_rx_bi(rx_ring, i);
if (!rx_bi->addr)
continue;
......
......@@ -19,5 +19,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget);
int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
#endif /* _I40E_XSK_H_ */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册