提交 0dcf7f50 编写于 作者: J Jakub Kicinski 提交者: David S. Miller

nfp: use TX ring pointer write back

Newer versions of the PCIe microcode support writing back the
position of the TX pointer back into host memory.  This speeds
up TX completions, because we avoid a read from device memory
(replacing PCIe read with DMA coherent read).
Signed-off-by: NJakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: NFei Qin <fei.qin@corigine.com>
Signed-off-by: NSimon Horman <simon.horman@corigine.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 07cd69c9
......@@ -392,7 +392,7 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
return;
/* Work out how many descriptors have been transmitted */
qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return;
......@@ -467,13 +467,14 @@ void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
u32 done_pkts = 0, done_bytes = 0;
bool done_all;
int idx, todo;
u32 qcp_rd_p;
/* Work out how many descriptors have been transmitted */
qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);
if (qcp_rd_p == tx_ring->qcp_rd_p)
return true;
......
......@@ -126,6 +126,7 @@ struct nfp_nfd3_tx_buf;
* @r_vec: Back pointer to ring vector structure
* @idx: Ring index from Linux's perspective
* @qcp_q: Pointer to base of the QCP TX queue
* @txrwb: TX pointer write back area
* @cnt: Size of the queue in number of descriptors
* @wr_p: TX ring write pointer (free running)
* @rd_p: TX ring read pointer (free running)
......@@ -145,6 +146,7 @@ struct nfp_net_tx_ring {
u32 idx;
u8 __iomem *qcp_q;
u64 *txrwb;
u32 cnt;
u32 wr_p;
......@@ -444,6 +446,8 @@ struct nfp_stat_pair {
* @ctrl_bar: Pointer to mapped control BAR
*
* @ops: Callbacks and parameters for this vNIC's NFD version
* @txrwb: TX pointer write back area (indexed by queue id)
* @txrwb_dma: TX pointer write back area DMA address
* @txd_cnt: Size of the TX ring in number of min size packets
* @rxd_cnt: Size of the RX ring in number of min size packets
* @num_r_vecs: Number of used ring vectors
......@@ -480,6 +484,9 @@ struct nfp_net_dp {
const struct nfp_dp_ops *ops;
u64 *txrwb;
dma_addr_t txrwb_dma;
unsigned int txd_cnt;
unsigned int rxd_cnt;
......
......@@ -1427,6 +1427,8 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
new->rx_rings = NULL;
new->num_r_vecs = 0;
new->num_stack_tx_rings = 0;
new->txrwb = NULL;
new->txrwb_dma = 0;
return new;
}
......@@ -1963,7 +1965,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->fw_ver.resv, nn->fw_ver.class,
nn->fw_ver.major, nn->fw_ver.minor,
nn->max_mtu);
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
nn->cap,
nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
......@@ -1981,6 +1983,7 @@ void nfp_net_info(struct nfp_net *nn)
nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "",
nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "",
nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
......@@ -2352,6 +2355,10 @@ int nfp_net_init(struct nfp_net *nn)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
}
/* Enable TX pointer writeback, if supported */
if (nn->cap & NFP_NET_CFG_CTRL_TXRWB)
nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB;
/* Stash the re-configuration queue away. First odd queue in TX Bar */
nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
......
......@@ -99,11 +99,14 @@ static int nfp_tx_q_show(struct seq_file *file, void *data)
d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q);
seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u\n",
seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u",
tx_ring->idx, tx_ring->qcidx,
tx_ring == r_vec->tx_ring ? "" : "xdp",
tx_ring->cnt, &tx_ring->dma, tx_ring->txds,
tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p);
if (tx_ring->txrwb)
seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb);
seq_putc(file, '\n');
nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring,
d_rd_p, d_wr_p);
......
......@@ -44,12 +44,13 @@ void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
/**
* nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
* @tx_ring: TX ring structure
* @dp: NFP Net data path struct
* @r_vec: IRQ vector servicing this ring
* @idx: Ring index
* @is_xdp: Is this an XDP TX ring?
*/
static void
nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp,
struct nfp_net_r_vector *r_vec, unsigned int idx,
bool is_xdp)
{
......@@ -61,6 +62,7 @@ nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
u64_stats_init(&tx_ring->r_vec->tx_sync);
tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
tx_ring->txrwb = dp->txrwb ? &dp->txrwb[idx] : NULL;
tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
}
......@@ -187,14 +189,22 @@ int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
if (!dp->tx_rings)
return -ENOMEM;
if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) {
dp->txrwb = dma_alloc_coherent(dp->dev,
dp->num_tx_rings * sizeof(u64),
&dp->txrwb_dma, GFP_KERNEL);
if (!dp->txrwb)
goto err_free_rings;
}
for (r = 0; r < dp->num_tx_rings; r++) {
int bias = 0;
if (r >= dp->num_stack_tx_rings)
bias = dp->num_stack_tx_rings;
nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
r, bias);
nfp_net_tx_ring_init(&dp->tx_rings[r], dp,
&nn->r_vecs[r - bias], r, bias);
if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
goto err_free_prev;
......@@ -211,6 +221,10 @@ int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
err_free_ring:
nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
}
if (dp->txrwb)
dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
dp->txrwb, dp->txrwb_dma);
err_free_rings:
kfree(dp->tx_rings);
return -ENOMEM;
}
......@@ -224,6 +238,9 @@ void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
nfp_net_tx_ring_free(dp, &dp->tx_rings[r]);
}
if (dp->txrwb)
dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64),
dp->txrwb, dp->txrwb_dma);
kfree(dp->tx_rings);
}
......@@ -377,6 +394,11 @@ nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
if (tx_ring->txrwb) {
*tx_ring->txrwb = 0;
nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx),
nn->dp.txrwb_dma + idx * sizeof(u64));
}
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
......@@ -388,6 +410,7 @@ void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
......
......@@ -60,6 +60,14 @@ static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
tx_ring->wr_ptr_add = 0;
}
static inline u32
nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp)
{
if (tx_ring->txrwb)
return *tx_ring->txrwb;
return nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
}
static inline void nfp_net_free_frag(void *frag, bool xdp)
{
if (!xdp)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册