Commit 14bf718f authored by Ben Hutchings

sfc: Stop TX queues before they fill up

We now have a definite upper bound on the number of descriptors per
skb; use that to stop the queue when the next packet might not fit.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Parent 7668ff9c
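Not part of the commit: a minimal standalone C sketch of the stop/wake arithmetic described in the message above, for readers skimming the diff. The ring size and per-skb descriptor bound are made-up example values standing in for efx->txq_entries and efx_tx_max_skb_descs(efx); the driver computes the real ones per NIC.

```c
#include <stdio.h>

int main(void)
{
	/* Assumed example values; the driver derives these per NIC. */
	unsigned int txq_entries   = 1024; /* TX ring size */
	unsigned int max_skb_descs = 18;   /* worst-case descriptors one skb may need */

	/* Stop the queue once one more worst-case skb might not fit... */
	unsigned int stop_thresh = txq_entries - max_skb_descs;
	/* ...and wake it once the ring has drained half way back to empty. */
	unsigned int wake_thresh = stop_thresh / 2;

	/* Example fill level: descriptors inserted but not yet completed. */
	unsigned int insert_count = 1010, read_count = 20;
	unsigned int fill_level = insert_count - read_count;

	printf("stop when fill >= %u, wake when fill <= %u\n",
	       stop_thresh, wake_thresh);
	printf("fill %u -> %s\n", fill_level,
	       fill_level >= stop_thresh ? "queue stopped" : "queue keeps running");
	return 0;
}
```

With these example numbers the queue stops at a fill level of 1006 descriptors and is woken again once completions bring it back down to 503.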
@@ -630,6 +630,16 @@ static void efx_start_datapath(struct efx_nic *efx)
 	efx->rx_buffer_order = get_order(efx->rx_buffer_len +
 					 sizeof(struct efx_rx_page_state));
 
+	/* We must keep at least one descriptor in a TX ring empty.
+	 * We could avoid this when the queue size does not exactly
+	 * match the hardware ring size, but it's not that important.
+	 * Therefore we stop the queue when one more skb might fill
+	 * the ring completely.  We wake it when half way back to
+	 * empty.
+	 */
+	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
+	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
 	/* Initialise the channels */
 	efx_for_each_channel(channel, efx) {
 		efx_for_each_channel_tx_queue(tx_queue, channel)
...
@@ -665,6 +665,8 @@ struct vfdi_status;
  *	should be allocated for this NIC
  * @rxq_entries: Size of receive queues requested by user.
  * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
  * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
  * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
  * @sram_lim_qw: Qword address limit of SRAM
@@ -775,6 +777,9 @@ struct efx_nic {
 	unsigned rxq_entries;
 	unsigned txq_entries;
+	unsigned int txq_stop_thresh;
+	unsigned int txq_wake_thresh;
+
 	unsigned tx_dc_base;
 	unsigned rx_dc_base;
 	unsigned sram_lim_qw;
...
@@ -22,14 +22,6 @@
 #include "nic.h"
 #include "workarounds.h"
 
-/*
- * TX descriptor ring full threshold
- *
- * The tx_queue descriptor ring fill-level must fall below this value
- * before we restart the netif queue
- */
-#define EFX_TXQ_THRESHOLD(_efx)	((_efx)->txq_entries / 2u)
-
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 			       struct efx_tx_buffer *buffer,
 			       unsigned int *pkts_compl,
@@ -138,6 +130,56 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 	return max_descs;
 }
 
+/* Get partner of a TX queue, seen as part of the same net core queue */
+static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
+{
+	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
+		return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
+	else
+		return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
+}
+
+static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
+{
+	/* We need to consider both queues that the net core sees as one */
+	struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
+	struct efx_nic *efx = txq1->efx;
+	unsigned int fill_level;
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	if (likely(fill_level < efx->txq_stop_thresh))
+		return;
+
+	/* We used the stale old_read_count above, which gives us a
+	 * pessimistic estimate of the fill level (which may even
+	 * validly be >= efx->txq_entries).  Now try again using
+	 * read_count (more likely to be a cache miss).
+	 *
+	 * If we read read_count and then conditionally stop the
+	 * queue, it is possible for the completion path to race with
+	 * us and complete all outstanding descriptors in the middle,
+	 * after which there will be no more completions to wake it.
+	 * Therefore we stop the queue first, then read read_count
+	 * (with a memory barrier to ensure the ordering), then
+	 * restart the queue if the fill level turns out to be low
+	 * enough.
+	 */
+	netif_tx_stop_queue(txq1->core_txq);
+	smp_mb();
+	txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+	txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+	if (likely(fill_level < efx->txq_stop_thresh)) {
+		smp_mb();
+		if (likely(!efx->loopback_selftest))
+			netif_tx_start_queue(txq1->core_txq);
+	}
+}
+
 /*
  * Add a socket buffer to a TX queue
  *
@@ -151,7 +193,7 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
  * This function is split out from efx_hard_start_xmit to allow the
  * loopback test to direct packets via specific TX queues.
  *
- * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
+ * Returns NETDEV_TX_OK.
  * You must hold netif_tx_lock() to call this function.
  */
 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
@@ -160,12 +202,11 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	struct device *dma_dev = &efx->pci_dev->dev;
 	struct efx_tx_buffer *buffer;
 	skb_frag_t *fragment;
-	unsigned int len, unmap_len = 0, fill_level, insert_ptr;
+	unsigned int len, unmap_len = 0, insert_ptr;
 	dma_addr_t dma_addr, unmap_addr = 0;
 	unsigned int dma_len;
 	unsigned short dma_flags;
-	int q_space, i = 0;
-	netdev_tx_t rc = NETDEV_TX_OK;
+	int i = 0;
 
 	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
@@ -183,9 +224,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		return NETDEV_TX_OK;
 	}
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	/* Map for DMA.  Use dma_map_single rather than dma_map_page
 	 * since this is more efficient on machines with sparse
 	 * memory.
@@ -205,32 +243,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		/* Add to TX queue, splitting across DMA boundaries */
 		do {
-			if (unlikely(q_space-- <= 0)) {
-				/* It might be that completions have
-				 * happened since the xmit path last
-				 * checked.  Update the xmit path's
-				 * copy of read_count.
-				 */
-				netif_tx_stop_queue(tx_queue->core_txq);
-				/* This memory barrier protects the
-				 * change of queue state from the access
-				 * of read_count. */
-				smp_mb();
-				tx_queue->old_read_count =
-					ACCESS_ONCE(tx_queue->read_count);
-				fill_level = (tx_queue->insert_count
-					      - tx_queue->old_read_count);
-				q_space = efx->txq_entries - 1 - fill_level;
-				if (unlikely(q_space-- <= 0)) {
-					rc = NETDEV_TX_BUSY;
-					goto unwind;
-				}
-				smp_mb();
-				if (likely(!efx->loopback_selftest))
-					netif_tx_start_queue(
-						tx_queue->core_txq);
-			}
-
 			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 			buffer = &tx_queue->buffer[insert_ptr];
 			efx_tsoh_free(tx_queue, buffer);
@@ -277,6 +289,8 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	return NETDEV_TX_OK;
 
  dma_err:
@@ -288,7 +302,6 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	/* Mark the packet as transmitted, and free the SKB ourselves */
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Work backwards until we hit the original insert pointer value */
 	while (tx_queue->insert_count != tx_queue->write_count) {
 		unsigned int pkts_compl = 0, bytes_compl = 0;
@@ -309,7 +322,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 			       DMA_TO_DEVICE);
 	}
 
-	return rc;
+	return NETDEV_TX_OK;
 }
 
 /* Remove packets from the TX queue
@@ -448,6 +461,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 {
 	unsigned fill_level;
 	struct efx_nic *efx = tx_queue->efx;
+	struct efx_tx_queue *txq2;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 
 	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
@@ -455,15 +469,18 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
 	netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);
 
-	/* See if we need to restart the netif queue.  This barrier
-	 * separates the update of read_count from the test of the
-	 * queue state. */
+	/* See if we need to restart the netif queue.  This memory
+	 * barrier ensures that we write read_count (inside
+	 * efx_dequeue_buffers()) before reading the queue status.
+	 */
 	smp_mb();
 	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
 	    likely(efx->port_enabled) &&
 	    likely(netif_device_present(efx->net_dev))) {
-		fill_level = tx_queue->insert_count - tx_queue->read_count;
-		if (fill_level < EFX_TXQ_THRESHOLD(efx))
+		txq2 = efx_tx_queue_partner(tx_queue);
+		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+				 txq2->insert_count - txq2->read_count);
+		if (fill_level <= efx->txq_wake_thresh)
 			netif_tx_wake_queue(tx_queue->core_txq);
 	}
@@ -776,47 +793,19 @@ efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
  * @len: Length of fragment
  * @final_buffer: The final buffer inserted into the queue
  *
- * Push descriptors onto the TX queue.  Return 0 on success or 1 if
- * @tx_queue full.
+ * Push descriptors onto the TX queue.
  */
-static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
-			       dma_addr_t dma_addr, unsigned len,
-			       struct efx_tx_buffer **final_buffer)
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+				dma_addr_t dma_addr, unsigned len,
+				struct efx_tx_buffer **final_buffer)
 {
 	struct efx_tx_buffer *buffer;
 	struct efx_nic *efx = tx_queue->efx;
-	unsigned dma_len, fill_level, insert_ptr;
-	int q_space;
+	unsigned dma_len, insert_ptr;
 
 	EFX_BUG_ON_PARANOID(len <= 0);
 
-	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
-	/* -1 as there is no way to represent all descriptors used */
-	q_space = efx->txq_entries - 1 - fill_level;
-
 	while (1) {
-		if (unlikely(q_space-- <= 0)) {
-			/* It might be that completions have happened
-			 * since the xmit path last checked.  Update
-			 * the xmit path's copy of read_count.
-			 */
-			netif_tx_stop_queue(tx_queue->core_txq);
-			/* This memory barrier protects the change of
-			 * queue state from the access of read_count. */
-			smp_mb();
-			tx_queue->old_read_count =
-				ACCESS_ONCE(tx_queue->read_count);
-			fill_level = (tx_queue->insert_count
-				      - tx_queue->old_read_count);
-			q_space = efx->txq_entries - 1 - fill_level;
-			if (unlikely(q_space-- <= 0)) {
-				*final_buffer = NULL;
-				return 1;
-			}
-			smp_mb();
-			netif_tx_start_queue(tx_queue->core_txq);
-		}
-
 		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
 		buffer = &tx_queue->buffer[insert_ptr];
 		++tx_queue->insert_count;
@@ -847,7 +836,6 @@ static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
 	EFX_BUG_ON_PARANOID(!len);
 	buffer->len = len;
 	*final_buffer = buffer;
-	return 0;
 }
@@ -975,20 +963,19 @@ static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
  * @st:			TSO state
  *
  * Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
- * space in @tx_queue.
+ * of fragment or end-of-packet.
  */
-static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
-					 const struct sk_buff *skb,
-					 struct tso_state *st)
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+					  const struct sk_buff *skb,
+					  struct tso_state *st)
 {
 	struct efx_tx_buffer *buffer;
-	int n, rc;
+	int n;
 
 	if (st->in_len == 0)
-		return 0;
+		return;
 	if (st->packet_space == 0)
-		return 0;
+		return;
 
 	EFX_BUG_ON_PARANOID(st->in_len <= 0);
 	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
@@ -999,26 +986,24 @@ static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
 	st->out_len -= n;
 	st->in_len -= n;
 
-	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
-	if (likely(rc == 0)) {
-		if (st->out_len == 0) {
-			/* Transfer ownership of the skb */
-			buffer->skb = skb;
-			buffer->flags = EFX_TX_BUF_SKB;
-		} else if (st->packet_space != 0) {
-			buffer->flags = EFX_TX_BUF_CONT;
-		}
+	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
 
-		if (st->in_len == 0) {
-			/* Transfer ownership of the DMA mapping */
-			buffer->unmap_len = st->unmap_len;
-			buffer->flags |= st->dma_flags;
-			st->unmap_len = 0;
-		}
+	if (st->out_len == 0) {
+		/* Transfer ownership of the skb */
+		buffer->skb = skb;
+		buffer->flags = EFX_TX_BUF_SKB;
+	} else if (st->packet_space != 0) {
+		buffer->flags = EFX_TX_BUF_CONT;
+	}
+
+	if (st->in_len == 0) {
+		/* Transfer ownership of the DMA mapping */
+		buffer->unmap_len = st->unmap_len;
+		buffer->flags |= st->dma_flags;
+		st->unmap_len = 0;
 	}
 
 	st->dma_addr += n;
-	return rc;
 }
@@ -1112,13 +1097,13 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
  *
  * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
  * @skb was not enqueued.  In all cases @skb is consumed.  Return
- * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
+ * %NETDEV_TX_OK.
  */
 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			       struct sk_buff *skb)
 {
 	struct efx_nic *efx = tx_queue->efx;
-	int frag_i, rc, rc2 = NETDEV_TX_OK;
+	int frag_i, rc;
 	struct tso_state state;
 
 	/* Find the packet protocol and sanity-check it */
@@ -1150,11 +1135,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 			goto mem_err;
 
 	while (1) {
-		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
-		if (unlikely(rc)) {
-			rc2 = NETDEV_TX_BUSY;
-			goto unwind;
-		}
+		tso_fill_packet_with_fragment(tx_queue, skb, &state);
 
 		/* Move onto the next fragment? */
 		if (state.in_len == 0) {
@@ -1178,6 +1159,8 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	/* Pass off to hardware */
 	efx_nic_push_buffers(tx_queue);
 
+	efx_tx_maybe_stop_queue(tx_queue);
+
 	tx_queue->tso_bursts++;
 	return NETDEV_TX_OK;
 
@@ -1186,7 +1169,6 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 		   "Out of memory for TSO headers, or DMA mapping error\n");
 	dev_kfree_skb_any(skb);
 
- unwind:
 	/* Free the DMA mapping we were in the process of writing out */
 	if (state.unmap_len) {
 		if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
@@ -1198,7 +1180,7 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	}
 
 	efx_enqueue_unwind(tx_queue);
-	return rc2;
+	return NETDEV_TX_OK;
 }
...