/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include <linux/cache.h>
#include "net_driver.h"
#include "efx.h"
#include "io.h"
#include "nic.h"
#include "workarounds.h"
#include "ef10_regs.h"

#ifdef EFX_USE_PIO

#define EFX_PIOBUF_SIZE_MAX ER_DZ_TX_PIOBUF_SIZE
#define EFX_PIOBUF_SIZE_DEF ALIGN(256, L1_CACHE_BYTES)
unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;

#endif /* EFX_USE_PIO */

static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer,
			       unsigned int *pkts_compl,
			       unsigned int *bytes_compl)
{
	if (buffer->unmap_len) {
		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
					 buffer->unmap_len);
		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
					 DMA_TO_DEVICE);
		else
			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
				       DMA_TO_DEVICE);
		buffer->unmap_len = 0;
	}

	if (buffer->flags & EFX_TX_BUF_SKB) {
		(*pkts_compl)++;
		(*bytes_compl) += buffer->skb->len;
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
			   "TX queue %d transmission id %x complete\n",
			   tx_queue->queue, tx_queue->read_count);
	} else if (buffer->flags & EFX_TX_BUF_HEAP) {
		kfree(buffer->heap_buf);
	}

	buffer->len = 0;
	buffer->flags = 0;
}

static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb);

static inline unsigned
efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
{
	/* Depending on the NIC revision, we can use descriptor
	 * lengths up to 8K or 8K-1.  However, since PCI Express
	 * devices must split read requests at 4K boundaries, there is
	 * little benefit from using descriptors that cross those
	 * boundaries and we keep things simple by not doing so.
	 */
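	/* Illustrative example (hypothetical address, assuming a 4K
	 * EFX_PAGE_SIZE): a fragment whose DMA address ends in 0xf00 is
	 * limited to 0x100 bytes here, so the next descriptor starts
	 * exactly on the 4K boundary.
	 */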
	unsigned len = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;

	/* Work around hardware bug for unaligned buffers. */
	if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
		len = min_t(unsigned, len, 512 - (dma_addr & 0xf));

	return len;
}

unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
{
	/* Header and payload descriptor for each output segment, plus
	 * one for every input fragment boundary within a segment
	 */
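	/* Illustrative numbers only: if EFX_TSO_MAX_SEGS were 100 and
	 * MAX_SKB_FRAGS were 17, this would start at 100 * 2 + 17 = 217
	 * descriptors.
	 */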
	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;

	/* Possibly one more per segment for the alignment workaround,
	 * or for option descriptors
	 */
	if (EFX_WORKAROUND_5391(efx) || efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		max_descs += EFX_TSO_MAX_SEGS;

	/* Possibly more for PCIe page boundaries within input fragments */
	if (PAGE_SIZE > EFX_PAGE_SIZE)
		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
				   DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));

	return max_descs;
}

/* Get partner of a TX queue, seen as part of the same net core queue */
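/* The two queues of a pair differ only in the EFX_TXQ_TYPE_OFFLOAD bit
 * of their queue number, so the partner is reached by stepping that many
 * entries up or down the channel's tx_queue array.
 */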
static struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
{
	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
		return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
	else
		return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
}

static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
	/* We need to consider both queues that the net core sees as one */
	struct efx_tx_queue *txq2 = efx_tx_queue_partner(txq1);
	struct efx_nic *efx = txq1->efx;
	unsigned int fill_level;

	fill_level = max(txq1->insert_count - txq1->old_read_count,
			 txq2->insert_count - txq2->old_read_count);
	if (likely(fill_level < efx->txq_stop_thresh))
		return;

	/* We used the stale old_read_count above, which gives us a
	 * pessimistic estimate of the fill level (which may even
	 * validly be >= efx->txq_entries).  Now try again using
	 * read_count (more likely to be a cache miss).
	 *
	 * If we read read_count and then conditionally stop the
	 * queue, it is possible for the completion path to race with
	 * us and complete all outstanding descriptors in the middle,
	 * after which there will be no more completions to wake it.
	 * Therefore we stop the queue first, then read read_count
	 * (with a memory barrier to ensure the ordering), then
	 * restart the queue if the fill level turns out to be low
	 * enough.
	 */
	netif_tx_stop_queue(txq1->core_txq);
	smp_mb();
	txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
	txq2->old_read_count = ACCESS_ONCE(txq2->read_count);

	fill_level = max(txq1->insert_count - txq1->old_read_count,
			 txq2->insert_count - txq2->old_read_count);
	EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
	if (likely(fill_level < efx->txq_stop_thresh)) {
		smp_mb();
		if (likely(!efx->loopback_selftest))
			netif_tx_start_queue(txq1->core_txq);
	}
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped,
 * and the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK.
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct device *dma_dev = &efx->pci_dev->dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	unsigned int len, unmap_len = 0, insert_ptr;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	unsigned short dma_flags;
	int i = 0;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	if (skb_shinfo(skb)->gso_size)
		return efx_enqueue_skb_tso(tx_queue, skb);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	/* Pad if necessary */
	if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
		EFX_BUG_ON_PARANOID(skb->data_len);
		len = 32 + 1;
		if (skb_pad(skb, len - skb->len))
			return NETDEV_TX_OK;
	}

	/* Map for DMA.  Use dma_map_single rather than dma_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
	 */
	dma_flags = EFX_TX_BUF_MAP_SINGLE;
	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
			goto dma_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
			buffer = &tx_queue->buffer[insert_ptr];
			EFX_BUG_ON_PARANOID(buffer->flags);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = efx_max_tx_len(efx, dma_addr);
			if (likely(dma_len >= len))
				dma_len = len;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			buffer->flags = EFX_TX_BUF_CONT;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = skb_frag_size(fragment);
		i++;
		/* Map for DMA */
		dma_flags = 0;
		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
					    DMA_TO_DEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->flags = EFX_TX_BUF_SKB | dma_flags;

	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	efx_tx_maybe_stop_queue(tx_queue);

	return NETDEV_TX_OK;

 dma_err:
	netif_err(efx, tx_err, efx->net_dev,
		  " TX queue %d could not map skb with %d bytes %d "
		  "fragments for DMA\n", tx_queue->queue, skb->len,
		  skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any(skb);

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		unsigned int pkts_compl = 0, bytes_compl = 0;
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
	}

	/* Free the fragment we were mid-way through pushing */
	if (unmap_len) {
		if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(dma_dev, unmap_addr, unmap_len,
					 DMA_TO_DEVICE);
		else
			dma_unmap_page(dma_dev, unmap_addr, unmap_len,
				       DMA_TO_DEVICE);
	}

	return NETDEV_TX_OK;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index,
				unsigned int *pkts_compl,
				unsigned int *bytes_compl)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;

	stop_index = (index + 1) & tx_queue->ptr_mask;
	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];

		if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
		    unlikely(buffer->len == 0)) {
			netif_err(efx, tx_err, efx->net_dev,
				  "TX queue %d spurious TX completion id %x\n",
				  tx_queue->queue, read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
	}
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
				struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;
	unsigned index, type;

	EFX_WARN_ON_PARANOID(!netif_device_present(net_dev));

	/* PTP "event" packet */
	if (unlikely(efx_xmit_with_hwtstamp(skb)) &&
	    unlikely(efx_ptp_is_ptp_tx(efx, skb))) {
		return efx_ptp_tx(efx, skb);
	}

	index = skb_get_queue_mapping(skb);
	type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
	if (index >= efx->n_tx_channels) {
		index -= efx->n_tx_channels;
		type |= EFX_TXQ_TYPE_HIGHPRI;
	}
	tx_queue = efx_get_tx_queue(efx, index, type);

	return efx_enqueue_skb(tx_queue, skb);
}

void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;

	/* Must be inverse of queue lookup in efx_hard_start_xmit() */
	tx_queue->core_txq =
		netdev_get_tx_queue(efx->net_dev,
				    tx_queue->queue / EFX_TXQ_TYPES +
				    ((tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
				     efx->n_tx_channels : 0));
}

int efx_setup_tc(struct net_device *net_dev, u8 num_tc)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	unsigned tc;
	int rc;

	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC)
		return -EINVAL;

	if (num_tc == net_dev->num_tc)
		return 0;

	for (tc = 0; tc < num_tc; tc++) {
		net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
		net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
	}

	if (num_tc > net_dev->num_tc) {
		/* Initialise high-priority queues as necessary */
		efx_for_each_channel(channel, efx) {
			efx_for_each_possible_channel_tx_queue(tx_queue,
							       channel) {
				if (!(tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI))
					continue;
				if (!tx_queue->buffer) {
					rc = efx_probe_tx_queue(tx_queue);
					if (rc)
						return rc;
				}
				if (!tx_queue->initialised)
					efx_init_tx_queue(tx_queue);
				efx_init_tx_queue_core_txq(tx_queue);
			}
		}
	} else {
		/* Reduce number of classes before number of queues */
		net_dev->num_tc = num_tc;
	}

	rc = netif_set_real_num_tx_queues(net_dev,
					  max_t(int, num_tc, 1) *
					  efx->n_tx_channels);
	if (rc)
		return rc;

	/* Do not destroy high-priority queues when they become
	 * unused.  We would have to flush them first, and it is
	 * fairly difficult to flush a subset of TX queues.  Leave
	 * it to efx_fini_channels().
	 */

	net_dev->num_tc = num_tc;
	return 0;
}

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;
	struct efx_tx_queue *txq2;
	unsigned int pkts_compl = 0, bytes_compl = 0;

	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);

	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
	netdev_tx_completed_queue(tx_queue->core_txq, pkts_compl, bytes_compl);

	if (pkts_compl > 1)
		++tx_queue->merge_events;

	/* See if we need to restart the netif queue.  This memory
	 * barrier ensures that we write read_count (inside
	 * efx_dequeue_buffers()) before reading the queue status.
	 */
	smp_mb();
	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
	    likely(efx->port_enabled) &&
	    likely(netif_device_present(efx->net_dev))) {
		txq2 = efx_tx_queue_partner(tx_queue);
		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
				 txq2->insert_count - txq2->read_count);
		if (fill_level <= efx->txq_wake_thresh)
			netif_tx_wake_queue(tx_queue->core_txq);
	}

	/* Check whether the hardware queue is now empty */
	if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
		tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
		if (tx_queue->read_count == tx_queue->old_write_count) {
			smp_mb();
			tx_queue->empty_read_count =
				tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
		}
	}
}

/* Size of page-based TSO header buffers.  Larger blocks must be
 * allocated from the heap.
 */
#define TSOH_STD_SIZE	128
#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)

/* At most half the descriptors in the queue at any time will refer to
 * a TSO header buffer, since they must always be followed by a
 * payload descriptor referring to an skb.
 */
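/* Illustrative numbers only: with 4K pages TSOH_PER_PAGE is 32, so a
 * 1024-entry ring would need DIV_ROUND_UP(1024, 64) = 16 header pages.
 */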
static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
{
	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
}

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	tx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating TX queue %d size %#x mask %#x\n",
		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);

	/* Allocate software ring */
	tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
				   GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;

	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
		tx_queue->tsoh_page =
			kcalloc(efx_tsoh_page_count(tx_queue),
				sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
		if (!tx_queue->tsoh_page) {
			rc = -ENOMEM;
			goto fail1;
		}
	}

	/* Allocate hardware ring */
	rc = efx_nic_probe_tx(tx_queue);
	if (rc)
		goto fail2;

	return 0;

fail2:
	kfree(tx_queue->tsoh_page);
	tx_queue->tsoh_page = NULL;
fail1:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}

void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->old_write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;

	/* Set up TX descriptor ring */
	efx_nic_init_tx(tx_queue);

	tx_queue->initialised = true;
}

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "shutting down TX queue %d\n", tx_queue->queue);

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		unsigned int pkts_compl = 0, bytes_compl = 0;
		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);

		++tx_queue->read_count;
	}
	netdev_tx_reset_queue(tx_queue->core_txq);
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	int i;

	if (!tx_queue->buffer)
		return;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "destroying TX queue %d\n", tx_queue->queue);
	efx_nic_remove_tx(tx_queue);

	if (tx_queue->tsoh_page) {
		for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
			efx_nic_free_buffer(tx_queue->efx,
					    &tx_queue->tsoh_page[i]);
		kfree(tx_queue->tsoh_page);
		tx_queue->tsoh_page = NULL;
	}

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}


/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than GSO.
 *
 * Requires TX checksum offload support.
 */
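
/* In outline: the driver walks the payload and, for each segment it
 * generates, either copies and patches the TCP/IP headers itself (older
 * NICs) or, on EF10, prepends a TSO option descriptor and reuses the
 * original headers; the payload itself is never copied.
 */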

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#define TSOH_OFFSET	0
#else
#define TSOH_OFFSET	NET_IP_ALIGN
#endif

#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))

/**
 * struct tso_state - TSO state for an SKB
 * @out_len: Remaining length in current segment
 * @seqnum: Current sequence number
 * @ipv4_id: Current IPv4 ID, host endian
 * @packet_space: Remaining space in current packet
 * @dma_addr: DMA address of current position
 * @in_len: Remaining length in current SKB fragment
 * @unmap_len: Length of SKB fragment
 * @unmap_addr: DMA address of SKB fragment
 * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
 * @protocol: Network protocol (after any VLAN header)
 * @ip_off: Offset of IP header
 * @tcp_off: Offset of TCP header
 * @header_len: Number of bytes of header
 * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
 * @header_dma_addr: Header DMA address, when using option descriptors
 * @header_unmap_len: Header DMA mapped length, or 0 if not using option
 *	descriptors
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
	/* Output position */
	unsigned out_len;
	unsigned seqnum;
	u16 ipv4_id;
	unsigned packet_space;

	/* Input position */
	dma_addr_t dma_addr;
	unsigned in_len;
	unsigned unmap_len;
	dma_addr_t unmap_addr;
	unsigned short dma_flags;

	__be16 protocol;
	unsigned int ip_off;
	unsigned int tcp_off;
	unsigned header_len;
	unsigned int ip_base_len;
	dma_addr_t header_dma_addr;
	unsigned int header_unmap_len;
};


/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.  Return the protocol number.
 */
static __be16 efx_tso_check_protocol(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;

	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
			    protocol);
	if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	}

	if (protocol == htons(ETH_P_IP)) {
		EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
	} else {
		EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
		EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
	}
	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
			     + (tcp_hdr(skb)->doff << 2u)) >
			    skb_headlen(skb));

	return protocol;
}

static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer, unsigned int len)
{
	u8 *result;

	EFX_BUG_ON_PARANOID(buffer->len);
	EFX_BUG_ON_PARANOID(buffer->flags);
	EFX_BUG_ON_PARANOID(buffer->unmap_len);

	if (likely(len <= TSOH_STD_SIZE - TSOH_OFFSET)) {
		unsigned index =
			(tx_queue->insert_count & tx_queue->ptr_mask) / 2;
		struct efx_buffer *page_buf =
			&tx_queue->tsoh_page[index / TSOH_PER_PAGE];
		unsigned offset =
			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + TSOH_OFFSET;

		if (unlikely(!page_buf->addr) &&
		    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
					 GFP_ATOMIC))
			return NULL;

		result = (u8 *)page_buf->addr + offset;
		buffer->dma_addr = page_buf->dma_addr + offset;
		buffer->flags = EFX_TX_BUF_CONT;
	} else {
		tx_queue->tso_long_headers++;

		buffer->heap_buf = kmalloc(TSOH_OFFSET + len, GFP_ATOMIC);
		if (unlikely(!buffer->heap_buf))
			return NULL;
		result = (u8 *)buffer->heap_buf + TSOH_OFFSET;
		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
	}

	buffer->len = len;

	return result;
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue:		Efx TX queue
 * @dma_addr:		DMA address of fragment
 * @len:		Length of fragment
 * @final_buffer:	The final buffer inserted into the queue
 *
 * Push descriptors onto the TX queue.
 */
static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
				dma_addr_t dma_addr, unsigned len,
				struct efx_tx_buffer **final_buffer)
{
	struct efx_tx_buffer *buffer;
	struct efx_nic *efx = tx_queue->efx;
	unsigned dma_len, insert_ptr;

	EFX_BUG_ON_PARANOID(len <= 0);

	while (1) {
		insert_ptr = tx_queue->insert_count & tx_queue->ptr_mask;
		buffer = &tx_queue->buffer[insert_ptr];
		++tx_queue->insert_count;

		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
				    tx_queue->read_count >=
				    efx->txq_entries);

		EFX_BUG_ON_PARANOID(buffer->len);
		EFX_BUG_ON_PARANOID(buffer->unmap_len);
		EFX_BUG_ON_PARANOID(buffer->flags);

		buffer->dma_addr = dma_addr;

		dma_len = efx_max_tx_len(efx, dma_addr);

		/* If there is enough space to send then do so */
		if (dma_len >= len)
			break;

		buffer->len = dma_len;
		buffer->flags = EFX_TX_BUF_CONT;
		dma_addr += dma_len;
		len -= dma_len;
	}

	EFX_BUG_ON_PARANOID(!len);
	buffer->len = len;
	*final_buffer = buffer;
}


/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
			      struct efx_tx_buffer *buffer, u8 *header)
{
	if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
		buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
						  header, buffer->len,
						  DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
					       buffer->dma_addr))) {
			kfree(buffer->heap_buf);
			buffer->len = 0;
			buffer->flags = 0;
			return -ENOMEM;
		}
		buffer->unmap_len = buffer->len;
		buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
	}

	++tx_queue->insert_count;
	return 0;
}


/* Remove buffers put into a tx_queue.  None of the buffers must have
 * an skb attached.
 */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   tx_queue->ptr_mask];
		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
	}
}


/* Parse the SKB header and initialise state. */
static int tso_start(struct tso_state *st, struct efx_nic *efx,
		     const struct sk_buff *skb)
{
	bool use_options = efx_nic_rev(efx) >= EFX_REV_HUNT_A0;
	struct device *dma_dev = &efx->pci_dev->dev;
	unsigned int header_len, in_len;
	dma_addr_t dma_addr;

	st->ip_off = skb_network_header(skb) - skb->data;
	st->tcp_off = skb_transport_header(skb) - skb->data;
	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
	in_len = skb_headlen(skb) - header_len;
	st->header_len = header_len;
	st->in_len = in_len;
	if (st->protocol == htons(ETH_P_IP)) {
		st->ip_base_len = st->header_len - st->ip_off;
		st->ipv4_id = ntohs(ip_hdr(skb)->id);
	} else {
		st->ip_base_len = st->header_len - st->tcp_off;
		st->ipv4_id = 0;
	}
	st->seqnum = ntohl(tcp_hdr(skb)->seq);

	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

	st->out_len = skb->len - header_len;

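	/* Without TSO option descriptors we map only the payload part of
	 * the linear area and will copy the headers for each segment; on
	 * EF10 we map the whole linear header area once and describe each
	 * segment's headers with an option descriptor instead.
	 */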
	if (!use_options) {
		st->header_unmap_len = 0;

		if (likely(in_len == 0)) {
			st->dma_flags = 0;
			st->unmap_len = 0;
			return 0;
		}

		dma_addr = dma_map_single(dma_dev, skb->data + header_len,
					  in_len, DMA_TO_DEVICE);
		st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
		st->dma_addr = dma_addr;
		st->unmap_addr = dma_addr;
		st->unmap_len = in_len;
	} else {
		dma_addr = dma_map_single(dma_dev, skb->data,
					  skb_headlen(skb), DMA_TO_DEVICE);
		st->header_dma_addr = dma_addr;
		st->header_unmap_len = skb_headlen(skb);
		st->dma_flags = 0;
		st->dma_addr = dma_addr + header_len;
		st->unmap_len = 0;
	}

	return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0;
}

static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
			    skb_frag_t *frag)
{
	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
					  skb_frag_size(frag), DMA_TO_DEVICE);
	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
		st->dma_flags = 0;
		st->unmap_len = skb_frag_size(frag);
		st->in_len = skb_frag_size(frag);
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}


/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.
 */
static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
					  const struct sk_buff *skb,
					  struct tso_state *st)
{
	struct efx_tx_buffer *buffer;
	int n;

	if (st->in_len == 0)
		return;
	if (st->packet_space == 0)
		return;

	EFX_BUG_ON_PARANOID(st->in_len <= 0);
	EFX_BUG_ON_PARANOID(st->packet_space <= 0);

	n = min(st->in_len, st->packet_space);

	st->packet_space -= n;
	st->out_len -= n;
	st->in_len -= n;

	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);

	if (st->out_len == 0) {
		/* Transfer ownership of the skb */
		buffer->skb = skb;
		buffer->flags = EFX_TX_BUF_SKB;
	} else if (st->packet_space != 0) {
		buffer->flags = EFX_TX_BUF_CONT;
	}

	if (st->in_len == 0) {
		/* Transfer ownership of the DMA mapping */
		buffer->unmap_len = st->unmap_len;
		buffer->flags |= st->dma_flags;
		st->unmap_len = 0;
	}

	st->dma_addr += n;
}


/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 * @st:			TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
974
 * success, or -%ENOMEM if failed to alloc header.
 */
static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				const struct sk_buff *skb,
				struct tso_state *st)
{
	struct efx_tx_buffer *buffer =
		&tx_queue->buffer[tx_queue->insert_count & tx_queue->ptr_mask];
	bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
	u8 tcp_flags_clear;

	if (!is_last) {
		st->packet_space = skb_shinfo(skb)->gso_size;
		tcp_flags_clear = 0x09; /* mask out FIN and PSH */
	} else {
		st->packet_space = st->out_len;
		tcp_flags_clear = 0x00;
	}

	if (!st->header_unmap_len) {
		/* Allocate and insert a DMA-mapped header buffer. */
		struct tcphdr *tsoh_th;
		unsigned ip_length;
		u8 *header;
		int rc;

		header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
		if (!header)
			return -ENOMEM;

		tsoh_th = (struct tcphdr *)(header + st->tcp_off);

		/* Copy and update the headers. */
		memcpy(header, skb->data, st->header_len);

		tsoh_th->seq = htonl(st->seqnum);
		((u8 *)tsoh_th)[13] &= ~tcp_flags_clear;

		ip_length = st->ip_base_len + st->packet_space;

		if (st->protocol == htons(ETH_P_IP)) {
			struct iphdr *tsoh_iph =
				(struct iphdr *)(header + st->ip_off);

			tsoh_iph->tot_len = htons(ip_length);
			tsoh_iph->id = htons(st->ipv4_id);
		} else {
			struct ipv6hdr *tsoh_iph =
				(struct ipv6hdr *)(header + st->ip_off);

			tsoh_iph->payload_len = htons(ip_length);
		}

		rc = efx_tso_put_header(tx_queue, buffer, header);
		if (unlikely(rc))
			return rc;
	} else {
		/* Send the original headers with a TSO option descriptor
		 * in front
		 */
		u8 tcp_flags = ((u8 *)tcp_hdr(skb))[13] & ~tcp_flags_clear;

		buffer->flags = EFX_TX_BUF_OPTION;
		buffer->len = 0;
		buffer->unmap_len = 0;
		EFX_POPULATE_QWORD_5(buffer->option,
				     ESF_DZ_TX_DESC_IS_OPT, 1,
				     ESF_DZ_TX_OPTION_TYPE,
				     ESE_DZ_TX_OPTION_DESC_TSO,
				     ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
				     ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
				     ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
		++tx_queue->insert_count;

		/* We mapped the headers in tso_start().  Unmap them
		 * when the last segment is completed.
		 */
		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   tx_queue->ptr_mask];
		buffer->dma_addr = st->header_dma_addr;
		buffer->len = st->header_len;
		if (is_last) {
			buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
			buffer->unmap_len = st->header_unmap_len;
			/* Ensure we only unmap them once in case of a
			 * later DMA mapping error and rollback
			 */
			st->header_unmap_len = 0;
		} else {
			buffer->flags = EFX_TX_BUF_CONT;
			buffer->unmap_len = 0;
		}
		++tx_queue->insert_count;
	}

	st->seqnum += skb_shinfo(skb)->gso_size;

	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
	++st->ipv4_id;

	++tx_queue->tso_packets;

	return 0;
}


/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue:		Efx TX queue
 * @skb:		Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
1089
 * %NETDEV_TX_OK.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	int frag_i, rc;
	struct tso_state state;

	/* Find the packet protocol and sanity-check it */
	state.protocol = efx_tso_check_protocol(skb);

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	rc = tso_start(&state, efx, skb);
	if (rc)
		goto mem_err;

	if (likely(state.in_len == 0)) {
		/* Grab the first payload fragment. */
		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
		frag_i = 0;
		rc = tso_get_fragment(&state, efx,
				      skb_shinfo(skb)->frags + frag_i);
		if (rc)
			goto mem_err;
	} else {
		/* Payload starts in the header area. */
		frag_i = -1;
	}

	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
		goto mem_err;

	while (1) {
		tso_fill_packet_with_fragment(tx_queue, skb, &state);

		/* Move onto the next fragment? */
		if (state.in_len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			rc = tso_get_fragment(&state, efx,
					      skb_shinfo(skb)->frags + frag_i);
			if (rc)
				goto mem_err;
		}

		/* Start at new packet? */
		if (state.packet_space == 0 &&
		    tso_start_new_packet(tx_queue, skb, &state) < 0)
			goto mem_err;
	}

	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	efx_tx_maybe_stop_queue(tx_queue);

	tx_queue->tso_bursts++;
	return NETDEV_TX_OK;

 mem_err:
	netif_err(efx, tx_err, efx->net_dev,
		  "Out of memory for TSO headers, or DMA mapping error\n");
	dev_kfree_skb_any(skb);

	/* Free the DMA mapping we were in the process of writing out */
	if (state.unmap_len) {
		if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
					 state.unmap_len, DMA_TO_DEVICE);
		else
			dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
				       state.unmap_len, DMA_TO_DEVICE);
	}

	/* Free the header DMA mapping, if using option descriptors */
	if (state.header_unmap_len)
		dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
				 state.header_unmap_len, DMA_TO_DEVICE);

	efx_enqueue_unwind(tx_queue);
	return NETDEV_TX_OK;
}