/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2011 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/prefetch.h>
#include <linux/moduleparam.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/checksum.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"

/* Number of RX descriptors pushed at once. */
#define EFX_RX_BATCH  8

/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
 * ring, this number is divided by the number of buffers per page to calculate
 * the number of pages to store in the RX page recycle ring.
 */
#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_BATCH)

/* Maximum length for an RX descriptor sharing a page */
#define EFX_RX_HALF_PAGE ((PAGE_SIZE >> 1) - sizeof(struct efx_rx_page_state) \
			  - EFX_PAGE_IP_ALIGN)

/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS  64u

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold;

/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
				      EFX_RX_USR_BUF_SIZE)

/*
 * RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow, plus one packet-worth
 * to allow pipelined receives.
 */
#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)

static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
{
	return page_address(buf->page) + buf->page_offset;
}

static inline u32 efx_rx_buf_hash(const u8 *eh)
{
	/* The ethernet header is always directly after any hash. */
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || NET_IP_ALIGN % 4 == 0
	return __le32_to_cpup((const __le32 *)(eh - 4));
#else
	const u8 *data = eh - 4;
	return (u32)data[0]	  |
	       (u32)data[1] << 8  |
	       (u32)data[2] << 16 |
	       (u32)data[3] << 24;
#endif
}

static inline struct efx_rx_buffer *
efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
{
	if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
		return efx_rx_buffer(rx_queue, 0);
	else
		return rx_buf + 1;
}

static inline void efx_sync_rx_buffer(struct efx_nic *efx,
				      struct efx_rx_buffer *rx_buf,
				      unsigned int len)
{
	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
				DMA_FROM_DEVICE);
}

/* Return true if this is the last RX buffer using a page. */
static inline bool efx_rx_is_last_buffer(struct efx_nic *efx,
					 struct efx_rx_buffer *rx_buf)
{
	return (rx_buf->page_offset >= (PAGE_SIZE >> 1) ||
		efx->rx_dma_len > EFX_RX_HALF_PAGE);
}

/* Check the RX page recycle ring for a page that can be reused. */
static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	struct page *page;
	struct efx_rx_page_state *state;
	unsigned index;

	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
	page = rx_queue->page_ring[index];
	if (page == NULL)
		return NULL;

	rx_queue->page_ring[index] = NULL;
	/* page_remove cannot exceed page_add. */
	if (rx_queue->page_remove != rx_queue->page_add)
		++rx_queue->page_remove;

	/* If page_count is 1 then we hold the only reference to this page. */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;
	} else {
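		/* Someone else still holds a reference to this page, so it
		 * cannot be recycled: unmap it and drop our reference.
		 */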
		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
		++rx_queue->page_recycle_failed;
	}

	return NULL;
}

/**
 * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
 *
 * @rx_queue:		Efx RX queue
 *
 * This allocates memory for EFX_RX_BATCH receive buffers, maps them for DMA,
 * and populates a struct efx_rx_buffer for each one. Returns a negative error
 * code or 0 on success. If a single page can be split between two buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;
	struct page *page;
	unsigned int page_offset;
	struct efx_rx_page_state *state;
	dma_addr_t dma_addr;
	unsigned index, count;

	/* We can split a page between two buffers */
	BUILD_BUG_ON(EFX_RX_BATCH & 1);

	for (count = 0; count < EFX_RX_BATCH; ++count) {
		page = efx_reuse_page(rx_queue);
		if (page == NULL) {
			page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
					   efx->rx_buffer_order);
			if (unlikely(page == NULL))
				return -ENOMEM;
			dma_addr =
				dma_map_page(&efx->pci_dev->dev, page, 0,
					     PAGE_SIZE << efx->rx_buffer_order,
					     DMA_FROM_DEVICE);
			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
						       dma_addr))) {
				__free_pages(page, efx->rx_buffer_order);
				return -EIO;
			}
			state = page_address(page);
			state->dma_addr = dma_addr;
		} else {
			state = page_address(page);
			dma_addr = state->dma_addr;
		}
		get_page(page);

		dma_addr += sizeof(struct efx_rx_page_state);
		page_offset = sizeof(struct efx_rx_page_state);

	split:
		index = rx_queue->added_count & rx_queue->ptr_mask;
		rx_buf = efx_rx_buffer(rx_queue, index);
		rx_buf->dma_addr = dma_addr + EFX_PAGE_IP_ALIGN;
		rx_buf->page = page;
		rx_buf->page_offset = page_offset + EFX_PAGE_IP_ALIGN;
		rx_buf->len = efx->rx_dma_len;
		++rx_queue->added_count;

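		/* (~count & 1) is true on even iterations, i.e. when the
		 * buffer just added used the first half of a page; if the
		 * DMA length fits in half a page, carve the second half
		 * into the next buffer.
		 */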
		if ((~count & 1) && (efx->rx_dma_len <= EFX_RX_HALF_PAGE)) {
			/* Use the second half of the page */
			get_page(page);
			dma_addr += (PAGE_SIZE >> 1);
			page_offset += (PAGE_SIZE >> 1);
			++count;
			goto split;
		}
	}

	return 0;
}

/* Unmap a DMA-mapped page.  This function is only called for the final RX
 * buffer in a page.
 */
static void efx_unmap_rx_buffer(struct efx_nic *efx,
				struct efx_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;

	if (page) {
		struct efx_rx_page_state *state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev,
			       state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
	}
}

static void efx_free_rx_buffer(struct efx_rx_buffer *rx_buf)
{
	if (rx_buf->page) {
		put_page(rx_buf->page);
		rx_buf->page = NULL;
	}
}

/* Attempt to recycle the page if there is an RX recycle ring; the page can
 * only be added if this is the final RX buffer, to prevent pages being used in
 * the descriptor ring and appearing in the recycle ring simultaneously.
 */
static void efx_recycle_rx_page(struct efx_channel *channel,
				struct efx_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = rx_queue->efx;
	unsigned index;

	/* Only recycle the page after processing the final buffer. */
	if (!efx_rx_is_last_buffer(efx, rx_buf))
		return;

	index = rx_queue->page_add & rx_queue->page_ptr_mask;
	if (rx_queue->page_ring[index] == NULL) {
		unsigned read_index = rx_queue->page_remove &
			rx_queue->page_ptr_mask;

		/* The next slot in the recycle ring is available, but
		 * increment page_remove if the read pointer currently
		 * points here.
		 */
		if (read_index == index)
			++rx_queue->page_remove;
		rx_queue->page_ring[index] = page;
		++rx_queue->page_add;
		return;
	}
	++rx_queue->page_recycle_full;
	efx_unmap_rx_buffer(efx, rx_buf);
	put_page(rx_buf->page);
}

static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
			       struct efx_rx_buffer *rx_buf)
{
	/* Release the page reference we hold for the buffer. */
	if (rx_buf->page)
		put_page(rx_buf->page);

	/* If this is the last buffer in a page, unmap and free it. */
	if (efx_rx_is_last_buffer(rx_queue->efx, rx_buf)) {
		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
		efx_free_rx_buffer(rx_buf);
	}
	rx_buf->page = NULL;
}

/* Recycle the pages that are used by buffers that have just been received. */
static void efx_recycle_rx_buffers(struct efx_channel *channel,
				   struct efx_rx_buffer *rx_buf,
				   unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	do {
		efx_recycle_rx_page(channel, rx_buf);
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
	} while (--n_frags);
}

/**
 * efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue:		RX descriptor queue
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->max_fill. If there is insufficient atomic
 * memory to do so, a slow fill will be scheduled.
 *
 * The caller must provide serialisation (none is used here). In practice,
 * this means this function must run from the NAPI handler, or be called
 * when NAPI is disabled.
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
{
	unsigned fill_level;
	int space, rc = 0;

	/* Calculate current fill level, and exit if we don't need to fill */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
	if (fill_level >= rx_queue->fast_fill_trigger)
		goto out;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	space = rx_queue->max_fill - fill_level;
	EFX_BUG_ON_PARANOID(space < EFX_RX_BATCH);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filling descriptor ring from"
		   " level %d to level %d\n",
		   efx_rx_queue_index(rx_queue), fill_level,
		   rx_queue->max_fill);


	do {
		rc = efx_init_rx_buffers(rx_queue);
		if (unlikely(rc)) {
			/* Ensure that we don't leave the rx queue empty */
			if (rx_queue->added_count == rx_queue->removed_count)
				efx_schedule_slow_fill(rx_queue);
			goto out;
		}
	} while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filled descriptor ring "
		   "to level %d\n", efx_rx_queue_index(rx_queue),
		   rx_queue->added_count - rx_queue->removed_count);

 out:
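	/* Tell the NIC about any descriptors added since the last notify */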
	if (rx_queue->notified_count != rx_queue->added_count)
		efx_nic_notify_rx_desc(rx_queue);
}

void efx_rx_slow_fill(unsigned long context)
{
	struct efx_rx_queue *rx_queue = (struct efx_rx_queue *)context;

	/* Post an event to cause NAPI to run and refill the queue */
	efx_nic_generate_fill_event(rx_queue);
	++rx_queue->slow_fill_count;
}

static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
				     struct efx_rx_buffer *rx_buf,
				     int len)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	rx_buf->flags |= EFX_RX_PKT_DISCARD;

	if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) {
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  " RX queue %d seriously overlength "
				  "RX event (0x%x > 0x%x+0x%x). Leaking\n",
				  efx_rx_queue_index(rx_queue), len, max_len,
				  efx->type->rx_buffer_padding);
		efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
	} else {
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  " RX queue %d overlength RX event "
				  "(0x%x > 0x%x)\n",
				  efx_rx_queue_index(rx_queue), len, max_len);
	}

	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
}

/* Pass a received packet up through GRO.  GRO can handle pages
 * regardless of checksum state and skbs with a good checksum.
 */
static void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
		  unsigned int n_frags, u8 *eh)
{
	struct napi_struct *napi = &channel->napi_str;
	gro_result_t gro_result;
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	skb = napi_get_frags(napi);
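	/* No skb available: drop the packet and release each buffer's
	 * page reference.
	 */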
	if (unlikely(!skb)) {
		while (n_frags--) {
			put_page(rx_buf->page);
			rx_buf->page = NULL;
			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
		}
		return;
	}

	if (efx->net_dev->features & NETIF_F_RXHASH)
		skb->rxhash = efx_rx_buf_hash(eh);
	skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);

	for (;;) {
		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
				   rx_buf->page, rx_buf->page_offset,
				   rx_buf->len);
		rx_buf->page = NULL;
		skb->len += rx_buf->len;
		if (skb_shinfo(skb)->nr_frags == n_frags)
			break;

		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
	}

	skb->data_len = skb->len;
	skb->truesize += n_frags * efx->rx_buffer_truesize;

	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	gro_result = napi_gro_frags(napi);
	if (gro_result != GRO_DROP)
		channel->irq_mod_score += 2;
}

/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
				     struct efx_rx_buffer *rx_buf,
				     unsigned int n_frags,
				     u8 *eh, int hdr_len)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN);
	if (unlikely(skb == NULL))
		return NULL;

	EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);

	skb_reserve(skb, EFX_PAGE_SKB_ALIGN);
	memcpy(__skb_put(skb, hdr_len), eh, hdr_len);

	/* Append the remaining page(s) onto the frag list */
	if (rx_buf->len > hdr_len) {
		rx_buf->page_offset += hdr_len;
		rx_buf->len -= hdr_len;

		for (;;) {
			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_buf->page, rx_buf->page_offset,
					   rx_buf->len);
			rx_buf->page = NULL;
			skb->len += rx_buf->len;
			skb->data_len += rx_buf->len;
			if (skb_shinfo(skb)->nr_frags == n_frags)
				break;

			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
		}
	} else {
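		/* The whole packet was copied into the skb header area,
		 * so the page is no longer needed.
		 */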
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
		n_frags = 0;
	}

	skb->truesize += n_frags * efx->rx_buffer_truesize;

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	return skb;
}

void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int n_frags, unsigned int len, u16 flags)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
	struct efx_rx_buffer *rx_buf;

	rx_buf = efx_rx_buffer(rx_queue, index);
	rx_buf->flags = flags;

	/* Validate the number of fragments and completed length */
	if (n_frags == 1) {
		efx_rx_packet__check_len(rx_queue, rx_buf, len);
	} else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
		   unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) ||
		   unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) ||
		   unlikely(!efx->rx_scatter)) {
		/* If this isn't an explicit discard request, either
		 * the hardware or the driver is broken.
		 */
		WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
		rx_buf->flags |= EFX_RX_PKT_DISCARD;
	}

	netif_vdbg(efx, rx_status, efx->net_dev,
		   "RX queue %d received ids %x-%x len %d %s%s\n",
		   efx_rx_queue_index(rx_queue), index,
		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
		   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
		   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");

	/* Discard packet, if instructed to do so.  Process the
	 * previous receive first.
	 */
	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
		efx_rx_flush_packet(channel);
		put_page(rx_buf->page);
		efx_recycle_rx_buffers(channel, rx_buf, n_frags);
		return;
	}

	if (n_frags == 1)
		rx_buf->len = len;

	/* Release and/or sync the DMA mapping - assumes all RX buffers
	 * consumed in-order per RX queue.
	 */
	efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(efx_rx_buf_va(rx_buf));

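	/* Skip over any hash prefix the NIC has placed before the
	 * Ethernet header; its length is given by rx_buffer_hash_size.
	 */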
	rx_buf->page_offset += efx->type->rx_buffer_hash_size;
	rx_buf->len -= efx->type->rx_buffer_hash_size;

	if (n_frags > 1) {
		/* Release/sync DMA mapping for additional fragments.
		 * Fix length for last fragment.
		 */
		unsigned int tail_frags = n_frags - 1;

		for (;;) {
			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
			if (--tail_frags == 0)
				break;
			efx_sync_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
		}
		rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
		efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
	}

	/* All fragments have been DMA-synced, so recycle buffers and pages. */
	rx_buf = efx_rx_buffer(rx_queue, index);
	efx_recycle_rx_buffers(channel, rx_buf, n_frags);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	efx_rx_flush_packet(channel);
	channel->rx_pkt_n_frags = n_frags;
	channel->rx_pkt_index = index;
}

static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
			   struct efx_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct sk_buff *skb;
	u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);

	skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
	if (unlikely(skb == NULL)) {
		efx_free_rx_buffer(rx_buf);
		return;
	}
	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	/* Set the SKB flags */
	skb_checksum_none_assert(skb);

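	/* Some channel types provide their own skb handler; if it
	 * consumes the packet there is nothing more to do here.
	 */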
	if (channel->type->receive_skb)
		if (channel->type->receive_skb(channel, skb))
			return;

	/* Pass the packet up */
	netif_receive_skb(skb);
}

/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	struct efx_rx_buffer *rx_buf =
		efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
	u8 *eh = efx_rx_buf_va(rx_buf);

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		efx_loopback_rx_packet(efx, eh, rx_buf->len);
		efx_free_rx_buffer(rx_buf);
		goto out;
	}

	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;

	if (!channel->type->receive_skb)
		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
	else
		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
	channel->rx_pkt_n_frags = 0;
}

int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	rx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating RX queue %d size %#x mask %#x\n",
		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
		  rx_queue->ptr_mask);

	/* Allocate RX buffers */
	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
				   GFP_KERNEL);
	if (!rx_queue->buffer)
		return -ENOMEM;

	rc = efx_nic_probe_rx(rx_queue);
	if (rc) {
		kfree(rx_queue->buffer);
		rx_queue->buffer = NULL;
	}

	return rc;
}

void efx_init_rx_recycle_ring(struct efx_nic *efx,
			      struct efx_rx_queue *rx_queue)
{
	unsigned int bufs_in_recycle_ring, page_ring_size;

	/* Set the RX recycle ring size */
#ifdef CONFIG_PPC64
	bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
#else
	if (efx->pci_dev->dev.iommu_group)
		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
	else
		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
#endif /* CONFIG_PPC64 */

	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
					    efx->rx_bufs_per_page);
	rx_queue->page_ring = kcalloc(page_ring_size,
				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
	rx_queue->page_ptr_mask = page_ring_size - 1;
}

void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int max_fill, trigger, max_trigger;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	efx_init_rx_recycle_ring(efx, rx_queue);

	rx_queue->page_remove = 0;
	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
	rx_queue->page_recycle_count = 0;
	rx_queue->page_recycle_failed = 0;
	rx_queue->page_recycle_full = 0;

	/* Initialise limit fields */
	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
	max_trigger = max_fill - EFX_RX_BATCH;
	if (rx_refill_threshold != 0) {
		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
		if (trigger > max_trigger)
			trigger = max_trigger;
	} else {
		trigger = max_trigger;
	}

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;

	/* Set up RX descriptor ring */
	rx_queue->enabled = true;
	efx_nic_init_rx(rx_queue);
}

void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
	int i;
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));

	/* A flush failure might have left rx_queue->enabled */
	rx_queue->enabled = false;

	del_timer_sync(&rx_queue->slow_fill);
	efx_nic_fini_rx(rx_queue);

	/* Release RX buffers from the current read ptr to the write ptr */
	if (rx_queue->buffer) {
		for (i = rx_queue->removed_count; i < rx_queue->added_count;
		     i++) {
			unsigned index = i & rx_queue->ptr_mask;
			rx_buf = efx_rx_buffer(rx_queue, index);
			efx_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	/* Unmap and release the pages in the recycle ring. Remove the ring. */
	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
		struct page *page = rx_queue->page_ring[i];
		struct efx_rx_page_state *state;

		if (page == NULL)
			continue;

		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
	}
	kfree(rx_queue->page_ring);
	rx_queue->page_ring = NULL;
}

void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
{
	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));

	efx_nic_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
}


module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring refill threshold (%)");