/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/ethtool.h>
21
#include <linux/topology.h>
22
#include <linux/gfp.h>
23
#include <linux/aer.h>
24
#include <linux/interrupt.h>
25 26
#include "net_driver.h"
#include "efx.h"
B
Ben Hutchings 已提交
27
#include "nic.h"
28
#include "selftest.h"
29

30
#include "mcdi.h"
31
#include "workarounds.h"
32

33 34 35 36 37 38 39 40 41
/**************************************************************************
 *
 * Type name strings
 *
 **************************************************************************
 */

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
42
const char *const efx_loopback_mode_names[] = {
43
	[LOOPBACK_NONE]		= "NONE",
44
	[LOOPBACK_DATA]		= "DATAPATH",
45 46 47
	[LOOPBACK_GMAC]		= "GMAC",
	[LOOPBACK_XGMII]	= "XGMII",
	[LOOPBACK_XGXS]		= "XGXS",
48 49 50
	[LOOPBACK_XAUI]		= "XAUI",
	[LOOPBACK_GMII]		= "GMII",
	[LOOPBACK_SGMII]	= "SGMII",
51 52 53 54 55 56
	[LOOPBACK_XGBR]		= "XGBR",
	[LOOPBACK_XFI]		= "XFI",
	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
57 58
	[LOOPBACK_GPHY]		= "GPHY",
	[LOOPBACK_PHYXS]	= "PHYXS",
59 60
	[LOOPBACK_PCS]		= "PCS",
	[LOOPBACK_PMAPMD]	= "PMA/PMD",
61 62
	[LOOPBACK_XPORT]	= "XPORT",
	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
63
	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
64 65
	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
66
	[LOOPBACK_GMII_WS]	= "GMII_WS",
67 68
	[LOOPBACK_XFI_WS]	= "XFI_WS",
	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
69
	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
70 71 72
};

const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
73
const char *const efx_reset_type_names[] = {
74 75 76 77 78
	[RESET_TYPE_INVISIBLE]          = "INVISIBLE",
	[RESET_TYPE_ALL]                = "ALL",
	[RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
	[RESET_TYPE_WORLD]              = "WORLD",
	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
79
	[RESET_TYPE_MC_BIST]		= "MC_BIST",
80 81 82 83
	[RESET_TYPE_DISABLE]            = "DISABLE",
	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
	[RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
84
	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
85 86
	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
87
	[RESET_TYPE_MCDI_TIMEOUT]	= "MCDI_TIMEOUT (FLR)",
88 89
};

90 91 92 93 94 95
/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-NIC work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100

102 103 104 105 106 107 108 109 110
/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
111 112
 * Set this to 1 to use separate channels for TX and RX. It allows us
 * to control interrupt affinity separately for TX and RX.
113
 *
114
 * This is only used in MSI-X interrupt mode
115
 */
116 117
static bool separate_tx_channels;
module_param(separate_tx_channels, bool, 0444);
118 119
MODULE_PARM_DESC(separate_tx_channels,
		 "Use separate channels for TX and RX");
120 121 122 123 124 125 126

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
127 128
 * monitor.
 * On Falcon-based NICs, this will:
129 130
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
131 132
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
133
 */
S
stephen hemminger 已提交
134
static unsigned int efx_monitor_interval = 1 * HZ;
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full.  A queue is
 * restarted when it drops below half full.  The time this takes (assuming
 * worst case 3 descriptors per packet and 1024 descriptors) is
 *   512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
167
 * The default (0) means to assign an interrupt to each core.
168 169 170 171 172
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");

173 174
static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
175 176
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

177
static unsigned irq_adapt_low_thresh = 8000;
178 179 180 181
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

182
static unsigned irq_adapt_high_thresh = 16000;
183 184 185 186
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

187 188 189 190 191 192 193
static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

194 195 196 197 198
/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/
199

200
static int efx_soft_enable_interrupts(struct efx_nic *efx);
B
Ben Hutchings 已提交
201
static void efx_soft_disable_interrupts(struct efx_nic *efx);
202
static void efx_remove_channel(struct efx_channel *channel);
203
static void efx_remove_channels(struct efx_nic *efx);
204
static const struct efx_channel_type efx_default_channel_type;
205
static void efx_remove_port(struct efx_nic *efx);
206
static void efx_init_napi_channel(struct efx_channel *channel);
207
static void efx_fini_napi(struct efx_nic *efx);
208
static void efx_fini_napi_channel(struct efx_channel *channel);
209 210 211
static void efx_fini_struct(struct efx_nic *efx);
static void efx_start_all(struct efx_nic *efx);
static void efx_stop_all(struct efx_nic *efx);
212 213 214

/* Assert that the RTNL lock is held, but only while the NIC is in one of
 * the READY, RECOVERY or DISABLED states; in any other state no check is
 * made.  The argument is parenthesised so that any pointer expression may
 * be passed; note that it is evaluated more than once.
 */
#define EFX_ASSERT_RESET_SERIALISED(efx)			\
	do {							\
		if (((efx)->state == STATE_READY) ||		\
		    ((efx)->state == STATE_RECOVERY) ||		\
		    ((efx)->state == STATE_DISABLED))		\
			ASSERT_RTNL();				\
	} while (0)

221 222
static int efx_check_disabled(struct efx_nic *efx)
{
223
	if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
224 225 226 227 228 229 230
		netif_err(efx, drv, efx->net_dev,
			  "device is disabled due to earlier errors\n");
		return -EIO;
	}
	return 0;
}

231 232 233 234 235 236 237 238 239 240 241 242 243
/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
244
static int efx_process_channel(struct efx_channel *channel, int budget)
245
{
246
	int spent;
247

248
	if (unlikely(!channel->enabled))
B
Ben Hutchings 已提交
249
		return 0;
250

251
	spent = efx_nic_process_eventq(channel, budget);
252 253 254 255
	if (spent && efx_channel_has_rx_queue(channel)) {
		struct efx_rx_queue *rx_queue =
			efx_channel_get_rx_queue(channel);

256
		efx_rx_flush_packet(channel);
257
		efx_fast_push_rx_descriptors(rx_queue, true);
258 259
	}

260
	return spent;
261 262 263 264 265 266 267 268 269 270 271
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
272
	struct efx_nic *efx = channel->efx;
273
	int spent;
274

275 276 277
	if (!efx_channel_lock_napi(channel))
		return budget;

278 279 280
	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());
281

282
	spent = efx_process_channel(channel, budget);
283

284
	if (spent < budget) {
285
		if (efx_channel_has_rx_queue(channel) &&
286 287 288 289
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			if (unlikely(channel->irq_mod_score <
				     irq_adapt_low_thresh)) {
290 291
				if (channel->irq_moderation > 1) {
					channel->irq_moderation -= 1;
292
					efx->type->push_irq_moderation(channel);
293
				}
294 295
			} else if (unlikely(channel->irq_mod_score >
					    irq_adapt_high_thresh)) {
296 297 298
				if (channel->irq_moderation <
				    efx->irq_rx_moderation) {
					channel->irq_moderation += 1;
299
					efx->type->push_irq_moderation(channel);
300
				}
301 302 303 304 305
			}
			channel->irq_count = 0;
			channel->irq_mod_score = 0;
		}

306 307
		efx_filter_rfs_expire(channel);

308
		/* There is no race here; although napi_disable() will
309
		 * only wait for napi_complete(), this isn't a problem
310
		 * since efx_nic_eventq_read_ack() will have no effect if
311 312
		 * interrupts have already been disabled.
		 */
313
		napi_complete(napi);
314
		efx_nic_eventq_read_ack(channel);
315 316
	}

317
	efx_channel_unlock_napi(channel);
318
	return spent;
319 320 321 322 323 324 325 326 327
}

/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
static int efx_probe_eventq(struct efx_channel *channel)
{
328 329 330
	struct efx_nic *efx = channel->efx;
	unsigned long entries;

331
	netif_dbg(efx, probe, efx->net_dev,
332
		  "chan %d create event queue\n", channel->channel);
333

334 335 336 337 338 339
	/* Build an event queue with room for one event per tx and rx buffer,
	 * plus some extra for link state events and MCDI completions. */
	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
	EFX_BUG_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

340
	return efx_nic_probe_eventq(channel);
341 342 343
}

/* Prepare channel's event queue */
344
static int efx_init_eventq(struct efx_channel *channel)
345
{
346
	struct efx_nic *efx = channel->efx;
347 348 349 350
	int rc;

	EFX_WARN_ON_PARANOID(channel->eventq_init);

351
	netif_dbg(efx, drv, efx->net_dev,
352
		  "chan %d init event queue\n", channel->channel);
353

354 355
	rc = efx_nic_init_eventq(channel);
	if (rc == 0) {
356
		efx->type->push_irq_moderation(channel);
357 358 359 360
		channel->eventq_read_ptr = 0;
		channel->eventq_init = true;
	}
	return rc;
361 362
}

363
/* Enable event queue processing and NAPI */
364
void efx_start_eventq(struct efx_channel *channel)
365 366 367 368
{
	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
		  "chan %d start event queue\n", channel->channel);

369
	/* Make sure the NAPI handler sees the enabled flag set */
370 371 372
	channel->enabled = true;
	smp_wmb();

373
	efx_channel_enable(channel);
374 375 376 377 378
	napi_enable(&channel->napi_str);
	efx_nic_eventq_read_ack(channel);
}

/* Disable event queue processing and NAPI */
379
void efx_stop_eventq(struct efx_channel *channel)
380 381 382 383 384
{
	if (!channel->enabled)
		return;

	napi_disable(&channel->napi_str);
385 386
	while (!efx_channel_disable(channel))
		usleep_range(1000, 20000);
387 388 389
	channel->enabled = false;
}

390 391
static void efx_fini_eventq(struct efx_channel *channel)
{
392 393 394
	if (!channel->eventq_init)
		return;

395 396
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d fini event queue\n", channel->channel);
397

398
	efx_nic_fini_eventq(channel);
399
	channel->eventq_init = false;
400 401 402 403
}

static void efx_remove_eventq(struct efx_channel *channel)
{
404 405
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d remove event queue\n", channel->channel);
406

407
	efx_nic_remove_eventq(channel);
408 409 410 411 412 413 414 415
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

416
/* Allocate and initialise a channel structure. */
417 418 419 420 421 422 423 424
static struct efx_channel *
efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

425 426 427
	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;
428

429 430 431
	channel->efx = efx;
	channel->channel = i;
	channel->type = &efx_default_channel_type;
432

433 434 435 436 437 438
	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		tx_queue->efx = efx;
		tx_queue->queue = i * EFX_TXQ_TYPES + j;
		tx_queue->channel = channel;
	}
439

440 441 442 443
	rx_queue = &channel->rx_queue;
	rx_queue->efx = efx;
	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
		    (unsigned long)rx_queue);
444

445 446 447 448 449 450 451 452 453 454 455 456 457
	return channel;
}

/* Allocate and initialise a channel structure, copying parameters
 * (but not resources) from an old channel structure.
 */
static struct efx_channel *
efx_copy_channel(const struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;
458

459 460 461 462 463 464 465 466
	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	*channel = *old_channel;

	channel->napi_dev = NULL;
	memset(&channel->eventq, 0, sizeof(channel->eventq));
467

468 469 470
	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		if (tx_queue->channel)
471
			tx_queue->channel = channel;
472 473
		tx_queue->buffer = NULL;
		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
474 475 476
	}

	rx_queue = &channel->rx_queue;
477 478
	rx_queue->buffer = NULL;
	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
479 480 481 482 483 484
	setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill,
		    (unsigned long)rx_queue);

	return channel;
}

485 486 487 488 489 490
static int efx_probe_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

491 492
	netif_dbg(channel->efx, probe, channel->efx->net_dev,
		  "creating channel %d\n", channel->channel);
493

494 495 496 497
	rc = channel->type->pre_probe(channel);
	if (rc)
		goto fail;

498 499
	rc = efx_probe_eventq(channel);
	if (rc)
500
		goto fail;
501 502 503 504

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		rc = efx_probe_tx_queue(tx_queue);
		if (rc)
505
			goto fail;
506 507 508 509 510
	}

	efx_for_each_channel_rx_queue(rx_queue, channel) {
		rc = efx_probe_rx_queue(rx_queue);
		if (rc)
511
			goto fail;
512 513 514 515
	}

	return 0;

516 517
fail:
	efx_remove_channel(channel);
518 519 520
	return rc;
}

521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
/* Format the name of a channel into @buf (at most @len bytes).
 *
 * The name is "<nic name><type>-<number>".  When the NIC's
 * tx_channel_offset is zero there is no type suffix; otherwise channels
 * below the offset are "-rx" and the rest are "-tx", numbered relative
 * to the offset.
 */
static void
efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
	struct efx_nic *efx = channel->efx;
	const char *suffix = "";
	int index = channel->channel;

	if (efx->tx_channel_offset != 0) {
		if (channel->channel < efx->tx_channel_offset) {
			suffix = "-rx";
		} else {
			suffix = "-tx";
			index -= efx->tx_channel_offset;
		}
	}

	snprintf(buf, len, "%s%s-%d", efx->name, suffix, index);
}
539

540 541 542 543
static void efx_set_channel_names(struct efx_nic *efx)
{
	struct efx_channel *channel;

544 545
	efx_for_each_channel(channel, efx)
		channel->type->get_name(channel,
B
Ben Hutchings 已提交
546 547
					efx->msi_context[channel->channel].name,
					sizeof(efx->msi_context[0].name));
548 549
}

550 551 552 553 554 555 556 557
static int efx_probe_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	int rc;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

558 559 560 561 562 563
	/* Probe channels in reverse, so that any 'extra' channels
	 * use the start of the buffer table. This allows the traffic
	 * channels to be resized without moving them or wasting the
	 * entries before them.
	 */
	efx_for_each_channel_rev(channel, efx) {
564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
		rc = efx_probe_channel(channel);
		if (rc) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	efx_set_channel_names(efx);

	return 0;

fail:
	efx_remove_channels(efx);
	return rc;
}

581 582 583 584
/* Channels are shutdown and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions
 */
585
static void efx_start_datapath(struct efx_nic *efx)
586
{
587
	bool old_rx_scatter = efx->rx_scatter;
588 589 590
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;
591
	size_t rx_buf_len;
592

593 594 595 596
	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
597
	efx->rx_dma_len = (efx->rx_prefix_size +
598 599
			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			   efx->type->rx_buffer_padding);
600
	rx_buf_len = (sizeof(struct efx_rx_page_state) +
601
		      efx->rx_ip_align + efx->rx_dma_len);
602
	if (rx_buf_len <= PAGE_SIZE) {
J
Jon Cooper 已提交
603
		efx->rx_scatter = efx->type->always_rx_scatter;
604 605
		efx->rx_buffer_order = 0;
	} else if (efx->type->can_rx_scatter) {
606
		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
607
		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
608 609 610
			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
				       EFX_RX_BUF_ALIGNMENT) >
			     PAGE_SIZE);
611 612 613 614 615 616 617 618
		efx->rx_scatter = true;
		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
		efx->rx_buffer_order = 0;
	} else {
		efx->rx_scatter = false;
		efx->rx_buffer_order = get_order(rx_buf_len);
	}

619 620 621 622 623 624 625 626 627 628 629
	efx_rx_config_page_split(efx);
	if (efx->rx_buffer_order)
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u; page order=%u batch=%u\n",
			  efx->rx_dma_len, efx->rx_buffer_order,
			  efx->rx_pages_per_batch);
	else
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
			  efx->rx_dma_len, efx->rx_page_buf_step,
			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);
630

J
Jon Cooper 已提交
631
	/* RX filters may also have scatter-enabled flags */
632
	if (efx->rx_scatter != old_rx_scatter)
633
		efx->type->filter_update_rx_scatter(efx);
634

635 636 637 638 639 640 641 642 643 644
	/* We must keep at least one descriptor in a TX ring empty.
	 * We could avoid this when the queue size does not exactly
	 * match the hardware ring size, but it's not that important.
	 * Therefore we stop the queue when one more skb might fill
	 * the ring completely.  We wake it when half way back to
	 * empty.
	 */
	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;

645 646
	/* Initialise the channels */
	efx_for_each_channel(channel, efx) {
647
		efx_for_each_channel_tx_queue(tx_queue, channel) {
648
			efx_init_tx_queue(tx_queue);
649 650
			atomic_inc(&efx->active_queues);
		}
651

652
		efx_for_each_channel_rx_queue(rx_queue, channel) {
653
			efx_init_rx_queue(rx_queue);
654
			atomic_inc(&efx->active_queues);
655 656 657
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
			efx_start_eventq(channel);
658
		}
659

660
		WARN_ON(channel->rx_pkt_n_frags);
661 662
	}

663 664
	efx_ptp_start_datapath(efx);

665 666
	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);
667 668
}

669
static void efx_stop_datapath(struct efx_nic *efx)
670 671 672 673
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
674
	int rc;
675 676 677 678

	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

679 680
	efx_ptp_stop_datapath(efx);

681 682 683 684 685 686
	/* Stop RX refill */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			rx_queue->refill_enabled = false;
	}

687
	efx_for_each_channel(channel, efx) {
688 689 690 691 692 693 694 695 696 697
		/* RX packet processing is pipelined, so wait for the
		 * NAPI handler to complete.  At least event queue 0
		 * might be kept active by non-data events, so don't
		 * use napi_synchronize() but actually disable NAPI
		 * temporarily.
		 */
		if (efx_channel_has_rx_queue(channel)) {
			efx_stop_eventq(channel);
			efx_start_eventq(channel);
		}
698
	}
699

700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717
	rc = efx->type->fini_dmaq(efx);
	if (rc && EFX_WORKAROUND_7803(efx)) {
		/* Schedule a reset to recover from the flush failure. The
		 * descriptor caches reference memory we're about to free,
		 * but falcon_reconfigure_mac_wrapper() won't reconnect
		 * the MACs because of the pending reset.
		 */
		netif_err(efx, drv, efx->net_dev,
			  "Resetting to recover from flush failure\n");
		efx_schedule_reset(efx, RESET_TYPE_ALL);
	} else if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "successfully flushed all queues\n");
	}

	efx_for_each_channel(channel, efx) {
718 719
		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_fini_rx_queue(rx_queue);
720
		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
721 722 723 724 725 726 727 728 729
			efx_fini_tx_queue(tx_queue);
	}
}

static void efx_remove_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;

730 731
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "destroy chan %d\n", channel->channel);
732 733 734

	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
735
	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
736 737
		efx_remove_tx_queue(tx_queue);
	efx_remove_eventq(channel);
738
	channel->type->post_remove(channel);
739 740
}

741 742 743 744 745 746 747 748 749 750 751 752 753
static void efx_remove_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_remove_channel(channel);
}

/* Reallocate the channels of a running NIC for new RX/TX ring sizes.
 *
 * @efx: NIC to reconfigure
 * @rxq_entries: new value for efx->rxq_entries
 * @txq_entries: new value for efx->txq_entries
 *
 * The device is detached and stopped, every channel whose type provides
 * a copy operation is cloned, and the clones are probed with the new
 * sizes.  If probing fails, the old sizes and channels are swapped back.
 * Whichever set of copyable channels ends up unused is removed and
 * freed, after which interrupts and the datapath are restarted.
 *
 * Returns 0 on success or a negative error code.  If interrupts cannot
 * be re-enabled, a RESET_TYPE_DISABLE reset is scheduled.
 */
int
efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
	u32 old_rxq_entries, old_txq_entries;
	unsigned int i, next_buffer_table = 0;
	int rc, rc2;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	/* Not all channels should be reallocated. We must avoid
	 * reallocating their buffer table entries.
	 */
	efx_for_each_channel(channel, efx) {
		struct efx_rx_queue *rx_queue;
		struct efx_tx_queue *tx_queue;

		if (channel->type->copy)
			continue;
		next_buffer_table = max(next_buffer_table,
					channel->eventq.index +
					channel->eventq.entries);
		efx_for_each_channel_rx_queue(rx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						rx_queue->rxd.index +
						rx_queue->rxd.entries);
		efx_for_each_channel_tx_queue(tx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						tx_queue->txd.index +
						tx_queue->txd.entries);
	}

	efx_device_detach_sync(efx);
	efx_stop_all(efx);
	efx_soft_disable_interrupts(efx);

	/* Clone channels (where possible) */
	memset(other_channel, 0, sizeof(other_channel));
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (channel->type->copy)
			channel = channel->type->copy(channel);
		if (!channel) {
			rc = -ENOMEM;
			goto out;
		}
		other_channel[i] = channel;
	}

	/* Swap entry counts and channel pointers */
	old_rxq_entries = efx->rxq_entries;
	old_txq_entries = efx->txq_entries;
	efx->rxq_entries = rxq_entries;
	efx->txq_entries = txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}

	/* Restart buffer table allocation */
	efx->next_buffer_table = next_buffer_table;

	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (!channel->type->copy)
			continue;
		rc = efx_probe_channel(channel);
		if (rc)
			goto rollback;
		efx_init_napi_channel(efx->channel[i]);
	}

out:
	/* Destroy unused channel structures */
	for (i = 0; i < efx->n_channels; i++) {
		channel = other_channel[i];
		if (channel && channel->type->copy) {
			efx_fini_napi_channel(channel);
			efx_remove_channel(channel);
			kfree(channel);
		}
	}

	rc2 = efx_soft_enable_interrupts(efx);
	if (rc2) {
		/* Report the earlier error in preference to this one */
		rc = rc ? rc : rc2;
		netif_err(efx, drv, efx->net_dev,
			  "unable to restart interrupts on channel reallocation\n");
		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
	} else {
		efx_start_all(efx);
		netif_device_attach(efx->net_dev);
	}
	return rc;

rollback:
	/* Swap back */
	efx->rxq_entries = old_rxq_entries;
	efx->txq_entries = old_txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}
	goto out;
}

859
void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
860
{
861
	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
862 863
}

864 865
static const struct efx_channel_type efx_default_channel_type = {
	.pre_probe		= efx_channel_dummy_op_int,
866
	.post_remove		= efx_channel_dummy_op_void,
867 868 869 870 871 872 873 874 875 876
	.get_name		= efx_get_channel_name,
	.copy			= efx_copy_channel,
	.keep_eventq		= false,
};

/* No-op channel operation for hooks that return a status: ignores
 * @channel and always reports success (0).
 */
int efx_channel_dummy_op_int(struct efx_channel *channel)
{
	const int rc = 0;

	return rc;
}

877 878 879 880
/* No-op channel operation for hooks that return nothing. */
void efx_channel_dummy_op_void(struct efx_channel *channel)
{
	/* Intentionally empty */
	return;
}

881 882 883 884 885 886 887 888 889 890
/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also maintains the
 * link status's stop on the port's TX queue.
 */
S
Steve Hodgson 已提交
891
void efx_link_status_changed(struct efx_nic *efx)
892
{
893 894
	struct efx_link_state *link_state = &efx->link_state;

895 896 897 898 899 900 901
	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading. A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
	if (!netif_running(efx->net_dev))
		return;

902
	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
903 904
		efx->n_link_state_changes++;

905
		if (link_state->up)
906 907 908 909 910 911
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
B
Ben Hutchings 已提交
912
	if (link_state->up)
913
		netif_info(efx, link, efx->net_dev,
914
			   "link up at %uMbps %s-duplex (MTU %d)\n",
915
			   link_state->speed, link_state->fd ? "full" : "half",
916
			   efx->net_dev->mtu);
B
Ben Hutchings 已提交
917
	else
918
		netif_info(efx, link, efx->net_dev, "link down\n");
919 920
}

B
Ben Hutchings 已提交
921 922 923 924 925 926 927 928 929 930 931 932 933
/* Record the advertised link modes and, when @advertising is non-zero,
 * derive the wanted flow control from its pause bits: ADVERTISED_Pause
 * selects both EFX_FC_TX and EFX_FC_RX (or neither when it is clear),
 * and ADVERTISED_Asym_Pause then toggles EFX_FC_TX.
 */
void efx_link_set_advertising(struct efx_nic *efx, u32 advertising)
{
	efx->link_advertising = advertising;

	/* Nothing advertised: leave the wanted flow control untouched */
	if (!advertising)
		return;

	if (advertising & ADVERTISED_Pause)
		efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
	else
		efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);

	if (advertising & ADVERTISED_Asym_Pause)
		efx->wanted_fc ^= EFX_FC_TX;
}

934
/* Record the wanted flow control and, when link_advertising is non-zero,
 * update its pause bits to match: EFX_FC_RX selects both
 * ADVERTISED_Pause and ADVERTISED_Asym_Pause (or neither when it is
 * clear), and EFX_FC_TX then toggles ADVERTISED_Asym_Pause.
 */
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising |= (ADVERTISED_Pause |
						  ADVERTISED_Asym_Pause);
		else
			efx->link_advertising &= ~(ADVERTISED_Pause |
						   ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising ^= ADVERTISED_Asym_Pause;
	}
}

949 950
/* Forward declaration of a file-local function; its definition is not
 * part of this section of the file.
 */
static void efx_fini_port(struct efx_nic *efx);

B
Ben Hutchings 已提交
951 952 953 954 955 956 957 958
/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately. All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock
 */
int __efx_reconfigure_port(struct efx_nic *efx)
959
{
B
Ben Hutchings 已提交
960 961
	enum efx_phy_mode phy_mode;
	int rc;
962

B
Ben Hutchings 已提交
963
	WARN_ON(!mutex_is_locked(&efx->mac_lock));
964

B
Ben Hutchings 已提交
965 966
	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
967 968 969 970 971
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

B
Ben Hutchings 已提交
972
	rc = efx->type->reconfigure_port(efx);
973

B
Ben Hutchings 已提交
974 975
	if (rc)
		efx->phy_mode = phy_mode;
976

B
Ben Hutchings 已提交
977
	return rc;
978 979 980 981
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled. */
B
Ben Hutchings 已提交
982
int efx_reconfigure_port(struct efx_nic *efx)
983
{
B
Ben Hutchings 已提交
984 985
	int rc;

986 987 988
	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
B
Ben Hutchings 已提交
989
	rc = __efx_reconfigure_port(efx);
990
	mutex_unlock(&efx->mac_lock);
B
Ben Hutchings 已提交
991 992

	return rc;
993 994
}

995 996 997
/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly. */
998 999 1000 1001 1002
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
1003
	if (efx->port_enabled)
1004
		efx->type->reconfigure_mac(efx);
1005 1006 1007
	mutex_unlock(&efx->mac_lock);
}

1008 1009 1010 1011
static int efx_probe_port(struct efx_nic *efx)
{
	int rc;

1012
	netif_dbg(efx, probe, efx->net_dev, "create port\n");
1013

1014 1015 1016
	if (phy_flash_cfg)
		efx->phy_mode = PHY_MODE_SPECIAL;

1017 1018
	/* Connect up MAC/PHY operations table */
	rc = efx->type->probe_port(efx);
1019
	if (rc)
1020
		return rc;
1021

1022
	/* Initialise MAC address to permanent address */
1023
	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
1024 1025 1026 1027 1028 1029 1030 1031

	return 0;
}

static int efx_init_port(struct efx_nic *efx)
{
	int rc;

1032
	netif_dbg(efx, drv, efx->net_dev, "init port\n");
1033

1034 1035
	mutex_lock(&efx->mac_lock);

1036
	rc = efx->phy_op->init(efx);
1037
	if (rc)
1038
		goto fail1;
1039

1040
	efx->port_initialized = true;
1041

B
Ben Hutchings 已提交
1042 1043
	/* Reconfigure the MAC before creating dma queues (required for
	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
1044
	efx->type->reconfigure_mac(efx);
B
Ben Hutchings 已提交
1045 1046 1047 1048 1049 1050

	/* Ensure the PHY advertises the correct flow control settings */
	rc = efx->phy_op->reconfigure(efx);
	if (rc)
		goto fail2;

1051
	mutex_unlock(&efx->mac_lock);
1052
	return 0;
1053

1054
fail2:
1055
	efx->phy_op->fini(efx);
1056 1057
fail1:
	mutex_unlock(&efx->mac_lock);
1058
	return rc;
1059 1060 1061 1062
}

static void efx_start_port(struct efx_nic *efx)
{
1063
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
1064 1065 1066
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
1067
	efx->port_enabled = true;
1068

1069
	/* Ensure MAC ingress/egress is enabled */
1070
	efx->type->reconfigure_mac(efx);
1071

1072 1073 1074
	mutex_unlock(&efx->mac_lock);
}

1075 1076 1077 1078 1079
/* Cancel work for MAC reconfiguration, periodic hardware monitoring
 * and the async self-test, wait for them to finish and prevent them
 * being scheduled again.  This doesn't cover online resets, which
 * should only be cancelled when removing the device.
 */
1080 1081
static void efx_stop_port(struct efx_nic *efx)
{
1082
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
1083

1084 1085
	EFX_ASSERT_RESET_SERIALISED(efx);

1086
	mutex_lock(&efx->mac_lock);
1087
	efx->port_enabled = false;
1088 1089 1090
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
1091 1092
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);
1093 1094 1095 1096

	cancel_delayed_work_sync(&efx->monitor_work);
	efx_selftest_async_cancel(efx);
	cancel_work_sync(&efx->mac_work);
1097 1098 1099 1100
}

static void efx_fini_port(struct efx_nic *efx)
{
1101
	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
1102 1103 1104 1105

	if (!efx->port_initialized)
		return;

1106
	efx->phy_op->fini(efx);
1107
	efx->port_initialized = false;
1108

1109
	efx->link_state.up = false;
1110 1111 1112 1113 1114
	efx_link_status_changed(efx);
}

static void efx_remove_port(struct efx_nic *efx)
{
1115
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
1116

1117
	efx->type->remove_port(efx);
1118 1119 1120 1121 1122 1123 1124 1125
}

/**************************************************************************
 *
 * NIC handling
 *
 **************************************************************************/

1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196
/* Functions that are their own primary (efx->primary == efx); populated
 * by efx_associate()
 */
static LIST_HEAD(efx_primary_list);
/* Functions for which no matching primary is currently registered */
static LIST_HEAD(efx_unassociated_list);

/* Two functions belong to the same controller when they share a NIC type
 * and both carry the same, non-NULL VPD serial number string.
 */
static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
{
	if (left->type != right->type)
		return false;

	/* A missing serial number can never match */
	if (!left->vpd_sn || !right->vpd_sn)
		return false;

	return strcmp(left->vpd_sn, right->vpd_sn) == 0;
}

/* Link a newly-probed function into the primary/secondary bookkeeping.
 *
 * A primary (efx->primary == efx) joins efx_primary_list and adopts every
 * matching entry from efx_unassociated_list.  Any other function joins the
 * secondary list of the first matching primary, or waits on
 * efx_unassociated_list when there is none.
 */
static void efx_associate(struct efx_nic *efx)
{
	struct efx_nic *peer, *tmp;

	if (efx->primary != efx) {
		/* Adding secondary function; look for primary */
		list_for_each_entry(peer, &efx_primary_list, node) {
			if (!efx_same_controller(efx, peer))
				continue;

			netif_dbg(efx, probe, efx->net_dev,
				  "adding to secondary list of %s %s\n",
				  pci_name(peer->pci_dev),
				  peer->net_dev->name);
			list_add_tail(&efx->node, &peer->secondary_list);
			efx->primary = peer;
			return;
		}

		netif_dbg(efx, probe, efx->net_dev,
			  "adding to unassociated list\n");
		list_add_tail(&efx->node, &efx_unassociated_list);
		return;
	}

	/* Adding primary function; look for secondaries */
	netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
	list_add_tail(&efx->node, &efx_primary_list);

	list_for_each_entry_safe(peer, tmp, &efx_unassociated_list, node) {
		if (!efx_same_controller(efx, peer))
			continue;

		list_del(&peer->node);
		netif_dbg(peer, probe, peer->net_dev,
			  "moving to secondary list of %s %s\n",
			  pci_name(efx->pci_dev),
			  efx->net_dev->name);
		list_add_tail(&peer->node, &efx->secondary_list);
		peer->primary = efx;
	}
}

static void efx_dissociate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	list_del(&efx->node);
	efx->primary = NULL;

	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
		list_del(&other->node);
		netif_dbg(other, probe, other->net_dev,
			  "moving to unassociated list\n");
		list_add_tail(&other->node, &efx_unassociated_list);
		other->primary = NULL;
	}
}

1197 1198 1199 1200 1201
/* This configures the PCI device to enable I/O and DMA. */
static int efx_init_io(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	dma_addr_t dma_mask = efx->type->max_dma_mask;
1202
	unsigned int mem_map_size = efx->type->mem_map_size(efx);
1203 1204
	int rc;

1205
	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
1206 1207 1208

	rc = pci_enable_device(pci_dev);
	if (rc) {
1209 1210
		netif_err(efx, probe, efx->net_dev,
			  "failed to enable PCI device\n");
1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221
		goto fail1;
	}

	pci_set_master(pci_dev);

	/* Set the PCI DMA mask.  Try all possibilities from our
	 * genuine mask down to 32 bits, because some architectures
	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
	 * masks event though they reject 46 bit masks.
	 */
	while (dma_mask > 0x7fffffffUL) {
1222
		if (dma_supported(&pci_dev->dev, dma_mask)) {
1223
			rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
1224 1225 1226
			if (rc == 0)
				break;
		}
1227 1228 1229
		dma_mask >>= 1;
	}
	if (rc) {
1230 1231
		netif_err(efx, probe, efx->net_dev,
			  "could not find a suitable DMA mask\n");
1232 1233
		goto fail2;
	}
1234 1235
	netif_dbg(efx, probe, efx->net_dev,
		  "using DMA mask %llx\n", (unsigned long long) dma_mask);
1236

1237 1238
	efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR);
	rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc");
1239
	if (rc) {
1240 1241
		netif_err(efx, probe, efx->net_dev,
			  "request for memory BAR failed\n");
1242 1243 1244
		rc = -EIO;
		goto fail3;
	}
1245
	efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
1246
	if (!efx->membase) {
1247 1248
		netif_err(efx, probe, efx->net_dev,
			  "could not map memory BAR at %llx+%x\n",
1249
			  (unsigned long long)efx->membase_phys, mem_map_size);
1250 1251 1252
		rc = -ENOMEM;
		goto fail4;
	}
1253 1254
	netif_dbg(efx, probe, efx->net_dev,
		  "memory BAR at %llx+%x (virtual %p)\n",
1255 1256
		  (unsigned long long)efx->membase_phys, mem_map_size,
		  efx->membase);
1257 1258 1259 1260

	return 0;

 fail4:
1261
	pci_release_region(efx->pci_dev, EFX_MEM_BAR);
1262
 fail3:
1263
	efx->membase_phys = 0;
1264 1265 1266 1267 1268 1269 1270 1271
 fail2:
	pci_disable_device(efx->pci_dev);
 fail1:
	return rc;
}

static void efx_fini_io(struct efx_nic *efx)
{
1272
	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
1273 1274 1275 1276 1277 1278 1279

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
1280
		pci_release_region(efx->pci_dev, EFX_MEM_BAR);
1281
		efx->membase_phys = 0;
1282 1283 1284 1285 1286
	}

	pci_disable_device(efx->pci_dev);
}

1287
static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
1288
{
1289
	cpumask_var_t thread_mask;
1290
	unsigned int count;
1291
	int cpu;
1292

1293 1294 1295 1296 1297 1298 1299 1300
	if (rss_cpus) {
		count = rss_cpus;
	} else {
		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
			netif_warn(efx, probe, efx->net_dev,
				   "RSS disabled due to allocation failure\n");
			return 1;
		}
1301

1302 1303 1304 1305 1306 1307 1308 1309 1310 1311
		count = 0;
		for_each_online_cpu(cpu) {
			if (!cpumask_test_cpu(cpu, thread_mask)) {
				++count;
				cpumask_or(thread_mask, thread_mask,
					   topology_thread_cpumask(cpu));
			}
		}

		free_cpumask_var(thread_mask);
R
Rusty Russell 已提交
1312 1313
	}

1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
	/* If RSS is requested for the PF *and* VFs then we can't write RSS
	 * table entries that are inaccessible to VFs
	 */
	if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
	    count > efx_vf_size(efx)) {
		netif_warn(efx, probe, efx->net_dev,
			   "Reducing number of RSS channels from %u to %u for "
			   "VF support. Increase vf-msix-limit to use more "
			   "channels on the PF.\n",
			   count, efx_vf_size(efx));
		count = efx_vf_size(efx);
1325 1326 1327 1328 1329 1330 1331 1332
	}

	return count;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
1333
static int efx_probe_interrupts(struct efx_nic *efx)
1334
{
1335 1336
	unsigned int extra_channels = 0;
	unsigned int i, j;
1337
	int rc;
1338

1339 1340 1341 1342
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
		if (efx->extra_channel_type[i])
			++extra_channels;

1343
	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
1344
		struct msix_entry xentries[EFX_MAX_CHANNELS];
1345
		unsigned int n_channels;
1346

1347
		n_channels = efx_wanted_parallelism(efx);
B
Ben Hutchings 已提交
1348 1349
		if (separate_tx_channels)
			n_channels *= 2;
1350
		n_channels += extra_channels;
1351
		n_channels = min(n_channels, efx->max_channels);
1352

B
Ben Hutchings 已提交
1353
		for (i = 0; i < n_channels; i++)
1354
			xentries[i].entry = i;
1355 1356 1357 1358 1359 1360 1361 1362
		rc = pci_enable_msix_range(efx->pci_dev,
					   xentries, 1, n_channels);
		if (rc < 0) {
			/* Fall back to single channel MSI */
			efx->interrupt_mode = EFX_INT_MODE_MSI;
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
		} else if (rc < n_channels) {
1363 1364
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
1365
				  " available (%d < %u).\n", rc, n_channels);
1366 1367
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
B
Ben Hutchings 已提交
1368
			n_channels = rc;
1369 1370
		}

1371
		if (rc > 0) {
B
Ben Hutchings 已提交
1372
			efx->n_channels = n_channels;
1373 1374
			if (n_channels > extra_channels)
				n_channels -= extra_channels;
B
Ben Hutchings 已提交
1375
			if (separate_tx_channels) {
1376 1377 1378 1379
				efx->n_tx_channels = max(n_channels / 2, 1U);
				efx->n_rx_channels = max(n_channels -
							 efx->n_tx_channels,
							 1U);
B
Ben Hutchings 已提交
1380
			} else {
1381 1382
				efx->n_tx_channels = n_channels;
				efx->n_rx_channels = n_channels;
B
Ben Hutchings 已提交
1383
			}
1384
			for (i = 0; i < efx->n_channels; i++)
1385 1386
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
1387 1388 1389 1390 1391
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
1392
		efx->n_channels = 1;
B
Ben Hutchings 已提交
1393 1394
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
1395 1396
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
1397
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
1398
		} else {
1399 1400
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
1401 1402 1403 1404 1405 1406
			efx->interrupt_mode = EFX_INT_MODE_LEGACY;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
1407
		efx->n_channels = 1 + (separate_tx_channels ? 1 : 0);
B
Ben Hutchings 已提交
1408 1409
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
1410 1411
		efx->legacy_irq = efx->pci_dev->irq;
	}
1412

1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427
	/* Assign extra channels if possible */
	j = efx->n_channels;
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
		if (!efx->extra_channel_type[i])
			continue;
		if (efx->interrupt_mode != EFX_INT_MODE_MSIX ||
		    efx->n_channels <= extra_channels) {
			efx->extra_channel_type[i]->handle_no_channel(efx);
		} else {
			--j;
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
		}
	}

1428
	/* RSS might be usable on VFs even if it is disabled on the PF */
1429
	efx->rss_spread = ((efx->n_rx_channels > 1 || !efx_sriov_wanted(efx)) ?
1430 1431
			   efx->n_rx_channels : efx_vf_size(efx));

1432
	return 0;
1433 1434
}

1435
static int efx_soft_enable_interrupts(struct efx_nic *efx)
1436
{
1437 1438
	struct efx_channel *channel, *end_channel;
	int rc;
1439

1440 1441
	BUG_ON(efx->state == STATE_DISABLED);

B
Ben Hutchings 已提交
1442 1443
	efx->irq_soft_enabled = true;
	smp_wmb();
1444 1445

	efx_for_each_channel(channel, efx) {
1446 1447 1448 1449 1450
		if (!channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
1451 1452 1453 1454
		efx_start_eventq(channel);
	}

	efx_mcdi_mode_event(efx);
1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467

	return 0;
fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	return rc;
1468 1469
}

B
Ben Hutchings 已提交
1470
static void efx_soft_disable_interrupts(struct efx_nic *efx)
1471 1472 1473
{
	struct efx_channel *channel;

1474 1475 1476
	if (efx->state == STATE_DISABLED)
		return;

1477 1478
	efx_mcdi_mode_poll(efx);

B
Ben Hutchings 已提交
1479 1480 1481 1482
	efx->irq_soft_enabled = false;
	smp_wmb();

	if (efx->legacy_irq)
1483 1484 1485 1486 1487 1488 1489
		synchronize_irq(efx->legacy_irq);

	efx_for_each_channel(channel, efx) {
		if (channel->irq)
			synchronize_irq(channel->irq);

		efx_stop_eventq(channel);
B
Ben Hutchings 已提交
1490
		if (!channel->type->keep_eventq)
1491
			efx_fini_eventq(channel);
1492
	}
1493 1494 1495

	/* Flush the asynchronous MCDI request queue */
	efx_mcdi_flush_async(efx);
1496 1497
}

1498
static int efx_enable_interrupts(struct efx_nic *efx)
B
Ben Hutchings 已提交
1499
{
1500 1501
	struct efx_channel *channel, *end_channel;
	int rc;
B
Ben Hutchings 已提交
1502 1503 1504 1505 1506 1507 1508 1509

	BUG_ON(efx->state == STATE_DISABLED);

	if (efx->eeh_disabled_legacy_irq) {
		enable_irq(efx->legacy_irq);
		efx->eeh_disabled_legacy_irq = false;
	}

1510
	efx->type->irq_enable_master(efx);
B
Ben Hutchings 已提交
1511 1512

	efx_for_each_channel(channel, efx) {
1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
		if (channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
	}

	rc = efx_soft_enable_interrupts(efx);
	if (rc)
		goto fail;

	return 0;

fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
B
Ben Hutchings 已提交
1531
		if (channel->type->keep_eventq)
1532
			efx_fini_eventq(channel);
B
Ben Hutchings 已提交
1533 1534
	}

1535 1536 1537
	efx->type->irq_disable_non_ev(efx);

	return rc;
B
Ben Hutchings 已提交
1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550
}

/* Reverse of efx_enable_interrupts(): soft-disable interrupts, finalise
 * the event queues of channels whose type keeps its event queue, then
 * call the NIC type's irq_disable_non_ev() hook.
 */
static void efx_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_soft_disable_interrupts(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);
}

1554 1555 1556 1557 1558
static void efx_remove_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	/* Remove MSI/MSI-X interrupts */
1559
	efx_for_each_channel(channel, efx)
1560 1561 1562 1563 1564 1565 1566 1567
		channel->irq = 0;
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);

	/* Remove legacy interrupt */
	efx->legacy_irq = 0;
}

1568
static void efx_set_channels(struct efx_nic *efx)
1569
{
1570 1571 1572
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;

1573
	efx->tx_channel_offset =
B
Ben Hutchings 已提交
1574
		separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0;
1575

1576 1577
	/* We need to mark which channels really have RX and TX
	 * queues, and adjust the TX queue numbers if we have separate
1578 1579 1580
	 * RX-only and TX-only channels.
	 */
	efx_for_each_channel(channel, efx) {
1581 1582 1583 1584 1585
		if (channel->channel < efx->n_rx_channels)
			channel->rx_queue.core_index = channel->channel;
		else
			channel->rx_queue.core_index = -1;

1586 1587 1588 1589
		efx_for_each_channel_tx_queue(tx_queue, channel)
			tx_queue->queue -= (efx->tx_channel_offset *
					    EFX_TXQ_TYPES);
	}
1590 1591 1592 1593
}

static int efx_probe_nic(struct efx_nic *efx)
{
1594
	size_t i;
1595 1596
	int rc;

1597
	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
1598 1599

	/* Carry out hardware-type specific initialisation */
1600
	rc = efx->type->probe(efx);
1601 1602 1603
	if (rc)
		return rc;

B
Ben Hutchings 已提交
1604
	/* Determine the number of channels and queues by trying to hook
1605
	 * in MSI-X interrupts. */
1606 1607
	rc = efx_probe_interrupts(efx);
	if (rc)
1608
		goto fail1;
1609

1610 1611
	efx_set_channels(efx);

1612 1613 1614
	rc = efx->type->dimension_resources(efx);
	if (rc)
		goto fail2;
1615

1616 1617
	if (efx->n_channels > 1)
		get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
1618
	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
1619
		efx->rx_indir_table[i] =
1620
			ethtool_rxfh_indir_default(i, efx->rss_spread);
1621

1622 1623
	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
1624 1625

	/* Initialise the interrupt moderation settings */
1626 1627
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);
1628 1629

	return 0;
1630

1631 1632 1633
fail2:
	efx_remove_interrupts(efx);
fail1:
1634 1635
	efx->type->remove(efx);
	return rc;
1636 1637 1638 1639
}

static void efx_remove_nic(struct efx_nic *efx)
{
1640
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
1641 1642

	efx_remove_interrupts(efx);
1643
	efx->type->remove(efx);
1644 1645
}

1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683
/* Initialise the filter lock and probe the filter table.  With
 * CONFIG_RFS_ACCEL, also allocate the rps_flow_id array when the NIC type
 * offers NETIF_F_NTUPLE.
 *
 * Returns 0 on success, the filter_table_probe() error, or -ENOMEM if the
 * rps_flow_id allocation fails (the filter table is removed again).
 */
static int efx_probe_filters(struct efx_nic *efx)
{
	int rc;

	spin_lock_init(&efx->filter_lock);

	rc = efx->type->filter_table_probe(efx);
	if (rc != 0)
		goto out;

#ifdef CONFIG_RFS_ACCEL
	if ((efx->type->offload_features & NETIF_F_NTUPLE) == 0)
		goto out;

	efx->rps_flow_id = kcalloc(efx->type->max_rx_ip_filters,
				   sizeof(*efx->rps_flow_id),
				   GFP_KERNEL);
	if (efx->rps_flow_id == NULL) {
		efx->type->filter_table_remove(efx);
		rc = -ENOMEM;
	}
#endif

out:
	return rc;
}

static void efx_remove_filters(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	kfree(efx->rps_flow_id);
#endif
	efx->type->filter_table_remove(efx);
}

static void efx_restore_filters(struct efx_nic *efx)
{
	efx->type->filter_table_restore(efx);
}

1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695
/**************************************************************************
 *
 * NIC startup/shutdown
 *
 *************************************************************************/

static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
1696
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
1697 1698 1699 1700 1701
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
1702
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
1703 1704 1705
		goto fail2;
	}

1706 1707 1708 1709 1710
	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
		rc = -EINVAL;
		goto fail3;
	}
1711
	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;
1712

B
Ben Hutchings 已提交
1713 1714 1715 1716
	rc = efx_probe_filters(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to create filter tables\n");
1717
		goto fail3;
B
Ben Hutchings 已提交
1718 1719
	}

1720 1721 1722 1723
	rc = efx_probe_channels(efx);
	if (rc)
		goto fail4;

1724 1725
	return 0;

B
Ben Hutchings 已提交
1726
 fail4:
1727
	efx_remove_filters(efx);
1728 1729 1730 1731 1732 1733 1734 1735
 fail3:
	efx_remove_port(efx);
 fail2:
	efx_remove_nic(efx);
 fail1:
	return rc;
}

1736 1737 1738 1739 1740 1741
/* If the interface is supposed to be running but is not, start
 * the hardware and software data path, regular activity for the port
 * (MAC statistics, link polling, etc.) and schedule the port to be
 * reconfigured.  Interrupts must already be enabled.  This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
1742
 */
1743 1744 1745
static void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
1746
	BUG_ON(efx->state == STATE_DISABLED);
1747 1748 1749

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock */
1750 1751
	if (efx->port_enabled || !netif_running(efx->net_dev) ||
	    efx->reset_pending)
1752 1753 1754
		return;

	efx_start_port(efx);
1755
	efx_start_datapath(efx);
1756

1757 1758
	/* Start the hardware monitor if there is one */
	if (efx->type->monitor != NULL)
1759 1760
		queue_delayed_work(efx->workqueue, &efx->monitor_work,
				   efx_monitor_interval);
1761 1762 1763 1764 1765

	/* If link state detection is normally event-driven, we have
	 * to poll now because we could have missed a change
	 */
	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
1766 1767 1768 1769 1770
		mutex_lock(&efx->mac_lock);
		if (efx->phy_op->poll(efx))
			efx_link_status_changed(efx);
		mutex_unlock(&efx->mac_lock);
	}
1771

1772
	efx->type->start_stats(efx);
1773 1774 1775 1776
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
1777 1778
}

1779 1780 1781 1782 1783
/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down.  Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled.  Requires the RTNL lock.
 */
1784 1785 1786 1787 1788 1789 1790 1791
static void efx_stop_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	/* port_enabled can be read safely under the rtnl lock */
	if (!efx->port_enabled)
		return;

1792 1793 1794 1795 1796 1797 1798
	/* update stats before we go down so we can accurately count
	 * rx_nodesc_drops
	 */
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
1799
	efx->type->stop_stats(efx);
1800 1801
	efx_stop_port(efx);

1802 1803 1804 1805 1806 1807
	/* Stop the kernel transmit interface.  This is only valid if
	 * the device is stopped or detached; otherwise the watchdog
	 * may fire immediately.
	 */
	WARN_ON(netif_running(efx->net_dev) &&
		netif_device_present(efx->net_dev));
1808 1809 1810
	netif_tx_disable(efx->net_dev);

	efx_stop_datapath(efx);
1811 1812 1813 1814
}

/* Reverse of efx_probe_all(): remove channels, filters, port and NIC */
static void efx_remove_all(struct efx_nic *efx)
{
	efx_remove_channels(efx);
	efx_remove_filters(efx);
	efx_remove_port(efx);
	efx_remove_nic(efx);
}

/**************************************************************************
 *
 * Interrupt moderation
 *
 **************************************************************************/

1827
/* Convert an interrupt moderation time in microseconds to timer ticks of
 * quantum_ns nanoseconds each, rounding down.
 *
 * Returns 0 only for usecs == 0; any non-zero time shorter than one
 * quantum yields 1.  usecs * 1000 is computed in unsigned int, so callers
 * must keep usecs small enough for that product not to wrap.
 */
static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
{
	if (usecs == 0)
		return 0;
	if (usecs * 1000 < quantum_ns)
		return 1; /* never round down to 0 */
	return usecs * 1000 / quantum_ns;
}

1836
/* Set interrupt moderation parameters */
1837 1838 1839
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
			    unsigned int rx_usecs, bool rx_adaptive,
			    bool rx_may_override_tx)
1840
{
1841
	struct efx_channel *channel;
1842 1843 1844 1845 1846
	unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
						efx->timer_quantum_ns,
						1000);
	unsigned int tx_ticks;
	unsigned int rx_ticks;
1847 1848 1849

	EFX_ASSERT_RESET_SERIALISED(efx);

1850
	if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
1851 1852
		return -EINVAL;

1853 1854 1855
	tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
	rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);

1856 1857 1858 1859 1860 1861 1862
	if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
	    !rx_may_override_tx) {
		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
			  "RX and TX IRQ moderation must be equal\n");
		return -EINVAL;
	}

1863
	efx->irq_rx_adaptive = rx_adaptive;
1864
	efx->irq_rx_moderation = rx_ticks;
1865
	efx_for_each_channel(channel, efx) {
1866
		if (efx_channel_has_rx_queue(channel))
1867
			channel->irq_moderation = rx_ticks;
1868
		else if (efx_channel_has_tx_queues(channel))
1869 1870
			channel->irq_moderation = tx_ticks;
	}
1871 1872

	return 0;
1873 1874
}

1875 1876 1877
/* Report the current interrupt moderation settings in microseconds
 * through tx_usecs, rx_usecs and rx_adaptive.
 */
void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
			    unsigned int *rx_usecs, bool *rx_adaptive)
{
	/* We must round up when converting ticks to microseconds
	 * because we round down when converting the other way.
	 */

	*rx_adaptive = efx->irq_rx_adaptive;
	*rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
				 efx->timer_quantum_ns,
				 1000);

	/* If channels are shared between RX and TX, so is IRQ
	 * moderation.  Otherwise, IRQ moderation is the same for all
	 * TX channels and is not adaptive.
	 */
	if (efx->tx_channel_offset == 0)
		*tx_usecs = *rx_usecs;
	else
		*tx_usecs = DIV_ROUND_UP(
			efx->channel[efx->tx_channel_offset]->irq_moderation *
			efx->timer_quantum_ns,
			1000);
}

1900 1901 1902 1903 1904 1905
/**************************************************************************
 *
 * Hardware monitor
 *
 **************************************************************************/

1906
/* Run periodically off the general workqueue */
1907 1908 1909 1910 1911
static void efx_monitor(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic,
					   monitor_work.work);

1912 1913 1914
	netif_vdbg(efx, timer, efx->net_dev,
		   "hardware monitor executing on CPU %d\n",
		   raw_smp_processor_id());
1915
	BUG_ON(efx->type->monitor == NULL);
1916 1917 1918

	/* If the mac_lock is already held then it is likely a port
	 * reconfiguration is already in place, which will likely do
1919 1920 1921 1922 1923 1924
	 * most of the work of monitor() anyway. */
	if (mutex_trylock(&efx->mac_lock)) {
		if (efx->port_enabled)
			efx->type->monitor(efx);
		mutex_unlock(&efx->mac_lock);
	}
1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940

	queue_delayed_work(efx->workqueue, &efx->monitor_work,
			   efx_monitor_interval);
}

/**************************************************************************
 *
 * ioctls
 *
 *************************************************************************/

/* Net device ioctl
 * Context: process, rtnl_lock() held.
 */
static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
{
1941
	struct efx_nic *efx = netdev_priv(net_dev);
1942
	struct mii_ioctl_data *data = if_mii(ifr);
1943

1944
	if (cmd == SIOCSHWTSTAMP)
1945 1946 1947
		return efx_ptp_set_ts_config(efx, ifr);
	if (cmd == SIOCGHWTSTAMP)
		return efx_ptp_get_ts_config(efx, ifr);
1948

1949 1950 1951 1952 1953 1954
	/* Convert phy_id from older PRTAD/DEVAD format */
	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
	    (data->phy_id & 0xfc00) == 0x0400)
		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;

	return mdio_mii_ioctl(&efx->mdio, data, cmd);
1955 1956 1957 1958 1959 1960 1961 1962
}

/**************************************************************************
 *
 * NAPI interface
 *
 **************************************************************************/

1963 1964 1965 1966 1967 1968 1969
/* Register the channel's NAPI context against the NIC's net device with
 * efx_poll() as the poll function, add it to the NAPI hash and initialise
 * the channel lock.
 */
static void efx_init_napi_channel(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	channel->napi_dev = efx->net_dev;
	netif_napi_add(channel->napi_dev, &channel->napi_str,
		       efx_poll, napi_weight);
	napi_hash_add(&channel->napi_str);
	efx_channel_init_lock(channel);
}

1974
static void efx_init_napi(struct efx_nic *efx)
1975 1976 1977
{
	struct efx_channel *channel;

1978 1979
	efx_for_each_channel(channel, efx)
		efx_init_napi_channel(channel);
1980 1981 1982 1983
}

static void efx_fini_napi_channel(struct efx_channel *channel)
{
1984
	if (channel->napi_dev) {
1985
		netif_napi_del(&channel->napi_str);
1986 1987
		napi_hash_del(&channel->napi_str);
	}
1988
	channel->napi_dev = NULL;
1989 1990 1991 1992 1993 1994
}

static void efx_fini_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

1995 1996
	efx_for_each_channel(channel, efx)
		efx_fini_napi_channel(channel);
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
}

/**************************************************************************
 *
 * Kernel netpoll interface
 *
 *************************************************************************/

#ifdef CONFIG_NET_POLL_CONTROLLER

/* Although in the common case interrupts will be disabled, this is not
 * guaranteed. However, all our work happens inside the NAPI callback,
 * so no locking is required.
 */
static void efx_netpoll(struct net_device *net_dev)
{
2013
	struct efx_nic *efx = netdev_priv(net_dev);
2014 2015
	struct efx_channel *channel;

2016
	efx_for_each_channel(channel, efx)
2017 2018 2019 2020 2021
		efx_schedule_channel(channel);
}

#endif

2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052
#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-poll hook: process a small fixed budget of events on the channel
 * that embeds @napi and return how many RX packets that accounted for.
 * Returns LL_FLUSH_FAILED if the interface is not running and
 * LL_FLUSH_BUSY if the channel's poll lock could not be taken.
 */
static int efx_busy_poll(struct napi_struct *napi)
{
	struct efx_channel *channel;
	struct efx_nic *efx;
	const int budget = 4;
	int rx_before, rx_done;

	channel = container_of(napi, struct efx_channel, napi_str);
	efx = channel->efx;

	if (!netif_running(efx->net_dev))
		return LL_FLUSH_FAILED;
	if (!efx_channel_lock_poll(channel))
		return LL_FLUSH_BUSY;

	/* Count packets by sampling the RX queue counter around the call */
	rx_before = channel->rx_queue.rx_packets;
	efx_process_channel(channel, budget);
	rx_done = channel->rx_queue.rx_packets - rx_before;

	/* Dropping the lock here does not race with NAPI: if NAPI yielded
	 * while we were busy polling it failed to take the lock, so it
	 * returned the full budget and will be rescheduled automatically.
	 */
	efx_channel_unlock_poll(channel);

	return rx_done;
}
#endif

2053 2054 2055 2056 2057 2058 2059 2060 2061
/**************************************************************************
 *
 * Kernel net device interface
 *
 *************************************************************************/

/* Context: process, rtnl_lock() held. */
static int efx_net_open(struct net_device *net_dev)
{
2062
	struct efx_nic *efx = netdev_priv(net_dev);
2063 2064
	int rc;

2065 2066
	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
		  raw_smp_processor_id());
2067

2068 2069 2070
	rc = efx_check_disabled(efx);
	if (rc)
		return rc;
2071 2072
	if (efx->phy_mode & PHY_MODE_SPECIAL)
		return -EBUSY;
2073 2074
	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
		return -EIO;
2075

2076 2077 2078 2079
	/* Notify the kernel of the link state polled during driver load,
	 * before the monitor starts running */
	efx_link_status_changed(efx);

2080
	efx_start_all(efx);
2081
	efx_selftest_async_start(efx);
2082 2083 2084 2085 2086 2087 2088 2089 2090
	return 0;
}

/* Context: process, rtnl_lock() held.
 * Note that the kernel will ignore our return code; this method
 * should really be a void.
 */
static int efx_net_stop(struct net_device *net_dev)
{
2091
	struct efx_nic *efx = netdev_priv(net_dev);
2092

2093 2094
	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
		  raw_smp_processor_id());
2095

2096 2097
	/* Stop the device and flush all the channels */
	efx_stop_all(efx);
2098 2099 2100 2101

	return 0;
}

2102
/* Context: process, dev_base_lock or RTNL held, non-blocking. */
B
Ben Hutchings 已提交
2103 2104
static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev,
					       struct rtnl_link_stats64 *stats)
2105
{
2106
	struct efx_nic *efx = netdev_priv(net_dev);
2107

2108
	spin_lock_bh(&efx->stats_lock);
2109
	efx->type->update_stats(efx, NULL, stats);
2110 2111
	spin_unlock_bh(&efx->stats_lock);

2112 2113 2114 2115 2116 2117
	return stats;
}

/* Context: netif_tx_lock held, BHs disabled. */
static void efx_watchdog(struct net_device *net_dev)
{
2118
	struct efx_nic *efx = netdev_priv(net_dev);
2119

2120 2121 2122
	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);
2123

2124
	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
2125 2126 2127 2128 2129 2130
}


/* Context: process, rtnl_lock() held. */
static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
{
2131
	struct efx_nic *efx = netdev_priv(net_dev);
2132
	int rc;
2133

2134 2135 2136
	rc = efx_check_disabled(efx);
	if (rc)
		return rc;
2137 2138 2139
	if (new_mtu > EFX_MAX_MTU)
		return -EINVAL;

2140
	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
2141

2142 2143 2144
	efx_device_detach_sync(efx);
	efx_stop_all(efx);

B
Ben Hutchings 已提交
2145
	mutex_lock(&efx->mac_lock);
2146
	net_dev->mtu = new_mtu;
2147
	efx->type->reconfigure_mac(efx);
B
Ben Hutchings 已提交
2148 2149
	mutex_unlock(&efx->mac_lock);

2150
	efx_start_all(efx);
2151
	netif_device_attach(efx->net_dev);
2152
	return 0;
2153 2154 2155 2156
}

static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
2157
	struct efx_nic *efx = netdev_priv(net_dev);
2158
	struct sockaddr *addr = data;
2159
	u8 *new_addr = addr->sa_data;
2160 2161

	if (!is_valid_ether_addr(new_addr)) {
2162 2163 2164
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
2165
		return -EADDRNOTAVAIL;
2166 2167
	}

2168
	ether_addr_copy(net_dev->dev_addr, new_addr);
2169
	efx_sriov_mac_address_changed(efx);
2170 2171

	/* Reconfigure the MAC */
B
Ben Hutchings 已提交
2172
	mutex_lock(&efx->mac_lock);
2173
	efx->type->reconfigure_mac(efx);
B
Ben Hutchings 已提交
2174
	mutex_unlock(&efx->mac_lock);
2175 2176 2177 2178

	return 0;
}

2179
/* Context: netif_addr_lock held, BHs disabled. */
2180
static void efx_set_rx_mode(struct net_device *net_dev)
2181
{
2182
	struct efx_nic *efx = netdev_priv(net_dev);
2183

2184 2185 2186
	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
2187 2188
}

2189
/* Feature change hook.  @data is the new feature set; the only change
 * acted on here is NETIF_F_NTUPLE going from set to clear, in which
 * case the result of the filter_clear_rx() hook is returned.
 * Returns 0 for every other change.
 */
static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE)
		return efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);

	return 0;
}

2200
static const struct net_device_ops efx_farch_netdev_ops = {
S
Stephen Hemminger 已提交
2201 2202
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
2203
	.ndo_get_stats64	= efx_net_stats,
S
Stephen Hemminger 已提交
2204 2205 2206 2207 2208 2209
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
2210
	.ndo_set_rx_mode	= efx_set_rx_mode,
2211
	.ndo_set_features	= efx_set_features,
2212 2213 2214 2215 2216 2217
#ifdef CONFIG_SFC_SRIOV
	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
	.ndo_get_vf_config	= efx_sriov_get_vf_config,
#endif
S
Stephen Hemminger 已提交
2218 2219 2220
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = efx_netpoll,
#endif
2221
	.ndo_setup_tc		= efx_setup_tc,
2222 2223 2224
#ifdef CONFIG_NET_RX_BUSY_POLL
	.ndo_busy_poll		= efx_busy_poll,
#endif
2225 2226 2227
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
S
Stephen Hemminger 已提交
2228 2229
};

2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244
/* net_device operations installed by efx_register_netdev() for NIC
 * revisions at or above EFX_REV_HUNT_A0.
 */
static const struct net_device_ops efx_ef10_netdev_ops = {
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
	.ndo_get_stats64	= efx_net_stats,
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= efx_netpoll,
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	.ndo_busy_poll		= efx_busy_poll,
#endif
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
};

2253 2254 2255 2256 2257 2258 2259
/* Copy the net device's current name into efx->name and propagate it
 * to the MTD partitions and the per-channel names.
 */
static void efx_update_name(struct efx_nic *efx)
{
	/* Bounded copy, matching the other writers of efx->name */
	strlcpy(efx->name, efx->net_dev->name, sizeof(efx->name));
	efx_mtd_rename(efx);
	efx_set_channel_names(efx);
}

2260 2261 2262
static int efx_netdev_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
2263
	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
2264

2265 2266
	if ((net_dev->netdev_ops == &efx_farch_netdev_ops ||
	     net_dev->netdev_ops == &efx_ef10_netdev_ops) &&
2267 2268
	    event == NETDEV_CHANGENAME)
		efx_update_name(netdev_priv(net_dev));
2269 2270 2271 2272 2273 2274 2275 2276

	return NOTIFY_DONE;
}

/* Notifier block whose callback is efx_netdev_event(). */
static struct notifier_block efx_netdev_notifier = {
	.notifier_call = efx_netdev_event,
};

B
Ben Hutchings 已提交
2277 2278 2279 2280 2281 2282
static ssize_t
show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
	return sprintf(buf, "%d\n", efx->phy_type);
}
2283
static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
B
Ben Hutchings 已提交
2284

2285 2286 2287
static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
2288
	struct efx_channel *channel;
2289 2290 2291 2292
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
2293 2294 2295 2296 2297 2298
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) {
		net_dev->netdev_ops = &efx_ef10_netdev_ops;
		net_dev->priv_flags |= IFF_UNICAST_FLT;
	} else {
		net_dev->netdev_ops = &efx_farch_netdev_ops;
	}
2299
	net_dev->ethtool_ops = &efx_ethtool_ops;
2300
	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
2301

2302
	rtnl_lock();
2303

2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316
	/* Enable resets to be scheduled and check whether any were
	 * already requested.  If so, the NIC is probably hosed so we
	 * abort.
	 */
	efx->state = STATE_READY;
	smp_mb(); /* ensure we change state before checking reset_pending */
	if (efx->reset_pending) {
		netif_err(efx, probe, efx->net_dev,
			  "aborting probe due to scheduled reset\n");
		rc = -EIO;
		goto fail_locked;
	}

2317 2318 2319
	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
2320
	efx_update_name(efx);
2321

2322 2323 2324
	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

2325 2326 2327 2328
	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

2329 2330
	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;
2331 2332
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
2333 2334
	}

2335 2336
	efx_associate(efx);

2337
	rtnl_unlock();
2338

B
Ben Hutchings 已提交
2339 2340
	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
2341 2342
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
B
Ben Hutchings 已提交
2343 2344 2345
		goto fail_registered;
	}

2346
	return 0;
B
Ben Hutchings 已提交
2347

2348 2349
fail_registered:
	rtnl_lock();
2350
	efx_dissociate(efx);
2351
	unregister_netdevice(net_dev);
2352
fail_locked:
2353
	efx->state = STATE_UNINIT;
2354
	rtnl_unlock();
2355
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
2356
	return rc;
2357 2358 2359 2360 2361 2362 2363
}

/* Undo efx_register_netdev(): restore the PCI name as efx->name, remove
 * the phy_type sysfs attribute, unregister the net device under the
 * RTNL lock and return to STATE_UNINIT.  No-op if there is no net
 * device.
 */
static void efx_unregister_netdev(struct efx_nic *efx)
{
	if (!efx->net_dev)
		return;

	BUG_ON(netdev_priv(efx->net_dev) != efx);

	strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);

	rtnl_lock();
	unregister_netdevice(efx->net_dev);
	efx->state = STATE_UNINIT;
	rtnl_unlock();
}

/**************************************************************************
 *
 * Device reset and suspend
 *
 **************************************************************************/

B
Ben Hutchings 已提交
2381 2382
/* Tears down the entire software state and most of the hardware state
 * before reset.  */
B
Ben Hutchings 已提交
2383
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
2384 2385 2386
{
	EFX_ASSERT_RESET_SERIALISED(efx);

2387 2388 2389
	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->prepare_flr(efx);

B
Ben Hutchings 已提交
2390
	efx_stop_all(efx);
B
Ben Hutchings 已提交
2391
	efx_disable_interrupts(efx);
2392 2393

	mutex_lock(&efx->mac_lock);
2394 2395
	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE)
		efx->phy_op->fini(efx);
2396
	efx->type->fini(efx);
2397 2398
}

B
Ben Hutchings 已提交
2399 2400 2401 2402 2403
/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and the
 * driver should be disabled. If ok is false, then the rx and tx
 * engines are not restarted, pending a RESET_DISABLE. */
B
Ben Hutchings 已提交
2404
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
2405 2406 2407
{
	int rc;

B
Ben Hutchings 已提交
2408
	EFX_ASSERT_RESET_SERIALISED(efx);
2409

2410 2411 2412 2413
	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->finish_flr(efx);

	/* Ensure that SRAM is initialised even if we're disabling the device */
2414
	rc = efx->type->init(efx);
2415
	if (rc) {
2416
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
2417
		goto fail;
2418 2419
	}

2420 2421 2422
	if (!ok)
		goto fail;

2423
	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) {
2424 2425 2426 2427
		rc = efx->phy_op->init(efx);
		if (rc)
			goto fail;
		if (efx->phy_op->reconfigure(efx))
2428 2429
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
2430 2431
	}

2432 2433 2434
	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail;
B
Ben Hutchings 已提交
2435
	efx_restore_filters(efx);
2436
	efx_sriov_reset(efx);
2437 2438 2439 2440 2441 2442 2443 2444 2445

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	return 0;

fail:
	efx->port_initialized = false;
B
Ben Hutchings 已提交
2446 2447 2448

	mutex_unlock(&efx->mac_lock);

2449 2450 2451
	return rc;
}

2452 2453
/* Reset the NIC using the specified method.  Note that the reset may
 * fail, in which case the card will be left in an unusable state.
2454
 *
2455
 * Caller must hold the rtnl_lock.
2456
 */
2457
int efx_reset(struct efx_nic *efx, enum reset_type method)
2458
{
2459 2460
	int rc, rc2;
	bool disabled;
2461

2462 2463
	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));
2464

2465
	efx_device_detach_sync(efx);
B
Ben Hutchings 已提交
2466
	efx_reset_down(efx, method);
2467

2468
	rc = efx->type->reset(efx, method);
2469
	if (rc) {
2470
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
2471
		goto out;
2472 2473
	}

2474 2475 2476
	/* Clear flags for the scopes we covered.  We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
2477 2478 2479 2480
	if (method < RESET_TYPE_MAX_METHOD)
		efx->reset_pending &= -(1 << (method + 1));
	else /* it doesn't fit into the well-ordered scope hierarchy */
		__clear_bit(method, &efx->reset_pending);
2481 2482 2483 2484 2485 2486 2487

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
	 * can respond to requests. */
	pci_set_master(efx->pci_dev);

2488
out:
2489
	/* Leave device stopped if necessary */
2490 2491 2492
	disabled = rc ||
		method == RESET_TYPE_DISABLE ||
		method == RESET_TYPE_RECOVER_OR_DISABLE;
2493 2494 2495 2496 2497
	rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		if (!rc)
			rc = rc2;
2498 2499
	}

2500
	if (disabled) {
2501
		dev_close(efx->net_dev);
2502
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
2503 2504
		efx->state = STATE_DISABLED;
	} else {
2505
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
2506
		netif_device_attach(efx->net_dev);
2507
	}
2508 2509 2510
	return rc;
}

2511 2512 2513 2514 2515
/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful (always the
 * case when CONFIG_EEH is not set).
 * Returns a non-zero value otherwise.
 */
int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
	/* A PCI error can occur and not be seen by EEH because nothing
	 * happens on the PCI bus. In this case the driver may fail and
	 * schedule a 'recover or reset', leading to this recovery handler.
	 * Manually call the eeh failure check function.
	 */
	struct eeh_dev *eehdev =
		of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev));

	if (eeh_dev_check_failure(eehdev)) {
		/* The EEH mechanisms will handle the error and reset the
		 * device if necessary.
		 */
		return 1;
	}
#endif
	return 0;
}

2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554
static void efx_wait_for_bist_end(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
		if (efx_mcdi_poll_reboot(efx))
			goto out;
		msleep(BIST_WAIT_DELAY_MS);
	}

	netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
out:
	/* Either way unset the BIST flag. If we found no reboot we probably
	 * won't recover, but we should try.
	 */
	efx->mc_bist_for_other_fn = false;
}

2555 2556 2557 2558 2559
/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
 */
static void efx_reset_work(struct work_struct *data)
{
2560
	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
2561 2562 2563 2564 2565 2566
	unsigned long pending;
	enum reset_type method;

	pending = ACCESS_ONCE(efx->reset_pending);
	method = fls(pending) - 1;

2567 2568 2569
	if (method == RESET_TYPE_MC_BIST)
		efx_wait_for_bist_end(efx);

2570 2571 2572 2573
	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
	     method == RESET_TYPE_RECOVER_OR_ALL) &&
	    efx_try_recovery(efx))
		return;
2574

2575
	if (!pending)
2576 2577
		return;

2578
	rtnl_lock();
2579 2580 2581 2582 2583 2584

	/* We checked the state in efx_schedule_reset() but it may
	 * have changed by now.  Now that we have the RTNL lock,
	 * it cannot change again.
	 */
	if (efx->state == STATE_READY)
2585
		(void)efx_reset(efx, method);
2586

2587
	rtnl_unlock();
2588 2589 2590 2591 2592 2593
}

void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
	enum reset_type method;

2594 2595 2596 2597 2598 2599 2600
	if (efx->state == STATE_RECOVERY) {
		netif_dbg(efx, drv, efx->net_dev,
			  "recovering: skip scheduling %s reset\n",
			  RESET_TYPE(type));
		return;
	}

2601 2602 2603
	switch (type) {
	case RESET_TYPE_INVISIBLE:
	case RESET_TYPE_ALL:
2604
	case RESET_TYPE_RECOVER_OR_ALL:
2605 2606
	case RESET_TYPE_WORLD:
	case RESET_TYPE_DISABLE:
2607
	case RESET_TYPE_RECOVER_OR_DISABLE:
2608
	case RESET_TYPE_MC_BIST:
2609
	case RESET_TYPE_MCDI_TIMEOUT:
2610
		method = type;
2611 2612
		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
			  RESET_TYPE(method));
2613 2614
		break;
	default:
2615
		method = efx->type->map_reset_reason(type);
2616 2617 2618
		netif_dbg(efx, drv, efx->net_dev,
			  "scheduling %s reset for %s\n",
			  RESET_TYPE(method), RESET_TYPE(type));
2619 2620
		break;
	}
2621

2622
	set_bit(method, &efx->reset_pending);
2623 2624 2625 2626 2627 2628 2629
	smp_mb(); /* ensure we change reset_pending before checking state */

	/* If we're not READY then just leave the flags set as the cue
	 * to abort probing or reschedule the reset later.
	 */
	if (ACCESS_ONCE(efx->state) != STATE_READY)
		return;
2630

2631 2632 2633 2634
	/* efx_process_channel() will no longer read events once a
	 * reset is scheduled. So switch back to poll'd MCDI completions. */
	efx_mcdi_mode_poll(efx);

2635
	queue_work(reset_workqueue, &efx->reset_work);
2636 2637 2638 2639 2640 2641 2642 2643 2644
}

/**************************************************************************
 *
 * List of NICs we support
 *
 **************************************************************************/

/* PCI device ID table */
2645
static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = {
2646 2647
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
		    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
2648
	 .driver_data = (unsigned long) &falcon_a1_nic_type},
2649 2650
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
		    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
2651
	 .driver_data = (unsigned long) &falcon_b0_nic_type},
2652
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
2653
	 .driver_data = (unsigned long) &siena_a0_nic_type},
2654
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
2655
	 .driver_data = (unsigned long) &siena_a0_nic_type},
2656 2657
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2658 2659
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
2660 2661 2662 2663 2664
	{0}			/* end of list */
};

/**************************************************************************
 *
 * Dummy PHY/MAC operations
 *
 * Can be used for some unimplemented operations
 * Needed so all function pointers are valid and do not have to be tested
 * before use
 *
 **************************************************************************/
/* No-op for port hooks that return a status: ignores @efx, returns 0. */
int efx_port_dummy_op_int(struct efx_nic *efx)
{
	const int status = 0;

	return status;
}

/* No-op for port hooks that return nothing: ignores @efx. */
void efx_port_dummy_op_void(struct efx_nic *efx)
{
}
S
stephen hemminger 已提交
2677 2678

/* No-op PHY poll hook: ignores @efx and always returns false. */
static bool efx_port_dummy_op_poll(struct efx_nic *efx)
{
	return false;
}
2682

2683
static const struct efx_phy_operations efx_dummy_phy_operations = {
2684
	.init		 = efx_port_dummy_op_int,
B
Ben Hutchings 已提交
2685
	.reconfigure	 = efx_port_dummy_op_int,
S
Steve Hodgson 已提交
2686
	.poll		 = efx_port_dummy_op_poll,
2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698
	.fini		 = efx_port_dummy_op_void,
};

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

/* This zeroes out and then fills in the invariants in a struct
 * efx_nic (including all sub-structures).
 */
2699
static int efx_init_struct(struct efx_nic *efx,
2700 2701
			   struct pci_dev *pci_dev, struct net_device *net_dev)
{
2702
	int i;
2703 2704

	/* Initialise common structures */
2705 2706
	INIT_LIST_HEAD(&efx->node);
	INIT_LIST_HEAD(&efx->secondary_list);
2707
	spin_lock_init(&efx->biu_lock);
2708 2709 2710
#ifdef CONFIG_SFC_MTD
	INIT_LIST_HEAD(&efx->mtd_list);
#endif
2711 2712
	INIT_WORK(&efx->reset_work, efx_reset_work);
	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
2713
	INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
2714
	efx->pci_dev = pci_dev;
2715
	efx->msg_enable = debug;
2716
	efx->state = STATE_UNINIT;
2717 2718 2719
	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));

	efx->net_dev = net_dev;
2720
	efx->rx_prefix_size = efx->type->rx_prefix_size;
2721 2722
	efx->rx_ip_align =
		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
2723 2724
	efx->rx_packet_hash_offset =
		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
2725 2726
	efx->rx_packet_ts_offset =
		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
2727 2728 2729
	spin_lock_init(&efx->stats_lock);
	mutex_init(&efx->mac_lock);
	efx->phy_op = &efx_dummy_phy_operations;
2730
	efx->mdio.dev = net_dev;
2731
	INIT_WORK(&efx->mac_work, efx_mac_work);
2732
	init_waitqueue_head(&efx->flush_wq);
2733 2734

	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
2735 2736 2737
		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
		if (!efx->channel[i])
			goto fail;
B
Ben Hutchings 已提交
2738 2739
		efx->msi_context[i].efx = efx;
		efx->msi_context[i].index = i;
2740 2741 2742 2743 2744 2745
	}

	/* Higher numbered interrupt modes are less capable! */
	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
				  interrupt_mode);

2746 2747 2748 2749
	/* Would be good to use the net_dev name, but we're too early */
	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
		 pci_name(pci_dev));
	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
2750
	if (!efx->workqueue)
2751
		goto fail;
2752

2753
	return 0;
2754 2755 2756 2757

fail:
	efx_fini_struct(efx);
	return -ENOMEM;
2758 2759 2760 2761
}

static void efx_fini_struct(struct efx_nic *efx)
{
2762 2763 2764 2765 2766
	int i;

	for (i = 0; i < EFX_MAX_CHANNELS; i++)
		kfree(efx->channel[i]);

2767 2768
	kfree(efx->vpd_sn);

2769 2770 2771 2772 2773 2774
	if (efx->workqueue) {
		destroy_workqueue(efx->workqueue);
		efx->workqueue = NULL;
	}
}

2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785
/* Write the software-maintained counters into @stats: the sum of every
 * channel's n_rx_nodesc_trunc and the NIC-wide n_rx_noskb_drops.
 * @stats is indexed by the GENERIC_STAT_* constants.
 */
void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
{
	struct efx_channel *channel;
	u64 nodesc_trunc_total = 0;

	efx_for_each_channel(channel, efx)
		nodesc_trunc_total += channel->n_rx_nodesc_trunc;

	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
	stats[GENERIC_STAT_rx_nodesc_trunc] = nodesc_trunc_total;
}

2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796
/**************************************************************************
 *
 * PCI interface
 *
 **************************************************************************/

/* Main body of final NIC shutdown code
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove_main(struct efx_nic *efx)
{
2797 2798 2799 2800 2801 2802
	/* Flush reset_work. It can no longer be scheduled since we
	 * are not READY.
	 */
	BUG_ON(efx->state == STATE_READY);
	cancel_work_sync(&efx->reset_work);

B
Ben Hutchings 已提交
2803
	efx_disable_interrupts(efx);
2804
	efx_nic_fini_interrupt(efx);
2805
	efx_fini_port(efx);
2806
	efx->type->fini(efx);
2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823
	efx_fini_napi(efx);
	efx_remove_all(efx);
}

/* Final NIC shutdown
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove(struct pci_dev *pci_dev)
{
	struct efx_nic *efx;

	efx = pci_get_drvdata(pci_dev);
	if (!efx)
		return;

	/* Mark the NIC as fini, then stop the interface */
	rtnl_lock();
2824
	efx_dissociate(efx);
2825
	dev_close(efx->net_dev);
B
Ben Hutchings 已提交
2826
	efx_disable_interrupts(efx);
2827 2828
	rtnl_unlock();

2829
	efx_sriov_fini(efx);
2830 2831
	efx_unregister_netdev(efx);

2832 2833
	efx_mtd_remove(efx);

2834 2835 2836
	efx_pci_remove_main(efx);

	efx_fini_io(efx);
2837
	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
2838 2839 2840

	efx_fini_struct(efx);
	free_netdev(efx->net_dev);
2841 2842

	pci_disable_pcie_error_reporting(pci_dev);
2843 2844
};

2845 2846 2847 2848 2849 2850
/* NIC VPD information
 * Called during probe to display the part number of the
 * installed NIC.  VPD is potentially very large but this should
 * always appear within the first 512 bytes.
 */
#define SFC_VPD_LEN 512
2851
static void efx_probe_vpd_strings(struct efx_nic *efx)
2852 2853 2854 2855
{
	struct pci_dev *dev = efx->pci_dev;
	char vpd_data[SFC_VPD_LEN];
	ssize_t vpd_size;
2856
	int ro_start, ro_size, i, j;
2857 2858 2859 2860 2861 2862 2863 2864 2865

	/* Get the vpd data from the device */
	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
	if (vpd_size <= 0) {
		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
		return;
	}

	/* Get the Read only section */
2866 2867
	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
	if (ro_start < 0) {
2868 2869 2870 2871
		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
		return;
	}

2872 2873 2874
	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
	j = ro_size;
	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893
	if (i + j > vpd_size)
		j = vpd_size - i;

	/* Get the Part number */
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
		return;
	}

	netif_info(efx, drv, efx->net_dev,
		   "Part Number : %.*s\n", j, &vpd_data[i]);
2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914

	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	j = ro_size;
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
		return;
	}

	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
	if (!efx->vpd_sn)
		return;

	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
2915 2916 2917
}


2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929
/* Main body of NIC initialisation
 * This is called at module load (or hotplug insertion, theoretically).
 */
static int efx_pci_probe_main(struct efx_nic *efx)
{
	int rc;

	/* Do start-of-day initialisation */
	rc = efx_probe_all(efx);
	if (rc)
		goto fail1;

2930
	efx_init_napi(efx);
2931

2932
	rc = efx->type->init(efx);
2933
	if (rc) {
2934 2935
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise NIC\n");
2936
		goto fail3;
2937 2938 2939 2940
	}

	rc = efx_init_port(efx);
	if (rc) {
2941 2942
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise port\n");
2943
		goto fail4;
2944 2945
	}

2946
	rc = efx_nic_init_interrupt(efx);
2947
	if (rc)
2948
		goto fail5;
2949 2950 2951
	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail6;
2952 2953 2954

	return 0;

2955 2956
 fail6:
	efx_nic_fini_interrupt(efx);
2957
 fail5:
2958 2959
	efx_fini_port(efx);
 fail4:
2960
	efx->type->fini(efx);
2961 2962 2963 2964 2965 2966 2967 2968 2969 2970
 fail3:
	efx_fini_napi(efx);
	efx_remove_all(efx);
 fail1:
	return rc;
}

/* NIC initialisation
 *
 * This is called at module load (or hotplug insertion,
2971
 * theoretically).  It sets up PCI mappings, resets the NIC,
2972 2973 2974 2975 2976
 * sets up and registers the network devices with the kernel and hooks
 * the interrupt service routine.  It does not prepare the device for
 * transmission; this is left to the first time one of the network
 * interfaces is brought up (i.e. efx_net_open).
 */
B
Bill Pemberton 已提交
2977
static int efx_pci_probe(struct pci_dev *pci_dev,
2978
			 const struct pci_device_id *entry)
2979 2980 2981
{
	struct net_device *net_dev;
	struct efx_nic *efx;
2982
	int rc;
2983 2984

	/* Allocate and initialise a struct net_device and struct efx_nic */
2985 2986
	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
				     EFX_MAX_RX_QUEUES);
2987 2988
	if (!net_dev)
		return -ENOMEM;
2989 2990 2991
	efx = netdev_priv(net_dev);
	efx->type = (const struct efx_nic_type *) entry->driver_data;
	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
B
Ben Hutchings 已提交
2992
			      NETIF_F_HIGHDMA | NETIF_F_TSO |
2993
			      NETIF_F_RXCSUM);
2994
	if (efx->type->offload_features & NETIF_F_V6_CSUM)
B
Ben Hutchings 已提交
2995
		net_dev->features |= NETIF_F_TSO6;
2996 2997
	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG |
2998 2999 3000 3001
				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				   NETIF_F_RXCSUM);
	/* All offloads can be toggled */
	net_dev->hw_features = net_dev->features & ~NETIF_F_HIGHDMA;
3002
	pci_set_drvdata(pci_dev, efx);
3003
	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
3004
	rc = efx_init_struct(efx, pci_dev, net_dev);
3005 3006 3007
	if (rc)
		goto fail1;

3008
	netif_info(efx, probe, efx->net_dev,
3009
		   "Solarflare NIC detected\n");
3010

3011
	efx_probe_vpd_strings(efx);
3012

3013 3014 3015 3016 3017
	/* Set up basic I/O (BAR mappings etc) */
	rc = efx_init_io(efx);
	if (rc)
		goto fail2;

3018 3019 3020
	rc = efx_pci_probe_main(efx);
	if (rc)
		goto fail3;
3021 3022 3023

	rc = efx_register_netdev(efx);
	if (rc)
3024
		goto fail4;
3025

3026 3027 3028 3029 3030
	rc = efx_sriov_init(efx);
	if (rc)
		netif_err(efx, probe, efx->net_dev,
			  "SR-IOV can't be enabled rc %d\n", rc);

3031
	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
3032

3033
	/* Try to create MTDs, but allow this to fail */
3034
	rtnl_lock();
3035
	rc = efx_mtd_probe(efx);
3036
	rtnl_unlock();
3037 3038 3039 3040
	if (rc)
		netif_warn(efx, probe, efx->net_dev,
			   "failed to create MTDs (%d)\n", rc);

3041 3042 3043 3044 3045
	rc = pci_enable_pcie_error_reporting(pci_dev);
	if (rc && rc != -EINVAL)
		netif_warn(efx, probe, efx->net_dev,
			   "pci_enable_pcie_error_reporting failed (%d)\n", rc);

3046 3047 3048
	return 0;

 fail4:
3049
	efx_pci_remove_main(efx);
3050 3051 3052 3053 3054
 fail3:
	efx_fini_io(efx);
 fail2:
	efx_fini_struct(efx);
 fail1:
S
Steve Hodgson 已提交
3055
	WARN_ON(rc > 0);
3056
	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
3057 3058 3059 3060
	free_netdev(net_dev);
	return rc;
}

3061 3062 3063 3064
static int efx_pm_freeze(struct device *dev)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

3065 3066
	rtnl_lock();

3067 3068
	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_UNINIT;
3069

3070
		efx_device_detach_sync(efx);
3071

3072
		efx_stop_all(efx);
B
Ben Hutchings 已提交
3073
		efx_disable_interrupts(efx);
3074
	}
3075

3076 3077
	rtnl_unlock();

3078 3079 3080 3081 3082
	return 0;
}

static int efx_pm_thaw(struct device *dev)
{
3083
	int rc;
3084 3085
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

3086 3087
	rtnl_lock();

3088
	if (efx->state != STATE_DISABLED) {
3089 3090 3091
		rc = efx_enable_interrupts(efx);
		if (rc)
			goto fail;
3092

3093 3094 3095
		mutex_lock(&efx->mac_lock);
		efx->phy_op->reconfigure(efx);
		mutex_unlock(&efx->mac_lock);
3096

3097
		efx_start_all(efx);
3098

3099
		netif_device_attach(efx->net_dev);
3100

3101
		efx->state = STATE_READY;
3102

3103 3104
		efx->type->resume_wol(efx);
	}
3105

3106 3107
	rtnl_unlock();

3108 3109 3110
	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
	queue_work(reset_workqueue, &efx->reset_work);

3111
	return 0;
3112 3113 3114 3115 3116

fail:
	rtnl_unlock();

	return rc;
3117 3118 3119 3120 3121 3122 3123 3124 3125
}

/* PM poweroff callback (also the second half of efx_pm_suspend()).
 *
 * Calls the NIC type's fini hook, clears any pending reset requests,
 * saves the PCI state and requests the D3hot power state.
 *
 * Returns the result of pci_set_power_state().
 */
static int efx_pm_poweroff(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);

	efx->type->fini(efx);

	efx->reset_pending = 0;

	pci_save_state(pci_dev);
	return pci_set_power_state(pci_dev, PCI_D3hot);
}

/* Used for both resume and restore
 *
 * Returns the device to D0, restores the saved PCI state, re-enables the
 * device and bus mastering, performs a RESET_TYPE_ALL reset, runs the NIC
 * type's init hook and finally calls efx_pm_thaw().
 *
 * Returns 0 on success, or the error code of the first step that failed;
 * no later step is attempted after a failure.
 */
static int efx_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);
	int rc;

	rc = pci_set_power_state(pci_dev, PCI_D0);
	if (rc)
		return rc;
	pci_restore_state(pci_dev);
	rc = pci_enable_device(pci_dev);
	if (rc)
		return rc;
	pci_set_master(efx->pci_dev);
	rc = efx->type->reset(efx, RESET_TYPE_ALL);
	if (rc)
		return rc;
	rc = efx->type->init(efx);
	if (rc)
		return rc;
	rc = efx_pm_thaw(dev);
	return rc;
}

/* PM suspend callback: freeze the device, then power it off.
 *
 * Returns the result of efx_pm_poweroff().  If that step fails, a resume
 * is attempted; the outcome of that attempt is not reported and the
 * power-off error is returned.
 */
static int efx_pm_suspend(struct device *dev)
{
	int err;

	efx_pm_freeze(dev);

	err = efx_pm_poweroff(dev);
	if (!err)
		return 0;

	/* Power-off failed: try to bring the device back up again */
	efx_pm_resume(dev);
	return err;
}

3168
static const struct dev_pm_ops efx_pm_ops = {
3169 3170 3171 3172 3173 3174 3175 3176
	.suspend	= efx_pm_suspend,
	.resume		= efx_pm_resume,
	.freeze		= efx_pm_freeze,
	.thaw		= efx_pm_thaw,
	.poweroff	= efx_pm_poweroff,
	.restore	= efx_pm_resume,
};

3177 3178 3179 3180
/* A PCI error affecting this device was detected.
 * At this point MMIO and DMA may be disabled.
 * Stop the software path and request a slot reset.
 */
3181 3182
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      enum pci_channel_state state)
3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_RECOVERY;
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
B
Ben Hutchings 已提交
3199
		efx_disable_interrupts(efx);
3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successfull reset, which will be performed later in efx_io_resume. */
3217
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	int rc;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status =  PCI_ERS_RESULT_DISCONNECT;
	}

	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
		"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
		/* Non-fatal error. Continue. */
	}

	return status;
}

/* PCI error recovery "resume" step: carry out the real reset and bring
 * I/O back.  Nothing is done for an interface in STATE_DISABLED.  The
 * whole operation runs under the RTNL lock.
 */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *nic = pci_get_drvdata(pdev);

	rtnl_lock();

	if (nic->state != STATE_DISABLED) {
		int err = efx_reset(nic, RESET_TYPE_ALL);

		if (!err) {
			/* Only a successful reset returns the NIC to service */
			nic->state = STATE_READY;
			netif_dbg(nic, hw, nic->net_dev,
				  "Done resetting and resuming IO after PCI error.\n");
		} else {
			netif_err(nic, hw, nic->net_dev,
				  "efx_reset failed after PCI error (%d)\n", err);
		}
	}

	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callback unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
static struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};

3276
static struct pci_driver efx_pci_driver = {
3277
	.name		= KBUILD_MODNAME,
3278 3279 3280
	.id_table	= efx_pci_table,
	.probe		= efx_pci_probe,
	.remove		= efx_pci_remove,
3281
	.driver.pm	= &efx_pm_ops,
3282
	.err_handler	= &efx_err_handlers,
3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304
};

/**************************************************************************
 *
 * Kernel module interface
 *
 *************************************************************************/

/* Expose interrupt_mode as an unsigned module parameter.  Permissions
 * 0444 make it readable but not writable through sysfs.
 */
module_param(interrupt_mode, uint, 0444);
MODULE_PARM_DESC(interrupt_mode,
		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");

/* Module entry point.
 *
 * Registers the netdevice notifier, initialises SR-IOV support, creates
 * the single-threaded reset workqueue and registers the PCI driver, in
 * that order.  If a step fails, the earlier steps are undone in reverse
 * order.
 *
 * Returns 0 on success or a non-zero error code from the failing step
 * (-ENOMEM if the workqueue cannot be created).
 */
static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

	rc = efx_init_sriov();
	if (rc)
		goto err_sriov;

	reset_workqueue = create_singlethread_workqueue("sfc_reset");
	if (!reset_workqueue) {
		rc = -ENOMEM;
		goto err_reset;
	}

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

 err_pci:
	destroy_workqueue(reset_workqueue);
 err_reset:
	efx_fini_sriov();
 err_sriov:
	unregister_netdevice_notifier(&efx_netdev_notifier);
 err_notifier:
	return rc;
}

/* Module exit point: undo efx_init_module() in reverse order. */
static void __exit efx_exit_module(void)
{
	printk(KERN_INFO "Solarflare NET driver unloading\n");

	pci_unregister_driver(&efx_pci_driver);
	destroy_workqueue(reset_workqueue);
	efx_fini_sriov();
	unregister_netdevice_notifier(&efx_netdev_notifier);
}

module_init(efx_init_module);
module_exit(efx_exit_module);

3345 3346
MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
B
Ben Hutchings 已提交
3347
MODULE_DESCRIPTION("Solarflare network driver");
3348 3349
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);