/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/in.h>
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
#include "nic.h"
#include "io.h"
#include "selftest.h"
#include "sriov.h"

#include "mcdi.h"
#include "mcdi_pcol.h"
#include "workarounds.h"

/**************************************************************************
 *
 * Type name strings
 *
 **************************************************************************
 */

/* Loopback mode names (see LOOPBACK_MODE()) */
const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
const char *const efx_loopback_mode_names[] = {
	[LOOPBACK_NONE]		= "NONE",
	[LOOPBACK_DATA]		= "DATAPATH",
	[LOOPBACK_GMAC]		= "GMAC",
	[LOOPBACK_XGMII]	= "XGMII",
	[LOOPBACK_XGXS]		= "XGXS",
	[LOOPBACK_XAUI]		= "XAUI",
	[LOOPBACK_GMII]		= "GMII",
	[LOOPBACK_SGMII]	= "SGMII",
	[LOOPBACK_XGBR]		= "XGBR",
	[LOOPBACK_XFI]		= "XFI",
	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
	[LOOPBACK_GPHY]		= "GPHY",
	[LOOPBACK_PHYXS]	= "PHYXS",
	[LOOPBACK_PCS]		= "PCS",
	[LOOPBACK_PMAPMD]	= "PMA/PMD",
	[LOOPBACK_XPORT]	= "XPORT",
	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
	[LOOPBACK_GMII_WS]	= "GMII_WS",
	[LOOPBACK_XFI_WS]	= "XFI_WS",
	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
};

const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
const char *const efx_reset_type_names[] = {
	[RESET_TYPE_INVISIBLE]          = "INVISIBLE",
	[RESET_TYPE_ALL]                = "ALL",
	[RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
	[RESET_TYPE_WORLD]              = "WORLD",
	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
	[RESET_TYPE_DATAPATH]           = "DATAPATH",
	[RESET_TYPE_MC_BIST]		= "MC_BIST",
	[RESET_TYPE_DISABLE]            = "DISABLE",
	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
	[RESET_TYPE_MCDI_TIMEOUT]	= "MCDI_TIMEOUT (FLR)",
};

/* UDP tunnel type names */
static const char *const efx_udp_tunnel_type_names[] = {
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
	[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
};

void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
{
	if (type < ARRAY_SIZE(efx_udp_tunnel_type_names) &&
	    efx_udp_tunnel_type_names[type] != NULL)
		snprintf(buf, buflen, "%s", efx_udp_tunnel_type_names[type]);
	else
		snprintf(buf, buflen, "type %d", type);
}

/* Reset workqueue. If any NIC has a hardware failure then a reset will be
 * queued onto this work queue. This is not a per-nic work queue, because
 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
 */
static struct workqueue_struct *reset_workqueue;

/* How often and how many times to poll for a reset while waiting for a
 * BIST that another function started to complete.
 */
#define BIST_WAIT_DELAY_MS	100
#define BIST_WAIT_DELAY_COUNT	100

/**************************************************************************
 *
 * Configurable values
 *
 *************************************************************************/

/*
 * Use separate channels for TX and RX events
 *
 * Set this to 1 to use separate channels for TX and RX. It allows us
 * to control interrupt affinity separately for TX and RX.
 *
 * This is only used in MSI-X interrupt mode
 */
bool efx_separate_tx_channels;
module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
		 "Use separate channels for TX and RX");

/* This is the weight assigned to each of the (per-channel) virtual
 * NAPI devices.
 */
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
 * monitor.
 * On Falcon-based NICs, this will:
 * - Check the on-board hardware monitor;
 * - Poll the link state and reconfigure the hardware as necessary.
 * On Siena-based NICs for power systems with EEH support, this will give EEH a
 * chance to start.
 */
static unsigned int efx_monitor_interval = 1 * HZ;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * The default for RX should strike a balance between increasing the
 * round-trip latency and reducing overhead.
 */
static unsigned int rx_irq_mod_usec = 60;

/* Initial interrupt moderation settings.  They can be modified after
 * module load with ethtool.
 *
 * This default is chosen to ensure that a 10G link does not go idle
 * while a TX queue is stopped after it has become full.  A queue is
 * restarted when it drops below half full.  The time this takes (assuming
 * worst case 3 descriptors per packet and 1024 descriptors) is
 *   512 / 3 * 1.2 = 205 usec.
 */
static unsigned int tx_irq_mod_usec = 150;

/* This is the first interrupt mode to try out of:
 * 0 => MSI-X
 * 1 => MSI
 * 2 => legacy
 */
static unsigned int interrupt_mode;

/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
 * i.e. the number of CPUs among which we may distribute simultaneous
 * interrupt handling.
 *
 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
 * The default (0) means to assign an interrupt to each core.
 */
static unsigned int rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");

static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");

static unsigned irq_adapt_low_thresh = 8000;
module_param(irq_adapt_low_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_low_thresh,
		 "Threshold score for reducing IRQ moderation");

static unsigned irq_adapt_high_thresh = 16000;
module_param(irq_adapt_high_thresh, uint, 0644);
MODULE_PARM_DESC(irq_adapt_high_thresh,
		 "Threshold score for increasing IRQ moderation");

static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
module_param(debug, uint, 0);
MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");

/**************************************************************************
 *
 * Utility functions and prototypes
 *
 *************************************************************************/

static int efx_soft_enable_interrupts(struct efx_nic *efx);
static void efx_soft_disable_interrupts(struct efx_nic *efx);
static void efx_remove_channel(struct efx_channel *channel);
static void efx_remove_channels(struct efx_nic *efx);
static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
static void efx_init_napi_channel(struct efx_channel *channel);
static void efx_fini_napi(struct efx_nic *efx);
static void efx_fini_napi_channel(struct efx_channel *channel);
static void efx_fini_struct(struct efx_nic *efx);
static void efx_start_all(struct efx_nic *efx);
static void efx_stop_all(struct efx_nic *efx);

#define EFX_ASSERT_RESET_SERIALISED(efx)		\
	do {						\
		if ((efx->state == STATE_READY) ||	\
		    (efx->state == STATE_RECOVERY) ||	\
		    (efx->state == STATE_DISABLED))	\
			ASSERT_RTNL();			\
	} while (0)

static int efx_check_disabled(struct efx_nic *efx)
{
	if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
		netif_err(efx, drv, efx->net_dev,
			  "device is disabled due to earlier errors\n");
		return -EIO;
	}
	return 0;
}

/**************************************************************************
 *
 * Event queue processing
 *
 *************************************************************************/

/* Process channel's event queue
 *
 * This function is responsible for processing the event queue of a
 * single channel.  The caller must guarantee that this function will
 * never be concurrently called more than once on the same channel,
 * though different channels may be being processed concurrently.
 */
static int efx_process_channel(struct efx_channel *channel, int budget)
{
	struct efx_tx_queue *tx_queue;
	int spent;

	if (unlikely(!channel->enabled))
		return 0;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		tx_queue->pkts_compl = 0;
		tx_queue->bytes_compl = 0;
	}

	spent = efx_nic_process_eventq(channel, budget);
	if (spent && efx_channel_has_rx_queue(channel)) {
		struct efx_rx_queue *rx_queue =
			efx_channel_get_rx_queue(channel);

		efx_rx_flush_packet(channel);
		efx_fast_push_rx_descriptors(rx_queue, true);
	}

	/* Update BQL */
	efx_for_each_channel_tx_queue(tx_queue, channel) {
		if (tx_queue->bytes_compl) {
			netdev_tx_completed_queue(tx_queue->core_txq,
				tx_queue->pkts_compl, tx_queue->bytes_compl);
		}
	}

	return spent;
}

/* NAPI poll handler
 *
 * NAPI guarantees serialisation of polls of the same device, which
 * provides the guarantee required by efx_process_channel().
 */
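
/* Adjust a channel's IRQ moderation from its recent event score: back the
 * moderation off while the channel is quiet and raise it (up to the
 * configured RX maximum) while it is busy, then reset the counters.
 */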
static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
{
	int step = efx->irq_mod_step_us;

	if (channel->irq_mod_score < irq_adapt_low_thresh) {
		if (channel->irq_moderation_us > step) {
			channel->irq_moderation_us -= step;
			efx->type->push_irq_moderation(channel);
		}
	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
		if (channel->irq_moderation_us <
		    efx->irq_rx_moderation_us) {
			channel->irq_moderation_us += step;
			efx->type->push_irq_moderation(channel);
		}
	}

	channel->irq_count = 0;
	channel->irq_mod_score = 0;
}

static int efx_poll(struct napi_struct *napi, int budget)
{
	struct efx_channel *channel =
		container_of(napi, struct efx_channel, napi_str);
	struct efx_nic *efx = channel->efx;
	int spent;

	netif_vdbg(efx, intr, efx->net_dev,
		   "channel %d NAPI poll executing on CPU %d\n",
		   channel->channel, raw_smp_processor_id());

	spent = efx_process_channel(channel, budget);

	if (spent < budget) {
		if (efx_channel_has_rx_queue(channel) &&
		    efx->irq_rx_adaptive &&
		    unlikely(++channel->irq_count == 1000)) {
			efx_update_irq_mod(efx, channel);
		}

		efx_filter_rfs_expire(channel);

		/* There is no race here; although napi_disable() will
		 * only wait for napi_complete(), this isn't a problem
		 * since efx_nic_eventq_read_ack() will have no effect if
		 * interrupts have already been disabled.
		 */
		if (napi_complete_done(napi, spent))
			efx_nic_eventq_read_ack(channel);
	}

	return spent;
}

/* Create event queue
 * Event queue memory allocations are done only once.  If the channel
 * is reset, the memory buffer will be reused; this guards against
 * errors during channel reset and also simplifies interrupt handling.
 */
static int efx_probe_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	unsigned long entries;

	netif_dbg(efx, probe, efx->net_dev,
		  "chan %d create event queue\n", channel->channel);

	/* Build an event queue with room for one event per tx and rx buffer,
	 * plus some extra for link state events and MCDI completions. */
	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;

	return efx_nic_probe_eventq(channel);
}

/* Prepare channel's event queue */
static int efx_init_eventq(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	int rc;

	EFX_WARN_ON_PARANOID(channel->eventq_init);

	netif_dbg(efx, drv, efx->net_dev,
		  "chan %d init event queue\n", channel->channel);

	rc = efx_nic_init_eventq(channel);
	if (rc == 0) {
		efx->type->push_irq_moderation(channel);
		channel->eventq_read_ptr = 0;
		channel->eventq_init = true;
	}
	return rc;
}

/* Enable event queue processing and NAPI */
void efx_start_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
		  "chan %d start event queue\n", channel->channel);

	/* Make sure the NAPI handler sees the enabled flag set */
	channel->enabled = true;
	smp_wmb();

	napi_enable(&channel->napi_str);
	efx_nic_eventq_read_ack(channel);
}

/* Disable event queue processing and NAPI */
void efx_stop_eventq(struct efx_channel *channel)
{
	if (!channel->enabled)
		return;

	napi_disable(&channel->napi_str);
	channel->enabled = false;
}

static void efx_fini_eventq(struct efx_channel *channel)
{
	if (!channel->eventq_init)
		return;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d fini event queue\n", channel->channel);

	efx_nic_fini_eventq(channel);
	channel->eventq_init = false;
}

static void efx_remove_eventq(struct efx_channel *channel)
{
	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "chan %d remove event queue\n", channel->channel);

	efx_nic_remove_eventq(channel);
}

/**************************************************************************
 *
 * Channel handling
 *
 *************************************************************************/

/* Allocate and initialise a channel structure. */
static struct efx_channel *
efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	channel->efx = efx;
	channel->channel = i;
	channel->type = &efx_default_channel_type;

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		tx_queue->efx = efx;
		tx_queue->queue = i * EFX_TXQ_TYPES + j;
		tx_queue->channel = channel;
	}

	rx_queue = &channel->rx_queue;
	rx_queue->efx = efx;
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);

	return channel;
}

/* Allocate and initialise a channel structure, copying parameters
 * (but not resources) from an old channel structure.
 */
static struct efx_channel *
efx_copy_channel(const struct efx_channel *old_channel)
{
	struct efx_channel *channel;
	struct efx_rx_queue *rx_queue;
	struct efx_tx_queue *tx_queue;
	int j;

	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
	if (!channel)
		return NULL;

	*channel = *old_channel;

	channel->napi_dev = NULL;
	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
	channel->napi_str.napi_id = 0;
	channel->napi_str.state = 0;
	memset(&channel->eventq, 0, sizeof(channel->eventq));

	for (j = 0; j < EFX_TXQ_TYPES; j++) {
		tx_queue = &channel->tx_queue[j];
		if (tx_queue->channel)
			tx_queue->channel = channel;
		tx_queue->buffer = NULL;
		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
	}

	rx_queue = &channel->rx_queue;
	rx_queue->buffer = NULL;
	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
	timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);

	return channel;
}
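
/* Allocate the event queue and TX/RX queue resources for a channel. */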

static int efx_probe_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	netif_dbg(channel->efx, probe, channel->efx->net_dev,
		  "creating channel %d\n", channel->channel);

	rc = channel->type->pre_probe(channel);
	if (rc)
		goto fail;

	rc = efx_probe_eventq(channel);
	if (rc)
		goto fail;

	efx_for_each_channel_tx_queue(tx_queue, channel) {
		rc = efx_probe_tx_queue(tx_queue);
		if (rc)
			goto fail;
	}

	efx_for_each_channel_rx_queue(rx_queue, channel) {
		rc = efx_probe_rx_queue(rx_queue);
		if (rc)
			goto fail;
	}

	return 0;

fail:
	efx_remove_channel(channel);
	return rc;
}

static void
efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
{
	struct efx_nic *efx = channel->efx;
	const char *type;
	int number;

	number = channel->channel;
	if (efx->tx_channel_offset == 0) {
		type = "";
	} else if (channel->channel < efx->tx_channel_offset) {
		type = "-rx";
	} else {
		type = "-tx";
		number -= efx->tx_channel_offset;
	}
	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
}
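
/* Refresh the per-channel interrupt names stored in the MSI context. */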

static void efx_set_channel_names(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		channel->type->get_name(channel,
					efx->msi_context[channel->channel].name,
					sizeof(efx->msi_context[0].name));
}

static int efx_probe_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	int rc;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

	/* Probe channels in reverse, so that any 'extra' channels
	 * use the start of the buffer table. This allows the traffic
	 * channels to be resized without moving them or wasting the
	 * entries before them.
	 */
	efx_for_each_channel_rev(channel, efx) {
		rc = efx_probe_channel(channel);
		if (rc) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	efx_set_channel_names(efx);

	return 0;

fail:
	efx_remove_channels(efx);
	return rc;
}

/* Channels are shutdown and reinitialised whilst the NIC is running
 * to propagate configuration changes (mtu, checksum offload), or
 * to clear hardware error conditions
 */
static void efx_start_datapath(struct efx_nic *efx)
{
	netdev_features_t old_features = efx->net_dev->features;
	bool old_rx_scatter = efx->rx_scatter;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	struct efx_channel *channel;
	size_t rx_buf_len;

	/* Calculate the rx buffer allocation parameters required to
	 * support the current MTU, including padding for header
	 * alignment and overruns.
	 */
	efx->rx_dma_len = (efx->rx_prefix_size +
			   EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
			   efx->type->rx_buffer_padding);
	rx_buf_len = (sizeof(struct efx_rx_page_state) +
		      efx->rx_ip_align + efx->rx_dma_len);
	if (rx_buf_len <= PAGE_SIZE) {
		efx->rx_scatter = efx->type->always_rx_scatter;
		efx->rx_buffer_order = 0;
	} else if (efx->type->can_rx_scatter) {
		BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
		BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
			     2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
				       EFX_RX_BUF_ALIGNMENT) >
			     PAGE_SIZE);
		efx->rx_scatter = true;
		efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
		efx->rx_buffer_order = 0;
	} else {
		efx->rx_scatter = false;
		efx->rx_buffer_order = get_order(rx_buf_len);
	}

	efx_rx_config_page_split(efx);
	if (efx->rx_buffer_order)
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u; page order=%u batch=%u\n",
			  efx->rx_dma_len, efx->rx_buffer_order,
			  efx->rx_pages_per_batch);
	else
		netif_dbg(efx, drv, efx->net_dev,
			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
			  efx->rx_dma_len, efx->rx_page_buf_step,
			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);

	/* Restore previously fixed features in hw_features and remove
	 * features which are fixed now
	 */
	efx->net_dev->hw_features |= efx->net_dev->features;
	efx->net_dev->hw_features &= ~efx->fixed_features;
	efx->net_dev->features |= efx->fixed_features;
	if (efx->net_dev->features != old_features)
		netdev_features_change(efx->net_dev);

	/* RX filters may also have scatter-enabled flags */
	if (efx->rx_scatter != old_rx_scatter)
		efx->type->filter_update_rx_scatter(efx);

	/* We must keep at least one descriptor in a TX ring empty.
	 * We could avoid this when the queue size does not exactly
	 * match the hardware ring size, but it's not that important.
	 * Therefore we stop the queue when one more skb might fill
	 * the ring completely.  We wake it when half way back to
	 * empty.
	 */
	efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;

	/* Initialise the channels */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_tx_queue(tx_queue, channel) {
			efx_init_tx_queue(tx_queue);
			atomic_inc(&efx->active_queues);
		}

		efx_for_each_channel_rx_queue(rx_queue, channel) {
			efx_init_rx_queue(rx_queue);
			atomic_inc(&efx->active_queues);
			efx_stop_eventq(channel);
			efx_fast_push_rx_descriptors(rx_queue, false);
			efx_start_eventq(channel);
		}

		WARN_ON(channel->rx_pkt_n_frags);
	}

	efx_ptp_start_datapath(efx);

	if (netif_device_present(efx->net_dev))
		netif_tx_wake_all_queues(efx->net_dev);
}
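
/* Stop RX refill and NAPI processing, flush all DMA queues and tear down
 * the TX/RX rings; the reverse of efx_start_datapath().
 */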

static void efx_stop_datapath(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->port_enabled);

	efx_ptp_stop_datapath(efx);

	/* Stop RX refill */
	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			rx_queue->refill_enabled = false;
	}

	efx_for_each_channel(channel, efx) {
		/* RX packet processing is pipelined, so wait for the
		 * NAPI handler to complete.  At least event queue 0
		 * might be kept active by non-data events, so don't
		 * use napi_synchronize() but actually disable NAPI
		 * temporarily.
		 */
		if (efx_channel_has_rx_queue(channel)) {
			efx_stop_eventq(channel);
			efx_start_eventq(channel);
		}
	}

	rc = efx->type->fini_dmaq(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "successfully flushed all queues\n");
	}

	efx_for_each_channel(channel, efx) {
		efx_for_each_channel_rx_queue(rx_queue, channel)
			efx_fini_rx_queue(rx_queue);
		efx_for_each_possible_channel_tx_queue(tx_queue, channel)
			efx_fini_tx_queue(tx_queue);
	}
}

static void efx_remove_channel(struct efx_channel *channel)
{
	struct efx_tx_queue *tx_queue;
	struct efx_rx_queue *rx_queue;

	netif_dbg(channel->efx, drv, channel->efx->net_dev,
		  "destroy chan %d\n", channel->channel);

	efx_for_each_channel_rx_queue(rx_queue, channel)
		efx_remove_rx_queue(rx_queue);
	efx_for_each_possible_channel_tx_queue(tx_queue, channel)
		efx_remove_tx_queue(tx_queue);
	efx_remove_eventq(channel);
	channel->type->post_remove(channel);
}

static void efx_remove_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_remove_channel(channel);
}
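
/* Re-create the data path with the requested ring sizes.  The data path is
 * stopped, each copyable channel is cloned with the new entry counts, and
 * everything is restarted; channels that cannot be copied keep their
 * existing buffer table entries.
 */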

int
efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
{
	struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
	u32 old_rxq_entries, old_txq_entries;
	unsigned i, next_buffer_table = 0;
	int rc, rc2;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	/* Not all channels should be reallocated. We must avoid
	 * reallocating their buffer table entries.
	 */
	efx_for_each_channel(channel, efx) {
		struct efx_rx_queue *rx_queue;
		struct efx_tx_queue *tx_queue;

		if (channel->type->copy)
			continue;
		next_buffer_table = max(next_buffer_table,
					channel->eventq.index +
					channel->eventq.entries);
		efx_for_each_channel_rx_queue(rx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						rx_queue->rxd.index +
						rx_queue->rxd.entries);
		efx_for_each_channel_tx_queue(tx_queue, channel)
			next_buffer_table = max(next_buffer_table,
						tx_queue->txd.index +
						tx_queue->txd.entries);
	}

	efx_device_detach_sync(efx);
	efx_stop_all(efx);
	efx_soft_disable_interrupts(efx);

	/* Clone channels (where possible) */
	memset(other_channel, 0, sizeof(other_channel));
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (channel->type->copy)
			channel = channel->type->copy(channel);
		if (!channel) {
			rc = -ENOMEM;
			goto out;
		}
		other_channel[i] = channel;
	}

	/* Swap entry counts and channel pointers */
	old_rxq_entries = efx->rxq_entries;
	old_txq_entries = efx->txq_entries;
	efx->rxq_entries = rxq_entries;
	efx->txq_entries = txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}

	/* Restart buffer table allocation */
	efx->next_buffer_table = next_buffer_table;

	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		if (!channel->type->copy)
			continue;
		rc = efx_probe_channel(channel);
		if (rc)
			goto rollback;
		efx_init_napi_channel(efx->channel[i]);
	}

out:
	/* Destroy unused channel structures */
	for (i = 0; i < efx->n_channels; i++) {
		channel = other_channel[i];
		if (channel && channel->type->copy) {
			efx_fini_napi_channel(channel);
			efx_remove_channel(channel);
			kfree(channel);
		}
	}

	rc2 = efx_soft_enable_interrupts(efx);
	if (rc2) {
		rc = rc ? rc : rc2;
		netif_err(efx, drv, efx->net_dev,
			  "unable to restart interrupts on channel reallocation\n");
		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
	} else {
		efx_start_all(efx);
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;

rollback:
	/* Swap back */
	efx->rxq_entries = old_rxq_entries;
	efx->txq_entries = old_txq_entries;
	for (i = 0; i < efx->n_channels; i++) {
		channel = efx->channel[i];
		efx->channel[i] = other_channel[i];
		other_channel[i] = channel;
	}
	goto out;
}

void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
{
	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
}

static bool efx_default_channel_want_txqs(struct efx_channel *channel)
{
	return channel->channel - channel->efx->tx_channel_offset <
		channel->efx->n_tx_channels;
}

static const struct efx_channel_type efx_default_channel_type = {
	.pre_probe		= efx_channel_dummy_op_int,
	.post_remove		= efx_channel_dummy_op_void,
	.get_name		= efx_get_channel_name,
	.copy			= efx_copy_channel,
	.want_txqs		= efx_default_channel_want_txqs,
	.keep_eventq		= false,
	.want_pio		= true,
};

int efx_channel_dummy_op_int(struct efx_channel *channel)
{
	return 0;
}

void efx_channel_dummy_op_void(struct efx_channel *channel)
{
}

/**************************************************************************
 *
 * Port handling
 *
 **************************************************************************/

/* This ensures that the kernel is kept informed (via
 * netif_carrier_on/off) of the link status, and also maintains the
 * link status's stop on the port's TX queue.
 */
void efx_link_status_changed(struct efx_nic *efx)
{
	struct efx_link_state *link_state = &efx->link_state;

	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
	 * that no events are triggered between unregister_netdev() and the
	 * driver unloading. A more general condition is that NETDEV_CHANGE
	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
	if (!netif_running(efx->net_dev))
		return;

	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
		efx->n_link_state_changes++;

		if (link_state->up)
			netif_carrier_on(efx->net_dev);
		else
			netif_carrier_off(efx->net_dev);
	}

	/* Status message for kernel log */
	if (link_state->up)
		netif_info(efx, link, efx->net_dev,
			   "link up at %uMbps %s-duplex (MTU %d)\n",
			   link_state->speed, link_state->fd ? "full" : "half",
			   efx->net_dev->mtu);
	else
		netif_info(efx, link, efx->net_dev, "link down\n");
}

void efx_link_set_advertising(struct efx_nic *efx,
			      const unsigned long *advertising)
{
	memcpy(efx->link_advertising, advertising,
	       sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));

	efx->link_advertising[0] |= ADVERTISED_Autoneg;
	if (advertising[0] & ADVERTISED_Pause)
		efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
	else
		efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
	if (advertising[0] & ADVERTISED_Asym_Pause)
		efx->wanted_fc ^= EFX_FC_TX;
}

/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
 * force the Autoneg bit on.
 */
void efx_link_clear_advertising(struct efx_nic *efx)
{
	bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
	efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}

void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
	efx->wanted_fc = wanted_fc;
	if (efx->link_advertising[0]) {
		if (wanted_fc & EFX_FC_RX)
			efx->link_advertising[0] |= (ADVERTISED_Pause |
						     ADVERTISED_Asym_Pause);
		else
			efx->link_advertising[0] &= ~(ADVERTISED_Pause |
						      ADVERTISED_Asym_Pause);
		if (wanted_fc & EFX_FC_TX)
			efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
	}
}

static void efx_fini_port(struct efx_nic *efx);

/* We assume that efx->type->reconfigure_mac will always try to sync RX
 * filters and therefore needs to read-lock the filter table against freeing
 */
void efx_mac_reconfigure(struct efx_nic *efx)
{
	down_read(&efx->filter_sem);
	efx->type->reconfigure_mac(efx);
	up_read(&efx->filter_sem);
}

/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
 * the MAC appropriately. All other PHY configuration changes are pushed
 * through phy_op->set_settings(), and pushed asynchronously to the MAC
 * through efx_monitor().
 *
 * Callers must hold the mac_lock
 */
int __efx_reconfigure_port(struct efx_nic *efx)
{
	enum efx_phy_mode phy_mode;
	int rc;

	WARN_ON(!mutex_is_locked(&efx->mac_lock));

	/* Disable PHY transmit in mac level loopbacks */
	phy_mode = efx->phy_mode;
	if (LOOPBACK_INTERNAL(efx))
		efx->phy_mode |= PHY_MODE_TX_DISABLED;
	else
		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;

	rc = efx->type->reconfigure_port(efx);

	if (rc)
		efx->phy_mode = phy_mode;

	return rc;
}

/* Reinitialise the MAC to pick up new PHY settings, even if the port is
 * disabled. */
int efx_reconfigure_port(struct efx_nic *efx)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	rc = __efx_reconfigure_port(efx);
	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly. */
static void efx_mac_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	if (efx->port_enabled)
		efx_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);
}

static int efx_probe_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "create port\n");

	if (phy_flash_cfg)
		efx->phy_mode = PHY_MODE_SPECIAL;

	/* Connect up MAC/PHY operations table */
	rc = efx->type->probe_port(efx);
	if (rc)
		return rc;

	/* Initialise MAC address to permanent address */
	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);

	return 0;
}

static int efx_init_port(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, drv, efx->net_dev, "init port\n");

	mutex_lock(&efx->mac_lock);

	rc = efx->phy_op->init(efx);
	if (rc)
		goto fail1;

	efx->port_initialized = true;

	/* Reconfigure the MAC before creating dma queues (required for
	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
	efx_mac_reconfigure(efx);

	/* Ensure the PHY advertises the correct flow control settings */
	rc = efx->phy_op->reconfigure(efx);
	if (rc && rc != -EPERM)
		goto fail2;

	mutex_unlock(&efx->mac_lock);
	return 0;

fail2:
	efx->phy_op->fini(efx);
fail1:
	mutex_unlock(&efx->mac_lock);
	return rc;
}
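
/* Mark the port as enabled and push the current MAC configuration; from
 * this point efx_mac_work() may reconfigure the MAC asynchronously.
 */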

static void efx_start_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
	BUG_ON(efx->port_enabled);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = true;

	/* Ensure MAC ingress/egress is enabled */
	efx_mac_reconfigure(efx);

	mutex_unlock(&efx->mac_lock);
}

/* Cancel work for MAC reconfiguration, periodic hardware monitoring
 * and the async self-test, wait for them to finish and prevent them
 * being scheduled again.  This doesn't cover online resets, which
 * should only be cancelled when removing the device.
 */
static void efx_stop_port(struct efx_nic *efx)
{
	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");

	EFX_ASSERT_RESET_SERIALISED(efx);

	mutex_lock(&efx->mac_lock);
	efx->port_enabled = false;
	mutex_unlock(&efx->mac_lock);

	/* Serialise against efx_set_multicast_list() */
	netif_addr_lock_bh(efx->net_dev);
	netif_addr_unlock_bh(efx->net_dev);

	cancel_delayed_work_sync(&efx->monitor_work);
	efx_selftest_async_cancel(efx);
	cancel_work_sync(&efx->mac_work);
}

static void efx_fini_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");

	if (!efx->port_initialized)
		return;

	efx->phy_op->fini(efx);
	efx->port_initialized = false;

	efx->link_state.up = false;
	efx_link_status_changed(efx);
}

static void efx_remove_port(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");

	efx->type->remove_port(efx);
}

/**************************************************************************
 *
 * NIC handling
 *
 **************************************************************************/

static LIST_HEAD(efx_primary_list);
static LIST_HEAD(efx_unassociated_list);

static bool efx_same_controller(struct efx_nic *left, struct efx_nic *right)
{
	return left->type == right->type &&
		left->vpd_sn && right->vpd_sn &&
		!strcmp(left->vpd_sn, right->vpd_sn);
}

static void efx_associate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	if (efx->primary == efx) {
		/* Adding primary function; look for secondaries */

		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
		list_add_tail(&efx->node, &efx_primary_list);

		list_for_each_entry_safe(other, next, &efx_unassociated_list,
					 node) {
			if (efx_same_controller(efx, other)) {
				list_del(&other->node);
				netif_dbg(other, probe, other->net_dev,
					  "moving to secondary list of %s %s\n",
					  pci_name(efx->pci_dev),
					  efx->net_dev->name);
				list_add_tail(&other->node,
					      &efx->secondary_list);
				other->primary = efx;
			}
		}
	} else {
		/* Adding secondary function; look for primary */

		list_for_each_entry(other, &efx_primary_list, node) {
			if (efx_same_controller(efx, other)) {
				netif_dbg(efx, probe, efx->net_dev,
					  "adding to secondary list of %s %s\n",
					  pci_name(other->pci_dev),
					  other->net_dev->name);
				list_add_tail(&efx->node,
					      &other->secondary_list);
				efx->primary = other;
				return;
			}
		}

		netif_dbg(efx, probe, efx->net_dev,
			  "adding to unassociated list\n");
		list_add_tail(&efx->node, &efx_unassociated_list);
	}
}

static void efx_dissociate(struct efx_nic *efx)
{
	struct efx_nic *other, *next;

	list_del(&efx->node);
	efx->primary = NULL;

	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
		list_del(&other->node);
		netif_dbg(other, probe, other->net_dev,
			  "moving to unassociated list\n");
		list_add_tail(&other->node, &efx_unassociated_list);
		other->primary = NULL;
	}
}

/* This configures the PCI device to enable I/O and DMA. */
static int efx_init_io(struct efx_nic *efx)
{
	struct pci_dev *pci_dev = efx->pci_dev;
	dma_addr_t dma_mask = efx->type->max_dma_mask;
	unsigned int mem_map_size = efx->type->mem_map_size(efx);
	int rc, bar;

	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");

	bar = efx->type->mem_bar(efx);

	rc = pci_enable_device(pci_dev);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to enable PCI device\n");
		goto fail1;
	}

	pci_set_master(pci_dev);

	/* Set the PCI DMA mask.  Try all possibilities from our
	 * genuine mask down to 32 bits, because some architectures
	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
	 * masks even though they reject 46 bit masks.
	 */
	while (dma_mask > 0x7fffffffUL) {
		rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
		if (rc == 0)
			break;
		dma_mask >>= 1;
	}
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "could not find a suitable DMA mask\n");
		goto fail2;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "using DMA mask %llx\n", (unsigned long long) dma_mask);

	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
	rc = pci_request_region(pci_dev, bar, "sfc");
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "request for memory BAR failed\n");
		rc = -EIO;
		goto fail3;
	}
	efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
	if (!efx->membase) {
		netif_err(efx, probe, efx->net_dev,
			  "could not map memory BAR at %llx+%x\n",
			  (unsigned long long)efx->membase_phys, mem_map_size);
		rc = -ENOMEM;
		goto fail4;
	}
	netif_dbg(efx, probe, efx->net_dev,
		  "memory BAR at %llx+%x (virtual %p)\n",
		  (unsigned long long)efx->membase_phys, mem_map_size,
		  efx->membase);

	return 0;

 fail4:
	pci_release_region(efx->pci_dev, bar);
 fail3:
	efx->membase_phys = 0;
 fail2:
	pci_disable_device(efx->pci_dev);
 fail1:
	return rc;
}

static void efx_fini_io(struct efx_nic *efx)
{
	int bar;

	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");

	if (efx->membase) {
		iounmap(efx->membase);
		efx->membase = NULL;
	}

	if (efx->membase_phys) {
		bar = efx->type->mem_bar(efx);
		pci_release_region(efx->pci_dev, bar);
		efx->membase_phys = 0;
	}

	/* Don't disable bus-mastering if VFs are assigned */
	if (!pci_vfs_assigned(efx->pci_dev))
		pci_disable_device(efx->pci_dev);
}

void efx_set_default_rx_indir_table(struct efx_nic *efx)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
		efx->rx_indir_table[i] =
			ethtool_rxfh_indir_default(i, efx->rss_spread);
}
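
/* Estimate how many RX channels to use for RSS: the rss_cpus module
 * parameter if set, otherwise one per physical core (hyperthread siblings
 * share a channel), subject to further limits applied below.
 */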

static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
{
	cpumask_var_t thread_mask;
	unsigned int count;
	int cpu;

	if (rss_cpus) {
		count = rss_cpus;
	} else {
		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
			netif_warn(efx, probe, efx->net_dev,
				   "RSS disabled due to allocation failure\n");
			return 1;
		}

		count = 0;
		for_each_online_cpu(cpu) {
			if (!cpumask_test_cpu(cpu, thread_mask)) {
				++count;
				cpumask_or(thread_mask, thread_mask,
					   topology_sibling_cpumask(cpu));
			}
		}

		free_cpumask_var(thread_mask);
	}

	if (count > EFX_MAX_RX_QUEUES) {
		netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
			       "Reducing number of rx queues from %u to %u.\n",
			       count, EFX_MAX_RX_QUEUES);
		count = EFX_MAX_RX_QUEUES;
	}

	/* If RSS is requested for the PF *and* VFs then we can't write RSS
	 * table entries that are inaccessible to VFs
	 */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
		    count > efx_vf_size(efx)) {
			netif_warn(efx, probe, efx->net_dev,
				   "Reducing number of RSS channels from %u to %u for "
				   "VF support. Increase vf-msix-limit to use more "
				   "channels on the PF.\n",
				   count, efx_vf_size(efx));
			count = efx_vf_size(efx);
		}
	}
#endif

	return count;
}

/* Probe the number and type of interrupts we are able to obtain, and
 * the resulting numbers of channels and RX queues.
 */
static int efx_probe_interrupts(struct efx_nic *efx)
{
	unsigned int extra_channels = 0;
	unsigned int i, j;
	int rc;

	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
		if (efx->extra_channel_type[i])
			++extra_channels;

	if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
		struct msix_entry xentries[EFX_MAX_CHANNELS];
		unsigned int n_channels;

		n_channels = efx_wanted_parallelism(efx);
		if (efx_separate_tx_channels)
			n_channels *= 2;
		n_channels += extra_channels;
		n_channels = min(n_channels, efx->max_channels);

		for (i = 0; i < n_channels; i++)
			xentries[i].entry = i;
		rc = pci_enable_msix_range(efx->pci_dev,
					   xentries, 1, n_channels);
		if (rc < 0) {
			/* Fall back to single channel MSI */
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI-X\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
				efx->interrupt_mode = EFX_INT_MODE_MSI;
			else
				return rc;
		} else if (rc < n_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Insufficient MSI-X vectors"
				  " available (%d < %u).\n", rc, n_channels);
			netif_err(efx, drv, efx->net_dev,
				  "WARNING: Performance may be reduced.\n");
			n_channels = rc;
		}

		if (rc > 0) {
			efx->n_channels = n_channels;
			if (n_channels > extra_channels)
				n_channels -= extra_channels;
			if (efx_separate_tx_channels) {
				efx->n_tx_channels = min(max(n_channels / 2,
							     1U),
							 efx->max_tx_channels);
				efx->n_rx_channels = max(n_channels -
							 efx->n_tx_channels,
							 1U);
			} else {
				efx->n_tx_channels = min(n_channels,
							 efx->max_tx_channels);
				efx->n_rx_channels = n_channels;
			}
			for (i = 0; i < efx->n_channels; i++)
				efx_get_channel(efx, i)->irq =
					xentries[i].vector;
		}
	}

	/* Try single interrupt MSI */
	if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
		efx->n_channels = 1;
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		rc = pci_enable_msi(efx->pci_dev);
		if (rc == 0) {
			efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
		} else {
			netif_err(efx, drv, efx->net_dev,
				  "could not enable MSI\n");
			if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
				efx->interrupt_mode = EFX_INT_MODE_LEGACY;
			else
				return rc;
		}
	}

	/* Assume legacy interrupts */
	if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
		efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
		efx->n_rx_channels = 1;
		efx->n_tx_channels = 1;
		efx->legacy_irq = efx->pci_dev->irq;
	}

	/* Assign extra channels if possible */
	efx->n_extra_tx_channels = 0;
	j = efx->n_channels;
	for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
		if (!efx->extra_channel_type[i])
			continue;
		if (efx->interrupt_mode != EFX_INT_MODE_MSIX ||
		    efx->n_channels <= extra_channels) {
			efx->extra_channel_type[i]->handle_no_channel(efx);
		} else {
			--j;
			efx_get_channel(efx, j)->type =
				efx->extra_channel_type[i];
			if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
				efx->n_extra_tx_channels++;
		}
	}

	/* RSS might be usable on VFs even if it is disabled on the PF */
#ifdef CONFIG_SFC_SRIOV
	if (efx->type->sriov_wanted) {
		efx->rss_spread = ((efx->n_rx_channels > 1 ||
				    !efx->type->sriov_wanted(efx)) ?
				   efx->n_rx_channels : efx_vf_size(efx));
		return 0;
	}
#endif
	efx->rss_spread = efx->n_rx_channels;

	return 0;
}
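
/* Enable event processing on all channels, initialising any event queues
 * that are not kept across an interrupt disable, and switch MCDI
 * completions to event mode.  Rolls back on failure.
 */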

static int efx_soft_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	BUG_ON(efx->state == STATE_DISABLED);

	efx->irq_soft_enabled = true;
	smp_wmb();

	efx_for_each_channel(channel, efx) {
		if (!channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
		efx_start_eventq(channel);
	}

	efx_mcdi_mode_event(efx);

	return 0;
fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	return rc;
}

static void efx_soft_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	if (efx->state == STATE_DISABLED)
		return;

	efx_mcdi_mode_poll(efx);

	efx->irq_soft_enabled = false;
	smp_wmb();

	if (efx->legacy_irq)
		synchronize_irq(efx->legacy_irq);

	efx_for_each_channel(channel, efx) {
		if (channel->irq)
			synchronize_irq(channel->irq);

		efx_stop_eventq(channel);
		if (!channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	/* Flush the asynchronous MCDI request queue */
	efx_mcdi_flush_async(efx);
}

static int efx_enable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel, *end_channel;
	int rc;

	BUG_ON(efx->state == STATE_DISABLED);

	if (efx->eeh_disabled_legacy_irq) {
		enable_irq(efx->legacy_irq);
		efx->eeh_disabled_legacy_irq = false;
	}

	efx->type->irq_enable_master(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq) {
			rc = efx_init_eventq(channel);
			if (rc)
				goto fail;
		}
	}

	rc = efx_soft_enable_interrupts(efx);
	if (rc)
		goto fail;

	return 0;

fail:
	end_channel = channel;
	efx_for_each_channel(channel, efx) {
		if (channel == end_channel)
			break;
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);

	return rc;
}

static void efx_disable_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_soft_disable_interrupts(efx);

	efx_for_each_channel(channel, efx) {
		if (channel->type->keep_eventq)
			efx_fini_eventq(channel);
	}

	efx->type->irq_disable_non_ev(efx);
}

static void efx_remove_interrupts(struct efx_nic *efx)
{
	struct efx_channel *channel;

	/* Remove MSI/MSI-X interrupts */
	efx_for_each_channel(channel, efx)
		channel->irq = 0;
	pci_disable_msi(efx->pci_dev);
	pci_disable_msix(efx->pci_dev);

	/* Remove legacy interrupt */
	efx->legacy_irq = 0;
}
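
/* Assign RX and TX roles to the channels and renumber the TX queues now
 * that the final channel counts are known.
 */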

static void efx_set_channels(struct efx_nic *efx)
{
	struct efx_channel *channel;
	struct efx_tx_queue *tx_queue;

	efx->tx_channel_offset =
		efx_separate_tx_channels ?
		efx->n_channels - efx->n_tx_channels : 0;

	/* We need to mark which channels really have RX and TX
	 * queues, and adjust the TX queue numbers if we have separate
	 * RX-only and TX-only channels.
	 */
	efx_for_each_channel(channel, efx) {
		if (channel->channel < efx->n_rx_channels)
			channel->rx_queue.core_index = channel->channel;
		else
			channel->rx_queue.core_index = -1;

		efx_for_each_channel_tx_queue(tx_queue, channel)
			tx_queue->queue -= (efx->tx_channel_offset *
					    EFX_TXQ_TYPES);
	}
}
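
/* Probe the NIC-type specific state, then size interrupts, channels and
 * RSS and set the initial interrupt moderation.
 */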

static int efx_probe_nic(struct efx_nic *efx)
{
	int rc;

	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");

	/* Carry out hardware-type specific initialisation */
	rc = efx->type->probe(efx);
	if (rc)
		return rc;

	do {
		if (!efx->max_channels || !efx->max_tx_channels) {
			netif_err(efx, drv, efx->net_dev,
				  "Insufficient resources to allocate"
				  " any channels\n");
			rc = -ENOSPC;
			goto fail1;
		}

		/* Determine the number of channels and queues by trying
		 * to hook in MSI-X interrupts.
		 */
		rc = efx_probe_interrupts(efx);
		if (rc)
			goto fail1;

		efx_set_channels(efx);

		/* dimension_resources can fail with EAGAIN */
		rc = efx->type->dimension_resources(efx);
		if (rc != 0 && rc != -EAGAIN)
			goto fail2;

		if (rc == -EAGAIN)
			/* try again with new max_channels */
			efx_remove_interrupts(efx);

	} while (rc == -EAGAIN);

	if (efx->n_channels > 1)
		netdev_rss_key_fill(&efx->rx_hash_key,
				    sizeof(efx->rx_hash_key));
	efx_set_default_rx_indir_table(efx);

	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);

	/* Initialise the interrupt moderation settings */
	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
				true);

	return 0;

fail2:
	efx_remove_interrupts(efx);
fail1:
	efx->type->remove(efx);
	return rc;
}

static void efx_remove_nic(struct efx_nic *efx)
{
	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");

	efx_remove_interrupts(efx);
	efx->type->remove(efx);
}
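
/* Create the hardware filter table and, when RFS acceleration is enabled,
 * the per-channel flow ID arrays used to track steered flows.
 */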

static int efx_probe_filters(struct efx_nic *efx)
{
	int rc;

	spin_lock_init(&efx->filter_lock);
	init_rwsem(&efx->filter_sem);
	mutex_lock(&efx->mac_lock);
	down_write(&efx->filter_sem);
	rc = efx->type->filter_table_probe(efx);
	if (rc)
		goto out_unlock;

#ifdef CONFIG_RFS_ACCEL
	if (efx->type->offload_features & NETIF_F_NTUPLE) {
		struct efx_channel *channel;
		int i, success = 1;

		efx_for_each_channel(channel, efx) {
			channel->rps_flow_id =
				kcalloc(efx->type->max_rx_ip_filters,
					sizeof(*channel->rps_flow_id),
					GFP_KERNEL);
			if (!channel->rps_flow_id)
				success = 0;
			else
				for (i = 0;
				     i < efx->type->max_rx_ip_filters;
				     ++i)
					channel->rps_flow_id[i] =
						RPS_FLOW_ID_INVALID;
		}

		if (!success) {
			efx_for_each_channel(channel, efx)
				kfree(channel->rps_flow_id);
			efx->type->filter_table_remove(efx);
			rc = -ENOMEM;
			goto out_unlock;
		}

		efx->rps_expire_index = efx->rps_expire_channel = 0;
	}
#endif
out_unlock:
	up_write(&efx->filter_sem);
	mutex_unlock(&efx->mac_lock);
	return rc;
}

static void efx_remove_filters(struct efx_nic *efx)
{
#ifdef CONFIG_RFS_ACCEL
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		kfree(channel->rps_flow_id);
#endif
	down_write(&efx->filter_sem);
	efx->type->filter_table_remove(efx);
	up_write(&efx->filter_sem);
}

static void efx_restore_filters(struct efx_nic *efx)
{
	down_read(&efx->filter_sem);
	efx->type->filter_table_restore(efx);
	up_read(&efx->filter_sem);
}

/**************************************************************************
 *
 * NIC startup/shutdown
 *
 *************************************************************************/

static int efx_probe_all(struct efx_nic *efx)
{
	int rc;

	rc = efx_probe_nic(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
		goto fail1;
	}

	rc = efx_probe_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
		goto fail2;
	}

	BUILD_BUG_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_RXQ_MIN_ENT);
	if (WARN_ON(EFX_DEFAULT_DMAQ_SIZE < EFX_TXQ_MIN_ENT(efx))) {
		rc = -EINVAL;
		goto fail3;
	}
	efx->rxq_entries = efx->txq_entries = EFX_DEFAULT_DMAQ_SIZE;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_probe(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to setup vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif


	rc = efx_probe_filters(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to create filter tables\n");
		goto fail4;
	}

	rc = efx_probe_channels(efx);
	if (rc)
		goto fail5;

	return 0;

 fail5:
	efx_remove_filters(efx);
 fail4:
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
 fail3:
	efx_remove_port(efx);
 fail2:
	efx_remove_nic(efx);
 fail1:
	return rc;
}

/* If the interface is supposed to be running but is not, start
 * the hardware and software data path, regular activity for the port
 * (MAC statistics, link polling, etc.) and schedule the port to be
 * reconfigured.  Interrupts must already be enabled.  This function
 * is safe to call multiple times, so long as the NIC is not disabled.
 * Requires the RTNL lock.
 */
static void efx_start_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);
	BUG_ON(efx->state == STATE_DISABLED);

	/* Check that it is appropriate to restart the interface. All
	 * of these flags are safe to read under just the rtnl lock */
	if (efx->port_enabled || !netif_running(efx->net_dev) ||
	    efx->reset_pending)
		return;

	efx_start_port(efx);
	efx_start_datapath(efx);

	/* Start the hardware monitor if there is one */
	if (efx->type->monitor != NULL)
		queue_delayed_work(efx->workqueue, &efx->monitor_work,
				   efx_monitor_interval);

	/* Link state detection is normally event-driven; we have
	 * to poll now because we could have missed a change
	 */
	mutex_lock(&efx->mac_lock);
	if (efx->phy_op->poll(efx))
		efx_link_status_changed(efx);
	mutex_unlock(&efx->mac_lock);

	efx->type->start_stats(efx);
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
}

/* Quiesce the hardware and software data path, and regular activity
 * for the port without bringing the link down.  Safe to call multiple
 * times with the NIC in almost any state, but interrupts should be
 * enabled.  Requires the RTNL lock.
 */
static void efx_stop_all(struct efx_nic *efx)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	/* port_enabled can be read safely under the rtnl lock */
	if (!efx->port_enabled)
		return;

	/* update stats before we go down so we can accurately count
	 * rx_nodesc_drops
	 */
	efx->type->pull_stats(efx);
	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, NULL);
	spin_unlock_bh(&efx->stats_lock);
	efx->type->stop_stats(efx);
	efx_stop_port(efx);

	/* Stop the kernel transmit interface.  This is only valid if
	 * the device is stopped or detached; otherwise the watchdog
	 * may fire immediately.
	 */
	WARN_ON(netif_running(efx->net_dev) &&
		netif_device_present(efx->net_dev));
	netif_tx_disable(efx->net_dev);

	efx_stop_datapath(efx);
}

static void efx_remove_all(struct efx_nic *efx)
{
	efx_remove_channels(efx);
	efx_remove_filters(efx);
#ifdef CONFIG_SFC_SRIOV
	efx->type->vswitching_remove(efx);
#endif
	efx_remove_port(efx);
	efx_remove_nic(efx);
}

/**************************************************************************
 *
 * Interrupt moderation
 *
 **************************************************************************/
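/* Convert a requested moderation delay in microseconds into event-queue
 * timer ticks; timer_quantum_ns is the tick period of that timer.
 */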
unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
{
	if (usecs == 0)
		return 0;
	if (usecs * 1000 < efx->timer_quantum_ns)
		return 1; /* never round down to 0 */
	return usecs * 1000 / efx->timer_quantum_ns;
}

unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
{
	/* We must round up when converting ticks to microseconds
	 * because we round down when converting the other way.
	 */
	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
}

/* Set interrupt moderation parameters */
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
			    unsigned int rx_usecs, bool rx_adaptive,
			    bool rx_may_override_tx)
{
	struct efx_channel *channel;
	unsigned int timer_max_us;

	EFX_ASSERT_RESET_SERIALISED(efx);

	timer_max_us = efx->timer_max_ns / 1000;

	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
		return -EINVAL;

	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
	    !rx_may_override_tx) {
		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
			  "RX and TX IRQ moderation must be equal\n");
		return -EINVAL;
	}

	efx->irq_rx_adaptive = rx_adaptive;
	efx->irq_rx_moderation_us = rx_usecs;
	efx_for_each_channel(channel, efx) {
		if (efx_channel_has_rx_queue(channel))
			channel->irq_moderation_us = rx_usecs;
		else if (efx_channel_has_tx_queues(channel))
			channel->irq_moderation_us = tx_usecs;
	}

	return 0;
}

void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
			    unsigned int *rx_usecs, bool *rx_adaptive)
{
	*rx_adaptive = efx->irq_rx_adaptive;
	*rx_usecs = efx->irq_rx_moderation_us;

	/* If channels are shared between RX and TX, so is IRQ
	 * moderation.  Otherwise, IRQ moderation is the same for all
	 * TX channels and is not adaptive.
	 */
	if (efx->tx_channel_offset == 0) {
		*tx_usecs = *rx_usecs;
	} else {
		struct efx_channel *tx_channel;

		tx_channel = efx->channel[efx->tx_channel_offset];
		*tx_usecs = tx_channel->irq_moderation_us;
	}
}

/**************************************************************************
 *
 * Hardware monitor
 *
 **************************************************************************/

/* Run periodically off the general workqueue */
static void efx_monitor(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic,
					   monitor_work.work);

	netif_vdbg(efx, timer, efx->net_dev,
		   "hardware monitor executing on CPU %d\n",
		   raw_smp_processor_id());
	BUG_ON(efx->type->monitor == NULL);

	/* If the mac_lock is already held then it is likely a port
	 * reconfiguration is already in place, which will likely do
	 * most of the work of monitor() anyway. */
	if (mutex_trylock(&efx->mac_lock)) {
		if (efx->port_enabled)
			efx->type->monitor(efx);
		mutex_unlock(&efx->mac_lock);
	}

	queue_delayed_work(efx->workqueue, &efx->monitor_work,
			   efx_monitor_interval);
}

/**************************************************************************
 *
 * ioctls
 *
 *************************************************************************/

/* Net device ioctl
 * Context: process, rtnl_lock() held.
 */
static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (cmd == SIOCSHWTSTAMP)
		return efx_ptp_set_ts_config(efx, ifr);
	if (cmd == SIOCGHWTSTAMP)
		return efx_ptp_get_ts_config(efx, ifr);

	/* Convert phy_id from older PRTAD/DEVAD format */
	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
	    (data->phy_id & 0xfc00) == 0x0400)
		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;

	return mdio_mii_ioctl(&efx->mdio, data, cmd);
}

/**************************************************************************
 *
 * NAPI interface
 *
 **************************************************************************/

static void efx_init_napi_channel(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	channel->napi_dev = efx->net_dev;
	netif_napi_add(channel->napi_dev, &channel->napi_str,
		       efx_poll, napi_weight);
}

static void efx_init_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_init_napi_channel(channel);
}

static void efx_fini_napi_channel(struct efx_channel *channel)
{
	if (channel->napi_dev)
		netif_napi_del(&channel->napi_str);

	channel->napi_dev = NULL;
}

static void efx_fini_napi(struct efx_nic *efx)
{
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_fini_napi_channel(channel);
}

/**************************************************************************
 *
 * Kernel netpoll interface
 *
 *************************************************************************/

#ifdef CONFIG_NET_POLL_CONTROLLER

/* Although in the common case interrupts will be disabled, this is not
 * guaranteed. However, all our work happens inside the NAPI callback,
 * so no locking is required.
 */
static void efx_netpoll(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		efx_schedule_channel(channel);
}

#endif

/**************************************************************************
 *
 * Kernel net device interface
 *
 *************************************************************************/

/* Context: process, rtnl_lock() held. */
int efx_net_open(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
		  raw_smp_processor_id());

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;
	if (efx->phy_mode & PHY_MODE_SPECIAL)
		return -EBUSY;
	if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL))
		return -EIO;

	/* Notify the kernel of the link state polled during driver load,
	 * before the monitor starts running */
	efx_link_status_changed(efx);

	efx_start_all(efx);
	if (efx->state == STATE_DISABLED || efx->reset_pending)
		netif_device_detach(efx->net_dev);
	efx_selftest_async_start(efx);
	return 0;
}

/* Context: process, rtnl_lock() held.
 * Note that the kernel will ignore our return code; this method
 * should really be a void.
 */
int efx_net_stop(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
		  raw_smp_processor_id());

	/* Stop the device and flush all the channels */
	efx_stop_all(efx);

	return 0;
}

/* Context: process, dev_base_lock or RTNL held, non-blocking. */
static void efx_net_stats(struct net_device *net_dev,
			  struct rtnl_link_stats64 *stats)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	spin_lock_bh(&efx->stats_lock);
	efx->type->update_stats(efx, NULL, stats);
	spin_unlock_bh(&efx->stats_lock);
}

/* Context: netif_tx_lock held, BHs disabled. */
static void efx_watchdog(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	netif_err(efx, tx_err, efx->net_dev,
		  "TX stuck with port_enabled=%d: resetting channels\n",
		  efx->port_enabled);

	efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}


/* Context: process, rtnl_lock() held. */
static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	rc = efx_check_disabled(efx);
	if (rc)
		return rc;

	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);

	efx_device_detach_sync(efx);
	efx_stop_all(efx);

	mutex_lock(&efx->mac_lock);
	net_dev->mtu = new_mtu;
	efx_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);
	efx_device_attach_if_not_resetting(efx);
	return 0;
}

static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct sockaddr *addr = data;
	u8 *new_addr = addr->sa_data;
	u8 old_addr[6];
	int rc;

	if (!is_valid_ether_addr(new_addr)) {
		netif_err(efx, drv, efx->net_dev,
			  "invalid ethernet MAC address requested: %pM\n",
			  new_addr);
		return -EADDRNOTAVAIL;
	}

	/* save old address */
	ether_addr_copy(old_addr, net_dev->dev_addr);
	ether_addr_copy(net_dev->dev_addr, new_addr);
	if (efx->type->set_mac_address) {
		rc = efx->type->set_mac_address(efx);
		if (rc) {
			ether_addr_copy(net_dev->dev_addr, old_addr);
			return rc;
		}
	}

	/* Reconfigure the MAC */
	mutex_lock(&efx->mac_lock);
	efx_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);

	return 0;
}

/* Context: netif_addr_lock held, BHs disabled. */
static void efx_set_rx_mode(struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->port_enabled)
		queue_work(efx->workqueue, &efx->mac_work);
	/* Otherwise efx_start_port() will do this */
}

static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	int rc;

	/* If disabling RX n-tuple filtering, clear existing filters */
	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
		rc = efx->type->filter_clear_rx(efx, EFX_FILTER_PRI_MANUAL);
		if (rc)
			return rc;
	}

	/* If Rx VLAN filter is changed, update filters via mac_reconfigure.
	 * If rx-fcs is changed, mac_reconfigure updates that too.
	 */
	if ((net_dev->features ^ data) & (NETIF_F_HW_VLAN_CTAG_FILTER |
					  NETIF_F_RXFCS)) {
		/* efx_set_rx_mode() will schedule MAC work to update filters
		 * when the new features are finally set in net_dev.
		 */
		efx_set_rx_mode(net_dev);
	}

	return 0;
}

static int efx_get_phys_port_id(struct net_device *net_dev,
				struct netdev_phys_item_id *ppid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->get_phys_port_id)
		return efx->type->get_phys_port_id(efx, ppid);
	else
		return -EOPNOTSUPP;
}

static int efx_get_phys_port_name(struct net_device *net_dev,
				  char *name, size_t len)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (snprintf(name, len, "p%u", efx->port_num) >= len)
		return -EINVAL;
	return 0;
}

static int efx_vlan_rx_add_vid(struct net_device *net_dev, __be16 proto, u16 vid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->vlan_rx_add_vid)
		return efx->type->vlan_rx_add_vid(efx, proto, vid);
	else
		return -EOPNOTSUPP;
}

static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vid)
{
	struct efx_nic *efx = netdev_priv(net_dev);

	if (efx->type->vlan_rx_kill_vid)
		return efx->type->vlan_rx_kill_vid(efx, proto, vid);
	else
		return -EOPNOTSUPP;
}

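/* Map the kernel's UDP tunnel types onto the MCDI encapsulation entry
 * types understood by the firmware; returns -1 for unsupported types.
 */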
static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
{
	switch (in) {
	case UDP_TUNNEL_TYPE_VXLAN:
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;
	case UDP_TUNNEL_TYPE_GENEVE:
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;
	default:
		return -1;
	}
}

static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct efx_udp_tunnel tnl;
	int efx_tunnel_type;

	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
	if (efx_tunnel_type < 0)
		return;

	tnl.type = (u16)efx_tunnel_type;
	tnl.port = ti->port;

	if (efx->type->udp_tnl_add_port)
		(void)efx->type->udp_tnl_add_port(efx, tnl);
}

static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct efx_udp_tunnel tnl;
	int efx_tunnel_type;

	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
	if (efx_tunnel_type < 0)
		return;

	tnl.type = (u16)efx_tunnel_type;
	tnl.port = ti->port;

	if (efx->type->udp_tnl_del_port)
		(void)efx->type->udp_tnl_del_port(efx, tnl);
}

static const struct net_device_ops efx_netdev_ops = {
	.ndo_open		= efx_net_open,
	.ndo_stop		= efx_net_stop,
	.ndo_get_stats64	= efx_net_stats,
	.ndo_tx_timeout		= efx_watchdog,
	.ndo_start_xmit		= efx_hard_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_do_ioctl		= efx_ioctl,
	.ndo_change_mtu		= efx_change_mtu,
	.ndo_set_mac_address	= efx_set_mac_address,
	.ndo_set_rx_mode	= efx_set_rx_mode,
	.ndo_set_features	= efx_set_features,
	.ndo_vlan_rx_add_vid	= efx_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= efx_vlan_rx_kill_vid,
#ifdef CONFIG_SFC_SRIOV
	.ndo_set_vf_mac		= efx_sriov_set_vf_mac,
	.ndo_set_vf_vlan	= efx_sriov_set_vf_vlan,
	.ndo_set_vf_spoofchk	= efx_sriov_set_vf_spoofchk,
	.ndo_get_vf_config	= efx_sriov_get_vf_config,
	.ndo_set_vf_link_state  = efx_sriov_set_vf_link_state,
#endif
	.ndo_get_phys_port_id   = efx_get_phys_port_id,
	.ndo_get_phys_port_name	= efx_get_phys_port_name,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = efx_netpoll,
#endif
	.ndo_setup_tc		= efx_setup_tc,
#ifdef CONFIG_RFS_ACCEL
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
	.ndo_udp_tunnel_add	= efx_udp_tunnel_add,
	.ndo_udp_tunnel_del	= efx_udp_tunnel_del,
};

static void efx_update_name(struct efx_nic *efx)
{
	strcpy(efx->name, efx->net_dev->name);
	efx_mtd_rename(efx);
	efx_set_channel_names(efx);
}

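/* Keep efx->name, the MTD partition names and the channel names in sync
 * with the net device name.
 */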
static int efx_netdev_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);

	if ((net_dev->netdev_ops == &efx_netdev_ops) &&
	    event == NETDEV_CHANGENAME)
		efx_update_name(netdev_priv(net_dev));

	return NOTIFY_DONE;
}

static struct notifier_block efx_netdev_notifier = {
	.notifier_call = efx_netdev_event,
};

static ssize_t
show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
	return sprintf(buf, "%d\n", efx->phy_type);
}
static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);

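/* MCDI logging can be enabled or disabled at run time via the
 * mcdi_logging sysfs attribute below.
 */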
#ifdef CONFIG_SFC_MCDI_LOGGING
static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);

	return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
}
static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
			    const char *buf, size_t count)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
	struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
	bool enable = count > 0 && *buf != '0';

	mcdi->logging_enabled = enable;
	return count;
}
static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
#endif

static int efx_register_netdev(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	struct efx_channel *channel;
	int rc;

	net_dev->watchdog_timeo = 5 * HZ;
	net_dev->irq = efx->pci_dev->irq;
	net_dev->netdev_ops = &efx_netdev_ops;
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		net_dev->priv_flags |= IFF_UNICAST_FLT;
	net_dev->ethtool_ops = &efx_ethtool_ops;
	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
	net_dev->min_mtu = EFX_MIN_MTU;
	net_dev->max_mtu = EFX_MAX_MTU;

	rtnl_lock();

	/* Enable resets to be scheduled and check whether any were
	 * already requested.  If so, the NIC is probably hosed so we
	 * abort.
	 */
	efx->state = STATE_READY;
	smp_mb(); /* ensure we change state before checking reset_pending */
	if (efx->reset_pending) {
		netif_err(efx, probe, efx->net_dev,
			  "aborting probe due to scheduled reset\n");
		rc = -EIO;
		goto fail_locked;
	}

	rc = dev_alloc_name(net_dev, net_dev->name);
	if (rc < 0)
		goto fail_locked;
	efx_update_name(efx);

	/* Always start with carrier off; PHY events will detect the link */
	netif_carrier_off(net_dev);

	rc = register_netdevice(net_dev);
	if (rc)
		goto fail_locked;

	efx_for_each_channel(channel, efx) {
		struct efx_tx_queue *tx_queue;
		efx_for_each_channel_tx_queue(tx_queue, channel)
			efx_init_tx_queue_core_txq(tx_queue);
	}

	efx_associate(efx);

	rtnl_unlock();

	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_registered;
	}
#ifdef CONFIG_SFC_MCDI_LOGGING
	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
	if (rc) {
		netif_err(efx, drv, efx->net_dev,
			  "failed to init net dev attributes\n");
		goto fail_attr_mcdi_logging;
	}
#endif

	return 0;

#ifdef CONFIG_SFC_MCDI_LOGGING
fail_attr_mcdi_logging:
	device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
#endif
fail_registered:
	rtnl_lock();
	efx_dissociate(efx);
	unregister_netdevice(net_dev);
fail_locked:
	efx->state = STATE_UNINIT;
	rtnl_unlock();
	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
	return rc;
}

static void efx_unregister_netdev(struct efx_nic *efx)
{
	if (!efx->net_dev)
		return;

	BUG_ON(netdev_priv(efx->net_dev) != efx);

	if (efx_dev_registered(efx)) {
		strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
#ifdef CONFIG_SFC_MCDI_LOGGING
		device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
#endif
		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
		unregister_netdev(efx->net_dev);
	}
}

/**************************************************************************
 *
 * Device reset and suspend
 *
 **************************************************************************/

/* Tears down the entire software state and most of the hardware state
 * before reset.  */
void efx_reset_down(struct efx_nic *efx, enum reset_type method)
{
	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->prepare_flr(efx);

	efx_stop_all(efx);
	efx_disable_interrupts(efx);

	mutex_lock(&efx->mac_lock);
	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
	    method != RESET_TYPE_DATAPATH)
		efx->phy_op->fini(efx);
	efx->type->fini(efx);
}

/* This function will always ensure that the locks acquired in
 * efx_reset_down() are released. A failure return code indicates
 * that we were unable to reinitialise the hardware, and the
 * driver should be disabled. If ok is false, then the rx and tx
 * engines are not restarted, pending a RESET_DISABLE. */
int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
{
	int rc;

	EFX_ASSERT_RESET_SERIALISED(efx);

	if (method == RESET_TYPE_MCDI_TIMEOUT)
		efx->type->finish_flr(efx);

	/* Ensure that SRAM is initialised even if we're disabling the device */
	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
		goto fail;
	}

	if (!ok)
		goto fail;

	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
	    method != RESET_TYPE_DATAPATH) {
		rc = efx->phy_op->init(efx);
		if (rc)
			goto fail;
		rc = efx->phy_op->reconfigure(efx);
		if (rc && rc != -EPERM)
			netif_err(efx, drv, efx->net_dev,
				  "could not restore PHY settings\n");
	}

	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail;

#ifdef CONFIG_SFC_SRIOV
	rc = efx->type->vswitching_restore(efx);
	if (rc) /* not fatal; the PF will still work fine */
		netif_warn(efx, probe, efx->net_dev,
			   "failed to restore vswitching rc=%d;"
			   " VFs may not function\n", rc);
#endif

	down_read(&efx->filter_sem);
	efx_restore_filters(efx);
	up_read(&efx->filter_sem);
	if (efx->type->sriov_reset)
		efx->type->sriov_reset(efx);

	mutex_unlock(&efx->mac_lock);

	efx_start_all(efx);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

fail:
	efx->port_initialized = false;

	mutex_unlock(&efx->mac_lock);

	return rc;
}

/* Reset the NIC using the specified method.  Note that the reset may
 * fail, in which case the card will be left in an unusable state.
 *
 * Caller must hold the rtnl_lock.
 */
int efx_reset(struct efx_nic *efx, enum reset_type method)
{
	int rc, rc2;
	bool disabled;

	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
		   RESET_TYPE(method));

	efx_device_detach_sync(efx);
	efx_reset_down(efx, method);

	rc = efx->type->reset(efx, method);
	if (rc) {
		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
		goto out;
	}

	/* Clear flags for the scopes we covered.  We assume the NIC and
	 * driver are now quiescent so that there is no race here.
	 */
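	/* -(1 << (method + 1)) has only the bits above 'method' set, so
	 * the mask below clears 'method' and every lesser reset scope in
	 * one step.
	 */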
	if (method < RESET_TYPE_MAX_METHOD)
		efx->reset_pending &= -(1 << (method + 1));
	else /* it doesn't fit into the well-ordered scope hierarchy */
		__clear_bit(method, &efx->reset_pending);

	/* Reinitialise bus-mastering, which may have been turned off before
	 * the reset was scheduled. This is still appropriate, even in the
	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
	 * can respond to requests. */
	pci_set_master(efx->pci_dev);

out:
	/* Leave device stopped if necessary */
	disabled = rc ||
		method == RESET_TYPE_DISABLE ||
		method == RESET_TYPE_RECOVER_OR_DISABLE;
	rc2 = efx_reset_up(efx, method, !disabled);
	if (rc2) {
		disabled = true;
		if (!rc)
			rc = rc2;
	}

	if (disabled) {
		dev_close(efx->net_dev);
		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
		efx->state = STATE_DISABLED;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
		efx_device_attach_if_not_resetting(efx);
	}
	return rc;
}

/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful.
 * Returns a non-zero value otherwise.
 */
int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
	/* A PCI error can occur and not be seen by EEH because nothing
	 * happens on the PCI bus. In this case the driver may fail and
	 * schedule a 'recover or reset', leading to this recovery handler.
	 * Manually call the eeh failure check function.
	 */
	struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);

	if (eeh_dev_check_failure(eehdev)) {
		/* The EEH mechanisms will handle the error and reset the
		 * device if necessary.
		 */
		return 1;
	}
#endif
	return 0;
}

static void efx_wait_for_bist_end(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
		if (efx_mcdi_poll_reboot(efx))
			goto out;
		msleep(BIST_WAIT_DELAY_MS);
	}

	netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
out:
	/* Either way unset the BIST flag. If we found no reboot we probably
	 * won't recover, but we should try.
	 */
	efx->mc_bist_for_other_fn = false;
}

/* The worker thread exists so that code that cannot sleep can
 * schedule a reset for later.
 */
static void efx_reset_work(struct work_struct *data)
{
	struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
	unsigned long pending;
	enum reset_type method;

	pending = READ_ONCE(efx->reset_pending);
	method = fls(pending) - 1;

	if (method == RESET_TYPE_MC_BIST)
		efx_wait_for_bist_end(efx);

	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
	     method == RESET_TYPE_RECOVER_OR_ALL) &&
	    efx_try_recovery(efx))
		return;

	if (!pending)
		return;

	rtnl_lock();

	/* We checked the state in efx_schedule_reset() but it may
	 * have changed by now.  Now that we have the RTNL lock,
	 * it cannot change again.
	 */
	if (efx->state == STATE_READY)
		(void)efx_reset(efx, method);

	rtnl_unlock();
}

void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
	enum reset_type method;

	if (efx->state == STATE_RECOVERY) {
		netif_dbg(efx, drv, efx->net_dev,
			  "recovering: skip scheduling %s reset\n",
			  RESET_TYPE(type));
		return;
	}

	switch (type) {
	case RESET_TYPE_INVISIBLE:
	case RESET_TYPE_ALL:
	case RESET_TYPE_RECOVER_OR_ALL:
	case RESET_TYPE_WORLD:
	case RESET_TYPE_DISABLE:
	case RESET_TYPE_RECOVER_OR_DISABLE:
	case RESET_TYPE_DATAPATH:
	case RESET_TYPE_MC_BIST:
	case RESET_TYPE_MCDI_TIMEOUT:
		method = type;
		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
			  RESET_TYPE(method));
		break;
	default:
		method = efx->type->map_reset_reason(type);
		netif_dbg(efx, drv, efx->net_dev,
			  "scheduling %s reset for %s\n",
			  RESET_TYPE(method), RESET_TYPE(type));
		break;
	}

	set_bit(method, &efx->reset_pending);
	smp_mb(); /* ensure we change reset_pending before checking state */

	/* If we're not READY then just leave the flags set as the cue
	 * to abort probing or reschedule the reset later.
	 */
	if (READ_ONCE(efx->state) != STATE_READY)
		return;

	/* efx_process_channel() will no longer read events once a
	 * reset is scheduled. So switch back to poll'd MCDI completions. */
	efx_mcdi_mode_poll(efx);

	queue_work(reset_workqueue, &efx->reset_work);
}

/**************************************************************************
 *
 * List of NICs we support
 *
 **************************************************************************/

/* PCI device ID table */
static const struct pci_device_id efx_pci_table[] = {
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
	 .driver_data = (unsigned long) &siena_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0903),  /* SFC9120 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1903),  /* SFC9120 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0923),  /* SFC9140 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1923),  /* SFC9140 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0a03),  /* SFC9220 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1a03),  /* SFC9220 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0b03),  /* SFC9250 PF */
	 .driver_data = (unsigned long) &efx_hunt_a0_nic_type},
	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x1b03),  /* SFC9250 VF */
	 .driver_data = (unsigned long) &efx_hunt_a0_vf_nic_type},
	{0}			/* end of list */
};

/**************************************************************************
 *
 * Dummy PHY/MAC operations
 *
 * Can be used for some unimplemented operations
 * Needed so all function pointers are valid and do not have to be tested
 * before use
 *
 **************************************************************************/
int efx_port_dummy_op_int(struct efx_nic *efx)
{
	return 0;
}
void efx_port_dummy_op_void(struct efx_nic *efx) {}

static bool efx_port_dummy_op_poll(struct efx_nic *efx)
{
	return false;
}

static const struct efx_phy_operations efx_dummy_phy_operations = {
	.init		 = efx_port_dummy_op_int,
	.reconfigure	 = efx_port_dummy_op_int,
	.poll		 = efx_port_dummy_op_poll,
	.fini		 = efx_port_dummy_op_void,
};

/**************************************************************************
 *
 * Data housekeeping
 *
 **************************************************************************/

/* This zeroes out and then fills in the invariants in a struct
 * efx_nic (including all sub-structures).
 */
static int efx_init_struct(struct efx_nic *efx,
			   struct pci_dev *pci_dev, struct net_device *net_dev)
{
	int rc = -ENOMEM, i;

	/* Initialise common structures */
	INIT_LIST_HEAD(&efx->node);
	INIT_LIST_HEAD(&efx->secondary_list);
	spin_lock_init(&efx->biu_lock);
#ifdef CONFIG_SFC_MTD
	INIT_LIST_HEAD(&efx->mtd_list);
#endif
	INIT_WORK(&efx->reset_work, efx_reset_work);
	INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
	INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
	efx->pci_dev = pci_dev;
	efx->msg_enable = debug;
	efx->state = STATE_UNINIT;
	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));

	efx->net_dev = net_dev;
	efx->rx_prefix_size = efx->type->rx_prefix_size;
	efx->rx_ip_align =
		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
	efx->rx_packet_hash_offset =
		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
	efx->rx_packet_ts_offset =
		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
	spin_lock_init(&efx->stats_lock);
	efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
	efx->num_mac_stats = MC_CMD_MAC_NSTATS;
	BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
	mutex_init(&efx->mac_lock);
	efx->phy_op = &efx_dummy_phy_operations;
	efx->mdio.dev = net_dev;
	INIT_WORK(&efx->mac_work, efx_mac_work);
	init_waitqueue_head(&efx->flush_wq);

	for (i = 0; i < EFX_MAX_CHANNELS; i++) {
		efx->channel[i] = efx_alloc_channel(efx, i, NULL);
		if (!efx->channel[i])
			goto fail;
		efx->msi_context[i].efx = efx;
		efx->msi_context[i].index = i;
	}

	/* Higher numbered interrupt modes are less capable! */
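	/* (max_interrupt_mode is therefore the most capable mode the NIC
	 *  supports and min_interrupt_mode the least capable.)
	 */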
	if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
			 efx->type->min_interrupt_mode)) {
		rc = -EIO;
		goto fail;
	}
	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
				  interrupt_mode);
	efx->interrupt_mode = min(efx->type->min_interrupt_mode,
				  interrupt_mode);

	/* Would be good to use the net_dev name, but we're too early */
	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
		 pci_name(pci_dev));
	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
	if (!efx->workqueue)
		goto fail;

	return 0;

fail:
	efx_fini_struct(efx);
	return rc;
}

static void efx_fini_struct(struct efx_nic *efx)
{
	int i;

	for (i = 0; i < EFX_MAX_CHANNELS; i++)
		kfree(efx->channel[i]);

	kfree(efx->vpd_sn);

	if (efx->workqueue) {
		destroy_workqueue(efx->workqueue);
		efx->workqueue = NULL;
	}
}

void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
{
	u64 n_rx_nodesc_trunc = 0;
	struct efx_channel *channel;

	efx_for_each_channel(channel, efx)
		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}

/**************************************************************************
 *
 * PCI interface
 *
 **************************************************************************/

/* Main body of final NIC shutdown code
 * This is called only at module unload (or hotplug removal).
 */
static void efx_pci_remove_main(struct efx_nic *efx)
{
	/* Flush reset_work. It can no longer be scheduled since we
	 * are not READY.
	 */
	BUG_ON(efx->state == STATE_READY);
	cancel_work_sync(&efx->reset_work);

	efx_disable_interrupts(efx);
	efx_nic_fini_interrupt(efx);
	efx_fini_port(efx);
	efx->type->fini(efx);
	efx_fini_napi(efx);
	efx_remove_all(efx);
}

/* Final NIC shutdown
 * This is called only at module unload (or hotplug removal).  A PF can call
 * this on its VFs to ensure they are unbound first.
 */
static void efx_pci_remove(struct pci_dev *pci_dev)
{
	struct efx_nic *efx;

	efx = pci_get_drvdata(pci_dev);
	if (!efx)
		return;

	/* Mark the NIC as fini, then stop the interface */
	rtnl_lock();
	efx_dissociate(efx);
	dev_close(efx->net_dev);
	efx_disable_interrupts(efx);
	efx->state = STATE_UNINIT;
	rtnl_unlock();

	if (efx->type->sriov_fini)
		efx->type->sriov_fini(efx);

	efx_unregister_netdev(efx);

	efx_mtd_remove(efx);

	efx_pci_remove_main(efx);

	efx_fini_io(efx);
	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");

	efx_fini_struct(efx);
	free_netdev(efx->net_dev);

	pci_disable_pcie_error_reporting(pci_dev);
};

/* NIC VPD information
 * Called during probe to display the part number of the
 * installed NIC.  VPD is potentially very large but this should
 * always appear within the first 512 bytes.
 */
#define SFC_VPD_LEN 512
static void efx_probe_vpd_strings(struct efx_nic *efx)
{
	struct pci_dev *dev = efx->pci_dev;
	char vpd_data[SFC_VPD_LEN];
	ssize_t vpd_size;
	int ro_start, ro_size, i, j;

	/* Get the vpd data from the device */
	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
	if (vpd_size <= 0) {
		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
		return;
	}

	/* Get the Read only section */
	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
	if (ro_start < 0) {
		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
		return;
	}

	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
	j = ro_size;
	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	if (i + j > vpd_size)
		j = vpd_size - i;

	/* Get the Part number */
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
		return;
	}

	netif_info(efx, drv, efx->net_dev,
		   "Part Number : %.*s\n", j, &vpd_data[i]);

	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
	j = ro_size;
	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
	if (i < 0) {
		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
		return;
	}

	j = pci_vpd_info_field_size(&vpd_data[i]);
	i += PCI_VPD_INFO_FLD_HDR_SIZE;
	if (i + j > vpd_size) {
		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
		return;
	}

	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
	if (!efx->vpd_sn)
		return;

	snprintf(efx->vpd_sn, j + 1, "%s", &vpd_data[i]);
}


/* Main body of NIC initialisation
 * This is called at module load (or hotplug insertion, theoretically).
 */
static int efx_pci_probe_main(struct efx_nic *efx)
{
	int rc;

	/* Do start-of-day initialisation */
	rc = efx_probe_all(efx);
	if (rc)
		goto fail1;

	efx_init_napi(efx);

	rc = efx->type->init(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise NIC\n");
		goto fail3;
	}

	rc = efx_init_port(efx);
	if (rc) {
		netif_err(efx, probe, efx->net_dev,
			  "failed to initialise port\n");
		goto fail4;
	}

	rc = efx_nic_init_interrupt(efx);
	if (rc)
		goto fail5;
	rc = efx_enable_interrupts(efx);
	if (rc)
		goto fail6;

	return 0;

 fail6:
	efx_nic_fini_interrupt(efx);
 fail5:
	efx_fini_port(efx);
 fail4:
	efx->type->fini(efx);
 fail3:
	efx_fini_napi(efx);
	efx_remove_all(efx);
 fail1:
	return rc;
}

static int efx_pci_probe_post_io(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	int rc = efx_pci_probe_main(efx);

	if (rc)
		return rc;

	if (efx->type->sriov_init) {
		rc = efx->type->sriov_init(efx);
		if (rc)
			netif_err(efx, probe, efx->net_dev,
				  "SR-IOV can't be enabled rc %d\n", rc);
	}

	/* Determine netdevice features */
	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
			      NETIF_F_TSO | NETIF_F_RXCSUM | NETIF_F_RXALL);
	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		net_dev->features |= NETIF_F_TSO6;
	/* Check whether device supports TSO */
	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
		net_dev->features &= ~NETIF_F_ALL_TSO;
	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				   NETIF_F_RXCSUM);

	net_dev->hw_features |= net_dev->features & ~efx->fixed_features;

	/* Disable receiving frames with bad FCS, by default. */
	net_dev->features &= ~NETIF_F_RXALL;

	/* Disable VLAN filtering by default.  It may be enforced if
	 * the feature is fixed (i.e. VLAN filters are required to
	 * receive VLAN tagged packets due to vPort restrictions).
	 */
	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
	net_dev->features |= efx->fixed_features;

	rc = efx_register_netdev(efx);
	if (!rc)
		return 0;

	efx_pci_remove_main(efx);
	return rc;
}

/* NIC initialisation
 *
 * This is called at module load (or hotplug insertion,
 * theoretically).  It sets up PCI mappings, resets the NIC,
 * sets up and registers the network devices with the kernel and hooks
 * the interrupt service routine.  It does not prepare the device for
 * transmission; this is left to the first time one of the network
 * interfaces is brought up (i.e. efx_net_open).
 */
static int efx_pci_probe(struct pci_dev *pci_dev,
			 const struct pci_device_id *entry)
{
	struct net_device *net_dev;
	struct efx_nic *efx;
	int rc;

	/* Allocate and initialise a struct net_device and struct efx_nic */
	net_dev = alloc_etherdev_mqs(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES,
				     EFX_MAX_RX_QUEUES);
	if (!net_dev)
		return -ENOMEM;
	efx = netdev_priv(net_dev);
	efx->type = (const struct efx_nic_type *) entry->driver_data;
	efx->fixed_features |= NETIF_F_HIGHDMA;

	pci_set_drvdata(pci_dev, efx);
	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
	rc = efx_init_struct(efx, pci_dev, net_dev);
	if (rc)
		goto fail1;

	netif_info(efx, probe, efx->net_dev,
		   "Solarflare NIC detected\n");

	if (!efx->type->is_vf)
		efx_probe_vpd_strings(efx);

	/* Set up basic I/O (BAR mappings etc) */
	rc = efx_init_io(efx);
	if (rc)
		goto fail2;

	rc = efx_pci_probe_post_io(efx);
	if (rc) {
		/* On failure, retry once immediately.
		 * If we aborted probe due to a scheduled reset, dismiss it.
		 */
		efx->reset_pending = 0;
		rc = efx_pci_probe_post_io(efx);
		if (rc) {
			/* On another failure, retry once more
			 * after a 50-305ms delay.
			 */
			unsigned char r;

			get_random_bytes(&r, 1);
			msleep((unsigned int)r + 50);
			efx->reset_pending = 0;
			rc = efx_pci_probe_post_io(efx);
		}
	}
	if (rc)
		goto fail3;

	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");

	/* Try to create MTDs, but allow this to fail */
	rtnl_lock();
	rc = efx_mtd_probe(efx);
	rtnl_unlock();
	if (rc && rc != -EPERM)
		netif_warn(efx, probe, efx->net_dev,
			   "failed to create MTDs (%d)\n", rc);

	rc = pci_enable_pcie_error_reporting(pci_dev);
	if (rc && rc != -EINVAL)
		netif_notice(efx, probe, efx->net_dev,
			     "PCIE error reporting unavailable (%d).\n",
			     rc);

	if (efx->type->udp_tnl_push_ports)
		efx->type->udp_tnl_push_ports(efx);

	return 0;

 fail3:
	efx_fini_io(efx);
 fail2:
	efx_fini_struct(efx);
 fail1:
	WARN_ON(rc > 0);
	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
	free_netdev(net_dev);
	return rc;
}

/* efx_pci_sriov_configure returns the actual number of Virtual Functions
 * enabled on success
 */
#ifdef CONFIG_SFC_SRIOV
static int efx_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
{
	int rc;
	struct efx_nic *efx = pci_get_drvdata(dev);

	if (efx->type->sriov_configure) {
		rc = efx->type->sriov_configure(efx, num_vfs);
		if (rc)
			return rc;
		else
			return num_vfs;
	} else
		return -EOPNOTSUPP;
}
#endif

static int efx_pm_freeze(struct device *dev)
{
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_UNINIT;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);
	}

	rtnl_unlock();

	return 0;
}

static int efx_pm_thaw(struct device *dev)
{
	int rc;
	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		rc = efx_enable_interrupts(efx);
		if (rc)
			goto fail;

		mutex_lock(&efx->mac_lock);
		efx->phy_op->reconfigure(efx);
		mutex_unlock(&efx->mac_lock);

		efx_start_all(efx);

		efx_device_attach_if_not_resetting(efx);

		efx->state = STATE_READY;

		efx->type->resume_wol(efx);
	}

	rtnl_unlock();

	/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
	queue_work(reset_workqueue, &efx->reset_work);

	return 0;

fail:
	rtnl_unlock();

	return rc;
}

static int efx_pm_poweroff(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);

	efx->type->fini(efx);

	efx->reset_pending = 0;

	pci_save_state(pci_dev);
	return pci_set_power_state(pci_dev, PCI_D3hot);
}

/* Used for both resume and restore */
static int efx_pm_resume(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct efx_nic *efx = pci_get_drvdata(pci_dev);
	int rc;

	rc = pci_set_power_state(pci_dev, PCI_D0);
	if (rc)
		return rc;
	pci_restore_state(pci_dev);
	rc = pci_enable_device(pci_dev);
	if (rc)
		return rc;
	pci_set_master(efx->pci_dev);
	rc = efx->type->reset(efx, RESET_TYPE_ALL);
	if (rc)
		return rc;
	rc = efx->type->init(efx);
	if (rc)
		return rc;
	rc = efx_pm_thaw(dev);
	return rc;
}

static int efx_pm_suspend(struct device *dev)
{
	int rc;

	efx_pm_freeze(dev);
	rc = efx_pm_poweroff(dev);
	if (rc)
		efx_pm_resume(dev);
	return rc;
}

static const struct dev_pm_ops efx_pm_ops = {
	.suspend	= efx_pm_suspend,
	.resume		= efx_pm_resume,
	.freeze		= efx_pm_freeze,
	.thaw		= efx_pm_thaw,
	.poweroff	= efx_pm_poweroff,
	.restore	= efx_pm_resume,
};

/* A PCI error affecting this device was detected.
 * At this point MMIO and DMA may be disabled.
 * Stop the software path and request a slot reset.
 */
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
					      enum pci_channel_state state)
{
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	struct efx_nic *efx = pci_get_drvdata(pdev);

	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	rtnl_lock();

	if (efx->state != STATE_DISABLED) {
		efx->state = STATE_RECOVERY;
		efx->reset_pending = 0;

		efx_device_detach_sync(efx);

		efx_stop_all(efx);
		efx_disable_interrupts(efx);

		status = PCI_ERS_RESULT_NEED_RESET;
	} else {
		/* If the interface is disabled we don't want to do anything
		 * with it.
		 */
		status = PCI_ERS_RESULT_RECOVERED;
	}

	rtnl_unlock();

	pci_disable_device(pdev);

	return status;
}

/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
	int rc;

	if (pci_enable_device(pdev)) {
		netif_err(efx, hw, efx->net_dev,
			  "Cannot re-enable PCI device after reset.\n");
		status =  PCI_ERS_RESULT_DISCONNECT;
	}

	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
		"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
		/* Non-fatal error. Continue. */
	}

	return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
	struct efx_nic *efx = pci_get_drvdata(pdev);
	int rc;

	rtnl_lock();

	if (efx->state == STATE_DISABLED)
		goto out;

	rc = efx_reset(efx, RESET_TYPE_ALL);
	if (rc) {
		netif_err(efx, hw, efx->net_dev,
			  "efx_reset failed after PCI error (%d)\n", rc);
	} else {
		efx->state = STATE_READY;
		netif_dbg(efx, hw, efx->net_dev,
			  "Done resetting and resuming IO after PCI error.\n");
	}

out:
	rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
 * reset the hardware when a pci error affecting the device is detected.
 * We leave both the link_reset and mmio_enabled callback unimplemented:
 * with our request for slot reset the mmio_enabled callback will never be
 * called, and the link_reset callback is not used by AER or EEH mechanisms.
 */
static const struct pci_error_handlers efx_err_handlers = {
	.error_detected = efx_io_error_detected,
	.slot_reset	= efx_io_slot_reset,
	.resume		= efx_io_resume,
};

static struct pci_driver efx_pci_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= efx_pci_table,
	.probe		= efx_pci_probe,
	.remove		= efx_pci_remove,
	.driver.pm	= &efx_pm_ops,
	.err_handler	= &efx_err_handlers,
#ifdef CONFIG_SFC_SRIOV
	.sriov_configure = efx_pci_sriov_configure,
#endif
};

/**************************************************************************
 *
 * Kernel module interface
 *
 *************************************************************************/

module_param(interrupt_mode, uint, 0444);
MODULE_PARM_DESC(interrupt_mode,
		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");

static int __init efx_init_module(void)
{
	int rc;

	printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n");

	rc = register_netdevice_notifier(&efx_netdev_notifier);
	if (rc)
		goto err_notifier;

#ifdef CONFIG_SFC_SRIOV
	rc = efx_init_sriov();
	if (rc)
		goto err_sriov;
#endif

	reset_workqueue = create_singlethread_workqueue("sfc_reset");
	if (!reset_workqueue) {
		rc = -ENOMEM;
		goto err_reset;
	}

	rc = pci_register_driver(&efx_pci_driver);
	if (rc < 0)
		goto err_pci;

	return 0;

 err_pci:
	destroy_workqueue(reset_workqueue);
 err_reset:
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
 err_sriov:
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);
 err_notifier:
	return rc;
}

static void __exit efx_exit_module(void)
{
	printk(KERN_INFO "Solarflare NET driver unloading\n");

	pci_unregister_driver(&efx_pci_driver);
	destroy_workqueue(reset_workqueue);
#ifdef CONFIG_SFC_SRIOV
	efx_fini_sriov();
#endif
	unregister_netdevice_notifier(&efx_netdev_notifier);

}

module_init(efx_init_module);
module_exit(efx_exit_module);

MODULE_AUTHOR("Solarflare Communications and "
	      "Michael Brown <mbrown@fensystems.co.uk>");
MODULE_DESCRIPTION("Solarflare network driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, efx_pci_table);
MODULE_VERSION(EFX_DRIVER_VERSION);