igb_main.c 215.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/* Intel(R) Gigabit Ethernet Linux driver
 * Copyright(c) 2007-2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 */
23

J
Jeff Kirsher 已提交
24 25
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

26 27 28
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
J
Jiri Pirko 已提交
29
#include <linux/bitops.h>
30 31 32 33
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
34
#include <linux/slab.h>
35 36
#include <net/checksum.h>
#include <net/ip6_checksum.h>
37
#include <linux/net_tstamp.h>
38 39
#include <linux/mii.h>
#include <linux/ethtool.h>
40
#include <linux/if.h>
41 42
#include <linux/if_vlan.h>
#include <linux/pci.h>
43
#include <linux/pci-aspm.h>
44 45
#include <linux/delay.h>
#include <linux/interrupt.h>
46 47 48
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
49
#include <linux/if_ether.h>
50
#include <linux/aer.h>
51
#include <linux/prefetch.h>
Y
Yan, Zheng 已提交
52
#include <linux/pm_runtime.h>
53
#ifdef CONFIG_IGB_DCA
J
Jeb Cramer 已提交
54 55
#include <linux/dca.h>
#endif
C
Carolyn Wyborny 已提交
56
#include <linux/i2c.h>
57 58
#include "igb.h"

C
Carolyn Wyborny 已提交
59
#define MAJ 5
T
Todd Fujinaka 已提交
60 61
#define MIN 3
#define BUILD 0
C
Carolyn Wyborny 已提交
62
#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
63
__stringify(BUILD) "-k"
64 65 66 67
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
				"Intel(R) Gigabit Ethernet Network Driver";
68
static const char igb_copyright[] =
69
				"Copyright (c) 2007-2014 Intel Corporation.";
70 71 72 73 74

/* Table of per-board info structures, indexed by the board enum value.
 * Only board_82575 is populated here.
 */
static const struct e1000_info *igb_info_tbl[] = {
	[board_82575] = &e1000_82575_info,
};

75
static const struct pci_device_id igb_pci_tbl[] = {
76 77 78
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) },
79 80 81 82 83
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII), board_82575 },
84 85
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS), board_82575 },
86 87 88 89
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
90 91
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
92
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
93 94 95
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
96 97
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
G
Gasparakis, Joseph 已提交
98 99
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
A
Alexander Duyck 已提交
100
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
101
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
102
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
A
Alexander Duyck 已提交
103 104
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
105
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
106
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
107
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
108 109 110 111 112 113 114 115 116 117 118 119 120
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
	/* required last entry */
	{0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
121
static void igb_setup_mrqc(struct igb_adapter *);
122
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
123
static void igb_remove(struct pci_dev *pdev);
124 125 126
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
127
static void igb_configure(struct igb_adapter *);
128 129 130 131
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
132 133
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
134
static void igb_set_rx_mode(struct net_device *);
135 136 137
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
138
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
E
Eric Dumazet 已提交
139
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
140
					  struct rtnl_link_stats64 *stats);
141 142
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
143
static void igb_set_uta(struct igb_adapter *adapter);
144 145 146
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
147
static irqreturn_t igb_msix_ring(int irq, void *);
148
#ifdef CONFIG_IGB_DCA
149
static void igb_update_dca(struct igb_q_vector *);
J
Jeb Cramer 已提交
150
static void igb_setup_dca(struct igb_adapter *);
151
#endif /* CONFIG_IGB_DCA */
152
static int igb_poll(struct napi_struct *, int);
153
static bool igb_clean_tx_irq(struct igb_q_vector *);
154
static bool igb_clean_rx_irq(struct igb_q_vector *, int);
155 156 157
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
158 159
static void igb_vlan_mode(struct net_device *netdev,
			  netdev_features_t features);
160 161
static int igb_vlan_rx_add_vid(struct net_device *, __be16, u16);
static int igb_vlan_rx_kill_vid(struct net_device *, __be16, u16);
162
static void igb_restore_vlan(struct igb_adapter *);
163
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
164 165 166
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
167
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
168
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
169 170 171
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos);
172
static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
L
Lior Levy 已提交
173 174
static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
				   bool setting);
175 176
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
				 struct ifla_vf_info *ivi);
177
static void igb_check_vf_rate_limit(struct igb_adapter *);
R
RongQing Li 已提交
178 179

#ifdef CONFIG_PCI_IOV
180
static int igb_vf_configure(struct igb_adapter *adapter, int vf);
181
static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs);
182 183
static int igb_disable_sriov(struct pci_dev *dev);
static int igb_pci_disable_sriov(struct pci_dev *dev);
R
RongQing Li 已提交
184
#endif
185 186

#ifdef CONFIG_PM
187
#ifdef CONFIG_PM_SLEEP
Y
Yan, Zheng 已提交
188
static int igb_suspend(struct device *);
189
#endif
Y
Yan, Zheng 已提交
190 191 192 193 194 195 196 197 198
static int igb_resume(struct device *);
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
static const struct dev_pm_ops igb_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
			igb_runtime_idle)
};
199 200
#endif
static void igb_shutdown(struct pci_dev *);
201
static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
202
#ifdef CONFIG_IGB_DCA
J
Jeb Cramer 已提交
203 204 205 206 207 208 209
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
/* Notifier block handed to dca_register_notify()/dca_unregister_notify() */
static struct notifier_block dca_notifier = {
	.priority	= 0,
	.next		= NULL,
	.notifier_call	= igb_notify_dca
};
#endif
210 211 212 213
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
214
#ifdef CONFIG_PCI_IOV
215
static unsigned int max_vfs;
216
module_param(max_vfs, uint, 0);
C
Carolyn Wyborny 已提交
217
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate per physical function");
218 219
#endif /* CONFIG_PCI_IOV */

220 221 222 223 224
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
		     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

225
static const struct pci_error_handlers igb_err_handler = {
226 227 228 229 230
	.error_detected = igb_io_error_detected,
	.slot_reset = igb_io_slot_reset,
	.resume = igb_io_resume,
};

231
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
232 233 234 235 236

static struct pci_driver igb_driver = {
	.name     = igb_driver_name,
	.id_table = igb_pci_tbl,
	.probe    = igb_probe,
237
	.remove   = igb_remove,
238
#ifdef CONFIG_PM
Y
Yan, Zheng 已提交
239
	.driver.pm = &igb_pm_ops,
240 241
#endif
	.shutdown = igb_shutdown,
242
	.sriov_configure = igb_pci_sriov_configure,
243 244 245 246 247 248 249 250
	.err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

251 252 253 254 255
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");

256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
/* One entry of the register dump table: a register offset and the label
 * printed next to its value.  The label only ever points at string
 * literals, hence const.
 */
struct igb_reg_info {
	u32 ofs;
	const char *name;
};

/* Registers printed by igb_dump().  Iteration stops at the first entry
 * whose name is NULL, so the empty terminator must stay last.
 */
static const struct igb_reg_info igb_reg_info_tbl[] = {
	/* General Registers */
	{ .ofs = E1000_CTRL,		.name = "CTRL" },
	{ .ofs = E1000_STATUS,		.name = "STATUS" },
	{ .ofs = E1000_CTRL_EXT,	.name = "CTRL_EXT" },

	/* Interrupt Registers */
	{ .ofs = E1000_ICR,		.name = "ICR" },

	/* RX Registers */
	{ .ofs = E1000_RCTL,		.name = "RCTL" },
	{ .ofs = E1000_RDLEN(0),	.name = "RDLEN" },
	{ .ofs = E1000_RDH(0),		.name = "RDH" },
	{ .ofs = E1000_RDT(0),		.name = "RDT" },
	{ .ofs = E1000_RXDCTL(0),	.name = "RXDCTL" },
	{ .ofs = E1000_RDBAL(0),	.name = "RDBAL" },
	{ .ofs = E1000_RDBAH(0),	.name = "RDBAH" },

	/* TX Registers */
	{ .ofs = E1000_TCTL,		.name = "TCTL" },
	{ .ofs = E1000_TDBAL(0),	.name = "TDBAL" },
	{ .ofs = E1000_TDBAH(0),	.name = "TDBAH" },
	{ .ofs = E1000_TDLEN(0),	.name = "TDLEN" },
	{ .ofs = E1000_TDH(0),		.name = "TDH" },
	{ .ofs = E1000_TDT(0),		.name = "TDT" },
	{ .ofs = E1000_TXDCTL(0),	.name = "TXDCTL" },
	{ .ofs = E1000_TDFH,		.name = "TDFH" },
	{ .ofs = E1000_TDFT,		.name = "TDFT" },
	{ .ofs = E1000_TDFHS,		.name = "TDFHS" },
	{ .ofs = E1000_TDFPC,		.name = "TDFPC" },

	/* List Terminator */
	{}
};

297
/* igb_regdump - register printout routine */
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
{
	int n = 0;
	char rname[16];
	u32 regs[8];

	switch (reginfo->ofs) {
	case E1000_RDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDLEN(n));
		break;
	case E1000_RDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDH(n));
		break;
	case E1000_RDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDT(n));
		break;
	case E1000_RXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RXDCTL(n));
		break;
	case E1000_RDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_RDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAH(n));
		break;
	case E1000_TDBAL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_RDBAL(n));
		break;
	case E1000_TDBAH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDBAH(n));
		break;
	case E1000_TDLEN(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDLEN(n));
		break;
	case E1000_TDH(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDH(n));
		break;
	case E1000_TDT(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TDT(n));
		break;
	case E1000_TXDCTL(0):
		for (n = 0; n < 4; n++)
			regs[n] = rd32(E1000_TXDCTL(n));
		break;
	default:
J
Jeff Kirsher 已提交
354
		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
355 356 357 358
		return;
	}

	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
J
Jeff Kirsher 已提交
359 360
	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
		regs[2], regs[3]);
361 362
}

363
/* igb_dump - Print registers, Tx-rings and Rx-rings */
364 365 366 367 368 369 370 371 372 373 374
static void igb_dump(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct igb_reg_info *reginfo;
	struct igb_ring *tx_ring;
	union e1000_adv_tx_desc *tx_desc;
	struct my_u0 { u64 a; u64 b; } *u0;
	struct igb_ring *rx_ring;
	union e1000_adv_rx_desc *rx_desc;
	u32 staterr;
375
	u16 i, n;
376 377 378 379 380 381 382

	if (!netif_msg_hw(adapter))
		return;

	/* Print netdevice Info */
	if (netdev) {
		dev_info(&adapter->pdev->dev, "Net device Info\n");
C
Carolyn Wyborny 已提交
383
		pr_info("Device Name     state            trans_start      last_rx\n");
J
Jeff Kirsher 已提交
384 385
		pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
			netdev->state, netdev->trans_start, netdev->last_rx);
386 387 388 389
	}

	/* Print Registers */
	dev_info(&adapter->pdev->dev, "Register Dump\n");
J
Jeff Kirsher 已提交
390
	pr_info(" Register Name   Value\n");
391 392 393 394 395 396 397 398 399 400
	for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
	     reginfo->name; reginfo++) {
		igb_regdump(hw, reginfo);
	}

	/* Print TX Ring Summary */
	if (!netdev || !netif_running(netdev))
		goto exit;

	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
J
Jeff Kirsher 已提交
401
	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
402
	for (n = 0; n < adapter->num_tx_queues; n++) {
403
		struct igb_tx_buffer *buffer_info;
404
		tx_ring = adapter->tx_ring[n];
405
		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
J
Jeff Kirsher 已提交
406 407
		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
			n, tx_ring->next_to_use, tx_ring->next_to_clean,
408 409
			(u64)dma_unmap_addr(buffer_info, dma),
			dma_unmap_len(buffer_info, len),
J
Jeff Kirsher 已提交
410 411
			buffer_info->next_to_watch,
			(u64)buffer_info->time_stamp);
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
	}

	/* Print TX Rings */
	if (!netif_msg_tx_done(adapter))
		goto rx_ring_summary;

	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");

	/* Transmit Descriptor Formats
	 *
	 * Advanced Transmit Descriptor
	 *   +--------------------------------------------------------------+
	 * 0 |         Buffer Address [63:0]                                |
	 *   +--------------------------------------------------------------+
	 * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
	 *   +--------------------------------------------------------------+
	 *   63      46 45    40 39 38 36 35 32 31   24             15       0
	 */

	for (n = 0; n < adapter->num_tx_queues; n++) {
		tx_ring = adapter->tx_ring[n];
J
Jeff Kirsher 已提交
433 434 435
		pr_info("------------------------------------\n");
		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
		pr_info("------------------------------------\n");
C
Carolyn Wyborny 已提交
436
		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
437 438

		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
J
Jeff Kirsher 已提交
439
			const char *next_desc;
440
			struct igb_tx_buffer *buffer_info;
441
			tx_desc = IGB_TX_DESC(tx_ring, i);
442
			buffer_info = &tx_ring->tx_buffer_info[i];
443
			u0 = (struct my_u0 *)tx_desc;
J
Jeff Kirsher 已提交
444 445 446 447 448 449 450 451 452 453
			if (i == tx_ring->next_to_use &&
			    i == tx_ring->next_to_clean)
				next_desc = " NTC/U";
			else if (i == tx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == tx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

C
Carolyn Wyborny 已提交
454 455
			pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
				i, le64_to_cpu(u0->a),
456
				le64_to_cpu(u0->b),
457 458
				(u64)dma_unmap_addr(buffer_info, dma),
				dma_unmap_len(buffer_info, len),
459 460
				buffer_info->next_to_watch,
				(u64)buffer_info->time_stamp,
J
Jeff Kirsher 已提交
461
				buffer_info->skb, next_desc);
462

463
			if (netif_msg_pktdata(adapter) && buffer_info->skb)
464 465
				print_hex_dump(KERN_INFO, "",
					DUMP_PREFIX_ADDRESS,
466
					16, 1, buffer_info->skb->data,
467 468
					dma_unmap_len(buffer_info, len),
					true);
469 470 471 472 473 474
		}
	}

	/* Print RX Rings Summary */
rx_ring_summary:
	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
J
Jeff Kirsher 已提交
475
	pr_info("Queue [NTU] [NTC]\n");
476 477
	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
J
Jeff Kirsher 已提交
478 479
		pr_info(" %5d %5X %5X\n",
			n, rx_ring->next_to_use, rx_ring->next_to_clean);
480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
	}

	/* Print RX Rings */
	if (!netif_msg_rx_status(adapter))
		goto exit;

	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");

	/* Advanced Receive Descriptor (Read) Format
	 *    63                                           1        0
	 *    +-----------------------------------------------------+
	 *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
	 *    +----------------------------------------------+------+
	 *  8 |       Header Buffer Address [63:1]           |  DD  |
	 *    +-----------------------------------------------------+
	 *
	 *
	 * Advanced Receive Descriptor (Write-Back) Format
	 *
	 *   63       48 47    32 31  30      21 20 17 16   4 3     0
	 *   +------------------------------------------------------+
	 * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
	 *   | Checksum   Ident  |   |           |    | Type | Type |
	 *   +------------------------------------------------------+
	 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
	 *   +------------------------------------------------------+
	 *   63       48 47    32 31            20 19               0
	 */

	for (n = 0; n < adapter->num_rx_queues; n++) {
		rx_ring = adapter->rx_ring[n];
J
Jeff Kirsher 已提交
511 512 513
		pr_info("------------------------------------\n");
		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
		pr_info("------------------------------------\n");
C
Carolyn Wyborny 已提交
514 515
		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
516 517

		for (i = 0; i < rx_ring->count; i++) {
J
Jeff Kirsher 已提交
518
			const char *next_desc;
519 520
			struct igb_rx_buffer *buffer_info;
			buffer_info = &rx_ring->rx_buffer_info[i];
521
			rx_desc = IGB_RX_DESC(rx_ring, i);
522 523
			u0 = (struct my_u0 *)rx_desc;
			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
J
Jeff Kirsher 已提交
524 525 526 527 528 529 530 531

			if (i == rx_ring->next_to_use)
				next_desc = " NTU";
			else if (i == rx_ring->next_to_clean)
				next_desc = " NTC";
			else
				next_desc = "";

532 533
			if (staterr & E1000_RXD_STAT_DD) {
				/* Descriptor Done */
534 535
				pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
					"RWB", i,
536 537
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
538
					next_desc);
539
			} else {
540 541
				pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
					"R  ", i,
542 543 544
					le64_to_cpu(u0->a),
					le64_to_cpu(u0->b),
					(u64)buffer_info->dma,
545
					next_desc);
546

547
				if (netif_msg_pktdata(adapter) &&
548
				    buffer_info->dma && buffer_info->page) {
549 550 551
					print_hex_dump(KERN_INFO, "",
					  DUMP_PREFIX_ADDRESS,
					  16, 1,
552 553
					  page_address(buffer_info->page) +
						      buffer_info->page_offset,
554
					  IGB_RX_BUFSZ, true);
555 556 557 558 559 560 561 562 563
				}
			}
		}
	}

exit:
	return;
}

564 565
/**
 *  igb_get_i2c_data - Reads the I2C SDA data bit
C
Carolyn Wyborny 已提交
566 567 568 569
 *  @hw: pointer to hardware structure
 *  @i2cctl: Current value of I2CCTL register
 *
 *  Returns the I2C data bit value
570
 **/
C
Carolyn Wyborny 已提交
571 572 573 574 575 576
static int igb_get_i2c_data(void *data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	struct e1000_hw *hw = &adapter->hw;
	s32 i2cctl = rd32(E1000_I2CPARAMS);

577
	return !!(i2cctl & E1000_I2C_DATA_IN);
C
Carolyn Wyborny 已提交
578 579
}

580 581
/**
 *  igb_set_i2c_data - Sets the I2C data bit
C
Carolyn Wyborny 已提交
582 583 584 585
 *  @data: pointer to hardware structure
 *  @state: I2C data value (0 or 1) to set
 *
 *  Sets the I2C data bit
586
 **/
C
Carolyn Wyborny 已提交
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604
static void igb_set_i2c_data(void *data, int state)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	struct e1000_hw *hw = &adapter->hw;
	s32 i2cctl = rd32(E1000_I2CPARAMS);

	if (state)
		i2cctl |= E1000_I2C_DATA_OUT;
	else
		i2cctl &= ~E1000_I2C_DATA_OUT;

	i2cctl &= ~E1000_I2C_DATA_OE_N;
	i2cctl |= E1000_I2C_CLK_OE_N;
	wr32(E1000_I2CPARAMS, i2cctl);
	wrfl();

}

605 606
/**
 *  igb_set_i2c_clk - Sets the I2C SCL clock
C
Carolyn Wyborny 已提交
607 608 609 610
 *  @data: pointer to hardware structure
 *  @state: state to set clock
 *
 *  Sets the I2C clock line to state
611
 **/
C
Carolyn Wyborny 已提交
612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
static void igb_set_i2c_clk(void *data, int state)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	struct e1000_hw *hw = &adapter->hw;
	s32 i2cctl = rd32(E1000_I2CPARAMS);

	if (state) {
		i2cctl |= E1000_I2C_CLK_OUT;
		i2cctl &= ~E1000_I2C_CLK_OE_N;
	} else {
		i2cctl &= ~E1000_I2C_CLK_OUT;
		i2cctl &= ~E1000_I2C_CLK_OE_N;
	}
	wr32(E1000_I2CPARAMS, i2cctl);
	wrfl();
}

629 630
/**
 *  igb_get_i2c_clk - Gets the I2C SCL clock state
C
Carolyn Wyborny 已提交
631 632 633
 *  @data: pointer to hardware structure
 *
 *  Gets the I2C clock state
634
 **/
C
Carolyn Wyborny 已提交
635 636 637 638 639 640
static int igb_get_i2c_clk(void *data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;
	struct e1000_hw *hw = &adapter->hw;
	s32 i2cctl = rd32(E1000_I2CPARAMS);

641
	return !!(i2cctl & E1000_I2C_CLK_IN);
C
Carolyn Wyborny 已提交
642 643 644 645 646 647 648 649 650 651 652
}

/* Bit-banging I2C callbacks built on the I2CPARAMS accessors above.
 * NOTE(review): units of udelay/timeout are defined by the i2c bit
 * algorithm, not visible here - confirm before changing.
 */
static const struct i2c_algo_bit_data igb_i2c_algo = {
	.getsda		= igb_get_i2c_data,
	.setsda		= igb_set_i2c_data,
	.getscl		= igb_get_i2c_clk,
	.setscl		= igb_set_i2c_clk,
	.timeout	= 20,
	.udelay		= 5,
};

653
/**
654 655 656 657
 *  igb_get_hw_dev - return device
 *  @hw: pointer to hardware structure
 *
 *  used by hardware layer to print debugging information
658
 **/
659
struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
660 661
{
	struct igb_adapter *adapter = hw->back;
662
	return adapter->netdev;
663
}
P
Patrick Ohly 已提交
664

665
/**
 *  igb_init_module - Driver Registration Routine
 *
 *  igb_init_module is the first routine called when the driver is
 *  loaded.  It prints the driver banner, registers the DCA notifier
 *  (when CONFIG_IGB_DCA is set) and registers with the PCI subsystem.
 *
 *  Returns the result of pci_register_driver().  If that fails, the DCA
 *  notifier is unregistered again so no notifier is left pointing into a
 *  module that did not load.
 **/
static int __init igb_init_module(void)
{
	int ret;

	pr_info("%s - version %s\n",
	       igb_driver_string, igb_driver_version);
	pr_info("%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
	dca_register_notify(&dca_notifier);
#endif
	ret = pci_register_driver(&igb_driver);
#ifdef CONFIG_IGB_DCA
	/* undo the notifier registration on failure */
	if (ret)
		dca_unregister_notify(&dca_notifier);
#endif
	return ret;
}

module_init(igb_init_module);

/**
 *  igb_exit_module - Driver Exit Cleanup Routine
 *
 *  Called just before the driver is removed from memory.  Drops the DCA
 *  notifier first (when CONFIG_IGB_DCA is set), then unregisters the
 *  PCI driver.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
	dca_unregister_notify(&dca_notifier);
#endif /* CONFIG_IGB_DCA */

	pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

704 705
/* Map ring number i to its 82576 queue index: bit 0 of i selects the
 * upper block of eight (adds 8), the remaining bits give the position
 * inside the block.  The argument is parenthesised so that expressions
 * such as (a | b) expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
/**
706 707
 *  igb_cache_ring_register - Descriptor ring to register mapping
 *  @adapter: board private structure to initialize
708
 *
709 710
 *  Once we know the feature-set enabled for the device, we'll cache
 *  the register offset the descriptor ring is assigned to.
711 712 713
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
714
	int i = 0, j = 0;
715
	u32 rbase_offset = adapter->vfs_allocated_count;
716 717 718 719 720 721 722 723

	switch (adapter->hw.mac.type) {
	case e1000_82576:
		/* The queues are allocated for virtualization such that VF 0
		 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
		 * In order to avoid collision we start at the first free queue
		 * and continue consuming queues in the same sequence
		 */
724
		if (adapter->vfs_allocated_count) {
725
			for (; i < adapter->rss_queues; i++)
726
				adapter->rx_ring[i]->reg_idx = rbase_offset +
727
							       Q_IDX_82576(i);
728
		}
729
		/* Fall through */
730
	case e1000_82575:
731
	case e1000_82580:
732
	case e1000_i350:
733
	case e1000_i354:
734 735
	case e1000_i210:
	case e1000_i211:
736
		/* Fall through */
737
	default:
738
		for (; i < adapter->num_rx_queues; i++)
739
			adapter->rx_ring[i]->reg_idx = rbase_offset + i;
740
		for (; j < adapter->num_tx_queues; j++)
741
			adapter->tx_ring[j]->reg_idx = rbase_offset + j;
742 743 744 745
		break;
	}
}

746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
/**
 *  igb_rd32 - read a 32-bit device register
 *  @hw: pointer to the HW structure
 *  @reg: byte offset of the register from hw->hw_addr
 *
 *  Returns the register value, or all ones if E1000_REMOVED() reports
 *  the mapping as gone.  A read of all ones that is either from offset 0
 *  or confirmed by an all-ones read of offset 0 is treated as a lost
 *  link: hw->hw_addr is cleared and the netdev is detached.
 **/
u32 igb_rd32(struct e1000_hw *hw, u32 reg)
{
	u8 __iomem *base = ACCESS_ONCE(hw->hw_addr);
	struct net_device *netdev;
	u32 value;

	if (E1000_REMOVED(base))
		return ~(u32)0;

	value = readl(&base[reg]);

	/* reads should not return all F's */
	if (value != ~(u32)0)
		return value;

	/* for a non-zero offset, confirm with a read of offset 0 */
	if (reg && readl(base) != ~(u32)0)
		return value;

	netdev = container_of(hw, struct igb_adapter, hw)->netdev;
	hw->hw_addr = NULL;
	netif_device_detach(netdev);
	netdev_err(netdev, "PCIe link lost, device now detached\n");

	return value;
}

A
Alexander Duyck 已提交
768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
/**
 *  igb_write_ivar - configure ivar for given MSI-X vector
 *  @hw: pointer to the HW structure
 *  @msix_vector: vector number we are allocating to a given ring
 *  @index: row index of IVAR register to write within IVAR table
 *  @offset: column offset within the IVAR register, should be a multiple
 *           of 8
 *
 *  This function is intended to handle the writing of the IVAR register
 *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
 *  each containing a cause allocation for an Rx and Tx ring, and a
 *  variable number of rows depending on the number of queues supported.
 **/
static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
			   int index, int offset)
{
	u32 ivar = array_rd32(E1000_IVAR0, index);

	/* clear any bits that are currently set */
	ivar &= ~((u32)0xFF << offset);

	/* write vector and valid bit; shift as u32 so that offset 24 does
	 * not move the valid bit into the sign bit of a signed int
	 */
	ivar |= (u32)(msix_vector | E1000_IVAR_VALID) << offset;

	array_wr32(E1000_IVAR0, index, ivar);
}

794
#define IGB_N0_QUEUE -1
795
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
796
{
797
	struct igb_adapter *adapter = q_vector->adapter;
798
	struct e1000_hw *hw = &adapter->hw;
799 800
	int rx_queue = IGB_N0_QUEUE;
	int tx_queue = IGB_N0_QUEUE;
A
Alexander Duyck 已提交
801
	u32 msixbm = 0;
802

803 804 805 806
	if (q_vector->rx.ring)
		rx_queue = q_vector->rx.ring->reg_idx;
	if (q_vector->tx.ring)
		tx_queue = q_vector->tx.ring->reg_idx;
A
Alexander Duyck 已提交
807 808 809

	switch (hw->mac.type) {
	case e1000_82575:
810
		/* The 82575 assigns vectors using a bitmask, which matches the
811 812 813 814
		 * bitmask for the EICR/EIMS/EIMC registers.  To assign one
		 * or more queues to a vector, we write the appropriate bits
		 * into the MSIXBM register for that vector.
		 */
815
		if (rx_queue > IGB_N0_QUEUE)
816
			msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
817
		if (tx_queue > IGB_N0_QUEUE)
818
			msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
819
		if (!(adapter->flags & IGB_FLAG_HAS_MSIX) && msix_vector == 0)
820
			msixbm |= E1000_EIMS_OTHER;
821
		array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
822
		q_vector->eims_value = msixbm;
A
Alexander Duyck 已提交
823 824
		break;
	case e1000_82576:
825
		/* 82576 uses a table that essentially consists of 2 columns
A
Alexander Duyck 已提交
826 827 828 829 830 831 832 833 834 835 836 837
		 * with 8 rows.  The ordering is column-major so we use the
		 * lower 3 bits as the row index, and the 4th bit as the
		 * column offset.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue & 0x7,
				       (rx_queue & 0x8) << 1);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue & 0x7,
				       ((tx_queue & 0x8) << 1) + 8);
838
		q_vector->eims_value = 1 << msix_vector;
A
Alexander Duyck 已提交
839
		break;
840
	case e1000_82580:
841
	case e1000_i350:
842
	case e1000_i354:
843 844
	case e1000_i210:
	case e1000_i211:
845
		/* On 82580 and newer adapters the scheme is similar to 82576
A
Alexander Duyck 已提交
846 847 848 849 850 851 852 853 854 855 856 857 858
		 * however instead of ordering column-major we have things
		 * ordered row-major.  So we traverse the table by using
		 * bit 0 as the column offset, and the remaining bits as the
		 * row index.
		 */
		if (rx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       rx_queue >> 1,
				       (rx_queue & 0x1) << 4);
		if (tx_queue > IGB_N0_QUEUE)
			igb_write_ivar(hw, msix_vector,
				       tx_queue >> 1,
				       ((tx_queue & 0x1) << 4) + 8);
859 860
		q_vector->eims_value = 1 << msix_vector;
		break;
A
Alexander Duyck 已提交
861 862 863 864
	default:
		BUG();
		break;
	}
865 866 867 868 869 870

	/* add q_vector eims value to global eims_enable_mask */
	adapter->eims_enable_mask |= q_vector->eims_value;

	/* configure q_vector to set itr on first interrupt */
	q_vector->set_itr = 1;
871 872 873
}

/**
874 875
 *  igb_configure_msix - Configure MSI-X hardware
 *  @adapter: board private structure to initialize
876
 *
877 878
 *  igb_configure_msix sets up the hardware to properly
 *  generate MSI-X interrupts.
879 880 881 882 883 884 885 886 887 888
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
	u32 tmp;
	int i, vector = 0;
	struct e1000_hw *hw = &adapter->hw;

	adapter->eims_enable_mask = 0;

	/* set vector for other causes, i.e. link changes */
A
Alexander Duyck 已提交
889 890
	switch (hw->mac.type) {
	case e1000_82575:
891 892 893 894 895 896 897 898 899
		tmp = rd32(E1000_CTRL_EXT);
		/* enable MSI-X PBA support*/
		tmp |= E1000_CTRL_EXT_PBA_CLR;

		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;

		wr32(E1000_CTRL_EXT, tmp);
900 901

		/* enable msix_other interrupt */
902
		array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER);
P
PJ Waskiewicz 已提交
903
		adapter->eims_other = E1000_EIMS_OTHER;
904

A
Alexander Duyck 已提交
905 906 907
		break;

	case e1000_82576:
908
	case e1000_82580:
909
	case e1000_i350:
910
	case e1000_i354:
911 912
	case e1000_i210:
	case e1000_i211:
913
		/* Turn on MSI-X capability first, or our settings
914 915
		 * won't stick.  And it will take days to debug.
		 */
916
		wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
917 918
		     E1000_GPIE_PBA | E1000_GPIE_EIAME |
		     E1000_GPIE_NSICR);
919 920 921

		/* enable msix_other interrupt */
		adapter->eims_other = 1 << vector;
A
Alexander Duyck 已提交
922 923
		tmp = (vector++ | E1000_IVAR_VALID) << 8;

924
		wr32(E1000_IVAR_MISC, tmp);
A
Alexander Duyck 已提交
925 926 927 928 929
		break;
	default:
		/* do nothing, since nothing else supports MSI-X */
		break;
	} /* switch (hw->mac.type) */
930 931 932

	adapter->eims_enable_mask |= adapter->eims_other;

933 934
	for (i = 0; i < adapter->num_q_vectors; i++)
		igb_assign_vector(adapter->q_vector[i], vector++);
935

936 937 938 939
	wrfl();
}

/**
940 941
 *  igb_request_msix - Initialize MSI-X interrupts
 *  @adapter: board private structure to initialize
942
 *
943 944
 *  igb_request_msix allocates MSI-X vectors and requests interrupts from the
 *  kernel.
945 946 947 948
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
949
	struct e1000_hw *hw = &adapter->hw;
950
	int i, err = 0, vector = 0, free_vector = 0;
951

952
	err = request_irq(adapter->msix_entries[vector].vector,
953
			  igb_msix_other, 0, netdev->name, adapter);
954
	if (err)
955
		goto err_out;
956 957 958 959

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];

960 961
		vector++;

962 963
		q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

964
		if (q_vector->rx.ring && q_vector->tx.ring)
965
			sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
966 967
				q_vector->rx.ring->queue_index);
		else if (q_vector->tx.ring)
968
			sprintf(q_vector->name, "%s-tx-%u", netdev->name,
969 970
				q_vector->tx.ring->queue_index);
		else if (q_vector->rx.ring)
971
			sprintf(q_vector->name, "%s-rx-%u", netdev->name,
972
				q_vector->rx.ring->queue_index);
973
		else
974 975
			sprintf(q_vector->name, "%s-unused", netdev->name);

976
		err = request_irq(adapter->msix_entries[vector].vector,
977 978
				  igb_msix_ring, 0, q_vector->name,
				  q_vector);
979
		if (err)
980
			goto err_free;
981 982 983 984
	}

	igb_configure_msix(adapter);
	return 0;
985 986 987 988 989 990 991 992 993 994 995

err_free:
	/* free already assigned IRQs */
	free_irq(adapter->msix_entries[free_vector++].vector, adapter);

	vector--;
	for (i = 0; i < vector; i++) {
		free_irq(adapter->msix_entries[free_vector++].vector,
			 adapter->q_vector[i]);
	}
err_out:
996 997 998
	return err;
}

999
/**
1000 1001 1002
 *  igb_free_q_vector - Free memory allocated for specific interrupt vector
 *  @adapter: board private structure to initialize
 *  @v_idx: Index of vector to be freed
1003
 *
1004
 *  This function frees the memory allocated to the q_vector.
1005 1006 1007 1008 1009
 **/
static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

1010 1011 1012 1013 1014
	adapter->q_vector[v_idx] = NULL;

	/* igb_get_stats64() might access the rings on this vector,
	 * we must wait a grace period before freeing it.
	 */
1015 1016
	if (q_vector)
		kfree_rcu(q_vector, rcu);
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
}

/**
 *  igb_reset_q_vector - Reset config for interrupt vector
 *  @adapter: board private structure to initialize
 *  @v_idx: Index of vector to be reset
 *
 *  If NAPI is enabled it will delete any references to the
 *  NAPI struct. This is preparation for igb_free_q_vector.
 **/
static void igb_reset_q_vector(struct igb_adapter *adapter, int v_idx)
{
	struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

1031 1032 1033 1034 1035 1036
	/* Coming from igb_set_interrupt_capability, the vectors are not yet
	 * allocated. So, q_vector is NULL so we should stop here.
	 */
	if (!q_vector)
		return;

1037 1038 1039 1040
	if (q_vector->tx.ring)
		adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;

	if (q_vector->rx.ring)
1041
		adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
1042 1043 1044

	netif_napi_del(&q_vector->napi);

1045 1046 1047 1048 1049 1050
}

static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
	int v_idx = adapter->num_q_vectors;

1051
	if (adapter->flags & IGB_FLAG_HAS_MSIX)
1052
		pci_disable_msix(adapter->pdev);
1053
	else if (adapter->flags & IGB_FLAG_HAS_MSI)
1054 1055 1056 1057
		pci_disable_msi(adapter->pdev);

	while (v_idx--)
		igb_reset_q_vector(adapter, v_idx);
1058 1059
}

1060
/**
 *  igb_free_q_vectors - Free memory allocated for interrupt vectors
 *  @adapter: board private structure to initialize
 *
 *  Zeroes the adapter's queue and vector counts, then walks the vectors
 *  from the highest index down, resetting each one (which deletes its
 *  NAPI struct) before freeing its memory.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
	/* remember how many vectors exist before the count is cleared */
	int remaining = adapter->num_q_vectors;

	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (remaining--) {
		igb_reset_q_vector(adapter, remaining);
		igb_free_q_vector(adapter, remaining);
	}
}

/**
 *  igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *  @adapter: board private structure to initialize
 *
 *  Frees all q_vectors first, which leaves the adapter with 0 Rx queues,
 *  0 Tx queues and 0 q_vectors, and afterwards disables MSI-X/MSI through
 *  igb_reset_interrupt_capability().
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* vectors go first, the PCI interrupt capability second */
	igb_free_q_vectors(adapter);

	igb_reset_interrupt_capability(adapter);
}
1094 1095

/**
1096 1097 1098
 *  igb_set_interrupt_capability - set MSI or MSI-X if supported
 *  @adapter: board private structure to initialize
 *  @msix: boolean value of MSIX capability
1099
 *
1100 1101
 *  Attempt to configure interrupts using the best available
 *  capabilities of the hardware and kernel.
1102
 **/
1103
static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
1104 1105 1106 1107
{
	int err;
	int numvecs, i;

1108 1109
	if (!msix)
		goto msi_only;
1110
	adapter->flags |= IGB_FLAG_HAS_MSIX;
1111

1112
	/* Number of supported queues. */
1113
	adapter->num_rx_queues = adapter->rss_queues;
1114 1115 1116 1117
	if (adapter->vfs_allocated_count)
		adapter->num_tx_queues = 1;
	else
		adapter->num_tx_queues = adapter->rss_queues;
1118

1119
	/* start with one vector for every Rx queue */
1120 1121
	numvecs = adapter->num_rx_queues;

1122
	/* if Tx handler is separate add 1 for every Tx queue */
1123 1124
	if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
		numvecs += adapter->num_tx_queues;
1125 1126 1127 1128 1129 1130

	/* store the number of vectors reserved for queues */
	adapter->num_q_vectors = numvecs;

	/* add 1 vector for link status interrupts */
	numvecs++;
1131 1132 1133
	for (i = 0; i < numvecs; i++)
		adapter->msix_entries[i].entry = i;

1134 1135 1136 1137 1138
	err = pci_enable_msix_range(adapter->pdev,
				    adapter->msix_entries,
				    numvecs,
				    numvecs);
	if (err > 0)
1139
		return;
1140 1141 1142 1143 1144

	igb_reset_interrupt_capability(adapter);

	/* If we can't do MSI-X, try MSI */
msi_only:
1145
	adapter->flags &= ~IGB_FLAG_HAS_MSIX;
1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
#ifdef CONFIG_PCI_IOV
	/* disable SR-IOV for non MSI-X configurations */
	if (adapter->vf_data) {
		struct e1000_hw *hw = &adapter->hw;
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(adapter->pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1157
		wrfl();
1158 1159 1160 1161
		msleep(100);
		dev_info(&adapter->pdev->dev, "IOV Disabled\n");
	}
#endif
1162
	adapter->vfs_allocated_count = 0;
1163
	adapter->rss_queues = 1;
1164
	adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1165
	adapter->num_rx_queues = 1;
1166
	adapter->num_tx_queues = 1;
1167
	adapter->num_q_vectors = 1;
1168
	if (!pci_enable_msi(adapter->pdev))
1169
		adapter->flags |= IGB_FLAG_HAS_MSI;
1170 1171
}

1172 1173 1174 1175 1176 1177 1178
/* Attach @ring to the ring container @head and bump the container's
 * ring count.
 */
static void igb_add_ring(struct igb_ring *ring,
			 struct igb_ring_container *head)
{
	head->count += 1;
	head->ring = ring;
}

1179
/**
1180 1181 1182 1183 1184 1185 1186 1187
 *  igb_alloc_q_vector - Allocate memory for a single interrupt vector
 *  @adapter: board private structure to initialize
 *  @v_count: q_vectors allocated on adapter, used for ring interleaving
 *  @v_idx: index of vector in adapter struct
 *  @txr_count: total number of Tx rings to allocate
 *  @txr_idx: index of first Tx ring to allocate
 *  @rxr_count: total number of Rx rings to allocate
 *  @rxr_idx: index of first Rx ring to allocate
1188
 *
1189
 *  We allocate one q_vector.  If allocation fails we return -ENOMEM.
1190
 **/
1191 1192 1193 1194
static int igb_alloc_q_vector(struct igb_adapter *adapter,
			      int v_count, int v_idx,
			      int txr_count, int txr_idx,
			      int rxr_count, int rxr_idx)
1195 1196
{
	struct igb_q_vector *q_vector;
1197 1198
	struct igb_ring *ring;
	int ring_count, size;
1199

1200 1201 1202 1203 1204 1205 1206 1207 1208
	/* igb only supports 1 Tx and/or 1 Rx queue per vector */
	if (txr_count > 1 || rxr_count > 1)
		return -ENOMEM;

	ring_count = txr_count + rxr_count;
	size = sizeof(struct igb_q_vector) +
	       (sizeof(struct igb_ring) * ring_count);

	/* allocate q_vector and rings */
1209
	q_vector = adapter->q_vector[v_idx];
1210
	if (!q_vector) {
1211
		q_vector = kzalloc(size, GFP_KERNEL);
1212 1213 1214 1215
	} else if (size > ksize(q_vector)) {
		kfree_rcu(q_vector, rcu);
		q_vector = kzalloc(size, GFP_KERNEL);
	} else {
1216
		memset(q_vector, 0, size);
1217
	}
1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
	if (!q_vector)
		return -ENOMEM;

	/* initialize NAPI */
	netif_napi_add(adapter->netdev, &q_vector->napi,
		       igb_poll, 64);

	/* tie q_vector and adapter together */
	adapter->q_vector[v_idx] = q_vector;
	q_vector->adapter = adapter;

	/* initialize work limits */
	q_vector->tx.work_limit = adapter->tx_work_limit;

	/* initialize ITR configuration */
	q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0);
	q_vector->itr_val = IGB_START_ITR;

	/* initialize pointer to rings */
	ring = q_vector->ring;

1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249
	/* intialize ITR */
	if (rxr_count) {
		/* rx or rx/tx vector */
		if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
			q_vector->itr_val = adapter->rx_itr_setting;
	} else {
		/* tx only vector */
		if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
			q_vector->itr_val = adapter->tx_itr_setting;
	}

1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268
	if (txr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* update q_vector Tx values */
		igb_add_ring(ring, &q_vector->tx);

		/* For 82575, context index must be unique per ring. */
		if (adapter->hw.mac.type == e1000_82575)
			set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);

		/* apply Tx specific ring traits */
		ring->count = adapter->tx_ring_count;
		ring->queue_index = txr_idx;

1269 1270 1271
		u64_stats_init(&ring->tx_syncp);
		u64_stats_init(&ring->tx_syncp2);

1272 1273 1274 1275 1276
		/* assign ring to adapter */
		adapter->tx_ring[txr_idx] = ring;

		/* push pointer to next ring */
		ring++;
1277
	}
1278

1279 1280 1281 1282
	if (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &adapter->pdev->dev;
		ring->netdev = adapter->netdev;
1283

1284 1285
		/* configure backlink on ring */
		ring->q_vector = q_vector;
1286

1287 1288
		/* update q_vector Rx values */
		igb_add_ring(ring, &q_vector->rx);
1289

1290 1291 1292
		/* set flag indicating ring supports SCTP checksum offload */
		if (adapter->hw.mac.type >= e1000_82576)
			set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
1293

1294
		/* On i350, i354, i210, and i211, loopback VLAN packets
1295
		 * have the tag byte-swapped.
1296
		 */
1297 1298
		if (adapter->hw.mac.type >= e1000_i350)
			set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
1299

1300 1301 1302 1303
		/* apply Rx specific ring traits */
		ring->count = adapter->rx_ring_count;
		ring->queue_index = rxr_idx;

1304 1305
		u64_stats_init(&ring->rx_syncp);

1306 1307 1308 1309 1310
		/* assign ring to adapter */
		adapter->rx_ring[rxr_idx] = ring;
	}

	return 0;
1311 1312
}

1313

1314
/**
1315 1316
 *  igb_alloc_q_vectors - Allocate memory for interrupt vectors
 *  @adapter: board private structure to initialize
1317
 *
1318 1319
 *  We allocate one q_vector per queue interrupt.  If allocation fails we
 *  return -ENOMEM.
1320
 **/
1321
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1322
{
1323 1324 1325 1326 1327
	int q_vectors = adapter->num_q_vectors;
	int rxr_remaining = adapter->num_rx_queues;
	int txr_remaining = adapter->num_tx_queues;
	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;
1328

1329 1330 1331 1332
	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
						 0, 0, 1, rxr_idx);
1333

1334 1335 1336 1337 1338 1339
			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
1340 1341
		}
	}
1342 1343 1344 1345

	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
1346

1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359
		err = igb_alloc_q_vector(adapter, q_vectors, v_idx,
					 tqpv, txr_idx, rqpv, rxr_idx);

		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

1360
	return 0;
1361 1362 1363 1364 1365 1366 1367 1368 1369 1370

err_out:
	adapter->num_tx_queues = 0;
	adapter->num_rx_queues = 0;
	adapter->num_q_vectors = 0;

	while (v_idx--)
		igb_free_q_vector(adapter, v_idx);

	return -ENOMEM;
1371 1372 1373
}

/**
1374 1375 1376
 *  igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *  @adapter: board private structure to initialize
 *  @msix: boolean value of MSIX capability
1377
 *
1378
 *  This function initializes the interrupts and allocates all of the queues.
1379
 **/
1380
static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix)
1381 1382 1383 1384
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

1385
	igb_set_interrupt_capability(adapter, msix);
1386 1387 1388 1389 1390 1391 1392

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

1393
	igb_cache_ring_register(adapter);
1394 1395

	return 0;
1396

1397 1398 1399 1400 1401
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}

1402
/**
1403 1404
 *  igb_request_irq - initialize interrupts
 *  @adapter: board private structure to initialize
1405
 *
1406 1407
 *  Attempts to configure interrupts using the best available
 *  capabilities of the hardware and kernel.
1408 1409 1410 1411
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
1412
	struct pci_dev *pdev = adapter->pdev;
1413 1414
	int err = 0;

1415
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1416
		err = igb_request_msix(adapter);
P
PJ Waskiewicz 已提交
1417
		if (!err)
1418 1419
			goto request_done;
		/* fall back to MSI */
1420 1421
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
1422

1423
		igb_clear_interrupt_scheme(adapter);
1424 1425
		err = igb_init_interrupt_scheme(adapter, false);
		if (err)
1426
			goto request_done;
1427

1428 1429
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
1430
		igb_configure(adapter);
1431
	}
P
PJ Waskiewicz 已提交
1432

1433 1434
	igb_assign_vector(adapter->q_vector[0], 0);

1435
	if (adapter->flags & IGB_FLAG_HAS_MSI) {
1436
		err = request_irq(pdev->irq, igb_intr_msi, 0,
1437
				  netdev->name, adapter);
1438 1439
		if (!err)
			goto request_done;
1440

1441 1442
		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
1443
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
1444 1445
	}

1446
	err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1447
			  netdev->name, adapter);
1448

A
Andy Gospodarek 已提交
1449
	if (err)
1450
		dev_err(&pdev->dev, "Error %d getting interrupt\n",
1451 1452 1453 1454 1455 1456 1457 1458
			err);

request_done:
	return err;
}

static void igb_free_irq(struct igb_adapter *adapter)
{
1459
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1460 1461
		int vector = 0, i;

1462
		free_irq(adapter->msix_entries[vector++].vector, adapter);
1463

1464
		for (i = 0; i < adapter->num_q_vectors; i++)
1465
			free_irq(adapter->msix_entries[vector++].vector,
1466
				 adapter->q_vector[i]);
1467 1468
	} else {
		free_irq(adapter->pdev->irq, adapter);
1469 1470 1471 1472
	}
}

/**
1473 1474
 *  igb_irq_disable - Mask off interrupt generation on the NIC
 *  @adapter: board private structure
1475 1476 1477 1478 1479
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

1480
	/* we need to be careful when disabling interrupts.  The VFs are also
1481 1482 1483
	 * mapped into these registers and so clearing the bits can cause
	 * issues on the VF drivers so we only need to clear what we set
	 */
1484
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1485
		u32 regval = rd32(E1000_EIAM);
1486

1487 1488 1489 1490
		wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
		wr32(E1000_EIMC, adapter->eims_enable_mask);
		regval = rd32(E1000_EIAC);
		wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1491
	}
P
PJ Waskiewicz 已提交
1492 1493

	wr32(E1000_IAM, 0);
1494 1495
	wr32(E1000_IMC, ~0);
	wrfl();
1496
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1497
		int i;
1498

1499 1500 1501 1502 1503
		for (i = 0; i < adapter->num_q_vectors; i++)
			synchronize_irq(adapter->msix_entries[i].vector);
	} else {
		synchronize_irq(adapter->pdev->irq);
	}
1504 1505 1506
}

/**
1507 1508
 *  igb_irq_enable - Enable default interrupt generation settings
 *  @adapter: board private structure
1509 1510 1511 1512 1513
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

1514
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
1515
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1516
		u32 regval = rd32(E1000_EIAC);
1517

1518 1519 1520
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
P
PJ Waskiewicz 已提交
1521
		wr32(E1000_EIMS, adapter->eims_enable_mask);
1522
		if (adapter->vfs_allocated_count) {
1523
			wr32(E1000_MBVFIMR, 0xFF);
1524 1525 1526
			ims |= E1000_IMS_VMMB;
		}
		wr32(E1000_IMS, ims);
P
PJ Waskiewicz 已提交
1527
	} else {
1528 1529 1530 1531
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
P
PJ Waskiewicz 已提交
1532
	}
1533 1534 1535 1536
}

static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
1537
	struct e1000_hw *hw = &adapter->hw;
1538 1539
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;
1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
J
Jiri Pirko 已提交
1551
	    !test_bit(old_vid, adapter->active_vlans)) {
1552 1553
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
1554 1555 1556 1557
	}
}

/**
 *  igb_release_hw_control - release control of the h/w to f/w
 *  @adapter: address of board private structure
 *
 *  igb_release_hw_control clears the DRV_LOAD bit in CTRL_EXT.
 *  For ASF and Pass Through versions of f/w this means that the
 *  driver is no longer loaded.
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext = rd32(E1000_CTRL_EXT);

	/* Let firmware take over control of h/w */
	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
	wr32(E1000_CTRL_EXT, ctrl_ext);
}

/**
 *  igb_get_hw_control - get control of the h/w from f/w
 *  @adapter: address of board private structure
 *
 *  igb_get_hw_control sets the DRV_LOAD bit in CTRL_EXT.
 *  For ASF and Pass Through versions of f/w this means that
 *  the driver is loaded.
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext = rd32(E1000_CTRL_EXT);

	/* Let firmware know the driver has taken over */
	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
	wr32(E1000_CTRL_EXT, ctrl_ext);
}

/**
 *  igb_configure - configure the hardware for RX and TX
 *  @adapter: private board structure
 *
 *  Takes control of the hardware from the firmware, restores the Rx mode
 *  and VLAN settings, programs the Tx/Rx control and queue registers and
 *  finally refills every Rx ring with buffers.
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct igb_ring *rx_ring;
	int q;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	/* global Tx/Rx control registers */
	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	/* per-direction configuration */
	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* igb_desc_unused always leaves at least 1 descriptor unused,
	 * which makes sure that next_to_use != next_to_clean
	 */
	for (q = 0; q < adapter->num_rx_queues; q++) {
		rx_ring = adapter->rx_ring[q];
		igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
	}
}

1628
/**
1629 1630
 *  igb_power_up_link - Power up the phy/serdes link
 *  @adapter: address of board private structure
1631 1632 1633
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
1634 1635
	igb_reset_phy(&adapter->hw);

1636 1637 1638 1639
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_up_phy_copper(&adapter->hw);
	else
		igb_power_up_serdes_link_82575(&adapter->hw);
1640 1641

	igb_setup_link(&adapter->hw);
1642 1643 1644
}

/**
1645 1646
 *  igb_power_down_link - Power down the phy/serdes link
 *  @adapter: address of board private structure
1647 1648 1649 1650 1651 1652 1653 1654
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
	if (adapter->hw.phy.media_type == e1000_media_type_copper)
		igb_power_down_phy_copper_82575(&adapter->hw);
	else
		igb_shutdown_serdes_link_82575(&adapter->hw);
}
1655

1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722
/**
 *  igb_check_swap_media - Detect and switch function for Media Auto Sense
 *  @adapter: address of the board private structure
 *
 *  Reads CTRL_EXT and CONNSW to decide whether the active media has to be
 *  swapped.  If so, the link mode in CTRL_EXT is switched between copper
 *  and fiber/serdes and IGB_FLAG_MEDIA_RESET is set in adapter->flags.
 **/
static void igb_check_swap_media(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl_ext, connsw;
	bool swap_now = false;

	ctrl_ext = rd32(E1000_CTRL_EXT);
	connsw = rd32(E1000_CONNSW);

	/* need to live swap if current media is copper and we have fiber/serdes
	 * to go to.
	 */

	if ((hw->phy.media_type == e1000_media_type_copper) &&
	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
		swap_now = true;
	} else if (!(connsw & E1000_CONNSW_SERDESD)) {
		/* copper signal takes time to appear: re-arm the autosense
		 * configuration and give it up to 4 tries before deciding
		 */
		if (adapter->copper_tries < 4) {
			adapter->copper_tries++;
			connsw |= E1000_CONNSW_AUTOSENSE_CONF;
			wr32(E1000_CONNSW, connsw);
			return;
		}

		adapter->copper_tries = 0;
		if ((connsw & E1000_CONNSW_PHYSD) &&
		    (!(connsw & E1000_CONNSW_PHY_PDN))) {
			swap_now = true;
			connsw &= ~E1000_CONNSW_AUTOSENSE_CONF;
			wr32(E1000_CONNSW, connsw);
		}
	}

	if (!swap_now)
		return;

	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		netdev_info(adapter->netdev,
			"MAS: changing media to fiber/serdes\n");
		ctrl_ext |=
			E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
		adapter->flags |= IGB_FLAG_MEDIA_RESET;
		adapter->copper_tries = 0;
		break;
	case e1000_media_type_internal_serdes:
	case e1000_media_type_fiber:
		netdev_info(adapter->netdev,
			"MAS: changing media to copper\n");
		ctrl_ext &=
			~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
		adapter->flags |= IGB_FLAG_MEDIA_RESET;
		break;
	default:
		/* shouldn't get here during regular operation; ctrl_ext is
		 * left untouched and written back unchanged below
		 */
		netdev_err(adapter->netdev,
			"MAS: Invalid media type found, returning\n");
		break;
	}
	wr32(E1000_CTRL_EXT, ctrl_ext);
}

1723
/**
1724 1725
 *  igb_up - Open the interface and prepare it to handle traffic
 *  @adapter: board private structure
1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

1737 1738 1739
	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));

1740
	if (adapter->flags & IGB_FLAG_HAS_MSIX)
1741
		igb_configure_msix(adapter);
1742 1743
	else
		igb_assign_vector(adapter->q_vector[0], 0);
1744 1745 1746 1747 1748

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

1749 1750 1751
	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
1752

1753 1754 1755 1756
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

1757 1758
	netif_tx_start_all_queues(adapter->netdev);

1759 1760 1761 1762
	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

1763 1764 1765 1766
	if ((adapter->flags & IGB_FLAG_EEE) &&
	    (!hw->dev_spec._82575.eee_disable))
		adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T;

1767 1768 1769 1770 1771 1772
	return 0;
}

void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
1773
	struct e1000_hw *hw = &adapter->hw;
1774 1775 1776 1777
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
1778 1779
	 * reschedule our watchdog timer
	 */
1780 1781 1782 1783 1784 1785 1786
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

1787
	netif_carrier_off(netdev);
1788
	netif_tx_stop_all_queues(netdev);
1789 1790 1791 1792 1793 1794 1795

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
1796
	usleep_range(10000, 11000);
1797

1798 1799
	igb_irq_disable(adapter);

1800 1801
	adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;

1802
	for (i = 0; i < adapter->num_q_vectors; i++) {
1803 1804 1805 1806
		if (adapter->q_vector[i]) {
			napi_synchronize(&adapter->q_vector[i]->napi);
			napi_disable(&adapter->q_vector[i]->napi);
		}
1807
	}
1808 1809 1810 1811

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

1812
	/* record the stats before reset*/
E
Eric Dumazet 已提交
1813 1814 1815
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);
1816

1817 1818 1819
	adapter->link_speed = 0;
	adapter->link_duplex = 0;

1820 1821
	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
1822 1823
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
1824 1825 1826 1827 1828
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
1829 1830 1831 1832 1833 1834
}

/* Bring the interface down and up again while holding the
 * __IGB_RESETTING bit.  The function sleeps while waiting for the bit
 * and warns when it is called from interrupt context.
 */
void igb_reinit_locked(struct igb_adapter *adapter)
{
	WARN_ON(in_interrupt());

	/* wait until nobody else is resetting the adapter */
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) {
		usleep_range(1000, 2000);
	}

	igb_down(adapter);
	igb_up(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);
}

1841 1842 1843 1844
/** igb_enable_mas - Media Autosense re-enable after swap
 *
 * @adapter: adapter struct
 **/
1845
static void igb_enable_mas(struct igb_adapter *adapter)
1846 1847
{
	struct e1000_hw *hw = &adapter->hw;
1848
	u32 connsw = rd32(E1000_CONNSW);
1849 1850

	/* configure for SerDes media detect */
1851 1852
	if ((hw->phy.media_type == e1000_media_type_copper) &&
	    (!(connsw & E1000_CONNSW_SERDESD))) {
1853 1854 1855 1856 1857 1858 1859
		connsw |= E1000_CONNSW_ENRGSRC;
		connsw |= E1000_CONNSW_AUTOSENSE_EN;
		wr32(E1000_CONNSW, connsw);
		wrfl();
	}
}

1860 1861
void igb_reset(struct igb_adapter *adapter)
{
1862
	struct pci_dev *pdev = adapter->pdev;
1863
	struct e1000_hw *hw = &adapter->hw;
A
Alexander Duyck 已提交
1864 1865
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
1866
	u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm;
1867 1868 1869 1870

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
1871
	switch (mac->type) {
1872
	case e1000_i350:
1873
	case e1000_i354:
1874 1875 1876 1877
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
1878
	case e1000_82576:
1879 1880
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
1881 1882
		break;
	case e1000_82575:
1883 1884
	case e1000_i210:
	case e1000_i211:
1885 1886 1887
	default:
		pba = E1000_PBA_34K;
		break;
A
Alexander Duyck 已提交
1888
	}
1889

A
Alexander Duyck 已提交
1890 1891
	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
1892 1893 1894 1895 1896 1897 1898 1899
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
1900 1901
		 * expressed in KB.
		 */
1902 1903 1904 1905 1906
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
1907 1908 1909
		/* the Tx fifo also stores 16 bytes of information about the Tx
		 * but don't include ethernet FCS because hardware appends it
		 */
1910
		min_tx_space = (adapter->max_frame_size +
1911
				sizeof(union e1000_adv_tx_desc) -
1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
1922 1923
		 * allocation, take space away from current Rx allocation
		 */
1924 1925 1926 1927
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

1928 1929 1930
			/* if short on Rx space, Rx wins and must trump Tx
			 * adjustment
			 */
1931 1932 1933
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
A
Alexander Duyck 已提交
1934
		wr32(E1000_PBA, pba);
1935 1936 1937 1938 1939 1940 1941
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
1942 1943
	 * - the full Rx FIFO size minus one full frame
	 */
1944
	hwm = min(((pba << 10) * 9 / 10),
A
Alexander Duyck 已提交
1945
			((pba << 10) - 2 * adapter->max_frame_size));
1946

1947
	fc->high_water = hwm & 0xFFFFFFF0;	/* 16-byte granularity */
1948
	fc->low_water = fc->high_water - 16;
1949 1950
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
1951
	fc->current_mode = fc->requested_mode;
1952

1953 1954 1955
	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
1956

1957
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
G
Greg Rose 已提交
1958
			adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1959 1960

		/* ping all the active vfs to let them know we are going down */
1961
		igb_ping_all_vfs(adapter);
1962 1963 1964 1965 1966 1967

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

1968
	/* Allow time for pending master requests to run */
1969
	hw->mac.ops.reset_hw(hw);
1970 1971
	wr32(E1000_WUC, 0);

1972 1973 1974 1975 1976
	if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
		/* need to resetup here after media swap */
		adapter->ei.get_invariants(hw);
		adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
	}
1977 1978 1979
	if ((mac->type == e1000_82575) &&
	    (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
		igb_enable_mas(adapter);
1980
	}
1981
	if (hw->mac.ops.init_hw(hw))
1982
		dev_err(&pdev->dev, "Hardware Error\n");
1983

1984
	/* Flow control settings reset on hardware reset, so guarantee flow
1985 1986 1987 1988 1989
	 * control is off when forcing speed.
	 */
	if (!hw->mac.autoneg)
		igb_force_mac_fc(hw);

1990
	igb_init_dmac(adapter, pba);
1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
#ifdef CONFIG_IGB_HWMON
	/* Re-initialize the thermal sensor on i350 devices. */
	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (mac->type == e1000_i350 && hw->bus.func == 0) {
			/* If present, re-initialize the external thermal sensor
			 * interface.
			 */
			if (adapter->ets)
				mac->ops.init_thermal_sensor_thresh(hw);
		}
	}
#endif
J
Jeff Kirsher 已提交
2003
	/* Re-establish EEE setting */
2004 2005 2006 2007 2008
	if (hw->phy.media_type == e1000_media_type_copper) {
		switch (mac->type) {
		case e1000_i350:
		case e1000_i210:
		case e1000_i211:
2009
			igb_set_eee_i350(hw, true, true);
2010 2011
			break;
		case e1000_i354:
2012
			igb_set_eee_i354(hw, true, true);
2013 2014 2015 2016 2017
			break;
		default:
			break;
		}
	}
2018 2019 2020
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

2021 2022 2023 2024 2025
	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

2026 2027 2028
	/* Re-enable PTP, where applicable. */
	igb_ptp_reset(adapter);

2029
	igb_get_phy_info(hw);
2030 2031
}

2032 2033
/**
 *  igb_fix_features - force Tx VLAN acceleration to follow Rx
 *  @netdev: network interface device structure (not inspected)
 *  @features: feature set being requested
 *
 *  Returns @features with NETIF_F_HW_VLAN_CTAG_TX set when
 *  NETIF_F_HW_VLAN_CTAG_RX is set and cleared when it is not.
 **/
static netdev_features_t igb_fix_features(struct net_device *netdev,
	netdev_features_t features)
{
	/* Since there is no support for separate Rx/Tx vlan accel
	 * enable/disable make sure Tx flag is always in same state as Rx.
	 */
	if (features & NETIF_F_HW_VLAN_CTAG_RX)
		features |= NETIF_F_HW_VLAN_CTAG_TX;
	else
		features &= ~NETIF_F_HW_VLAN_CTAG_TX;

	return features;
}

2046 2047
/**
 *  igb_set_features - apply a changed feature set to the device
 *  @netdev: network interface device structure
 *  @features: new feature set
 *
 *  Updates the VLAN mode when NETIF_F_HW_VLAN_CTAG_RX toggled.  Only a
 *  change of NETIF_F_RXALL goes further: @features is stored in the
 *  netdev and the adapter is re-initialized (running interface) or reset
 *  (stopped interface).  Always returns 0.
 **/
static int igb_set_features(struct net_device *netdev,
	netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (changed & NETIF_F_HW_VLAN_CTAG_RX)
		igb_vlan_mode(netdev, features);

	/* nothing else needs a reset; note netdev->features is left
	 * untouched by this function on this path
	 */
	if (!(changed & NETIF_F_RXALL))
		return 0;

	netdev->features = features;

	if (netif_running(netdev))
		igb_reinit_locked(adapter);
	else
		igb_reset(adapter);

	return 0;
}

S
Stephen Hemminger 已提交
2068
static const struct net_device_ops igb_netdev_ops = {
2069
	.ndo_open		= igb_open,
S
Stephen Hemminger 已提交
2070
	.ndo_stop		= igb_close,
2071
	.ndo_start_xmit		= igb_xmit_frame,
E
Eric Dumazet 已提交
2072
	.ndo_get_stats64	= igb_get_stats64,
2073
	.ndo_set_rx_mode	= igb_set_rx_mode,
S
Stephen Hemminger 已提交
2074 2075 2076 2077 2078 2079 2080
	.ndo_set_mac_address	= igb_set_mac,
	.ndo_change_mtu		= igb_change_mtu,
	.ndo_do_ioctl		= igb_ioctl,
	.ndo_tx_timeout		= igb_tx_timeout,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_vlan_rx_add_vid	= igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
2081 2082
	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
2083
	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
L
Lior Levy 已提交
2084
	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
2085
	.ndo_get_vf_config	= igb_ndo_get_vf_config,
S
Stephen Hemminger 已提交
2086 2087 2088
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= igb_netpoll,
#endif
J
Jiri Pirko 已提交
2089 2090
	.ndo_fix_features	= igb_fix_features,
	.ndo_set_features	= igb_set_features,
2091
	.ndo_features_check	= passthru_features_check,
S
Stephen Hemminger 已提交
2092 2093
};

2094 2095 2096 2097 2098 2099 2100
/**
 * igb_set_fw_version - Configure version string for ethtool
 * @adapter: adapter struct
 **/
void igb_set_fw_version(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
2101 2102 2103 2104 2105
	struct e1000_fw_version fw;

	igb_get_fw_version(hw, &fw);

	switch (hw->mac.type) {
2106
	case e1000_i210:
2107
	case e1000_i211:
2108 2109 2110 2111 2112 2113 2114 2115 2116
		if (!(igb_get_flash_presence_i210(hw))) {
			snprintf(adapter->fw_version,
				 sizeof(adapter->fw_version),
				 "%2d.%2d-%d",
				 fw.invm_major, fw.invm_minor,
				 fw.invm_img_type);
			break;
		}
		/* fall through */
2117 2118 2119 2120 2121 2122 2123 2124 2125
	default:
		/* if option is rom valid, display its version too */
		if (fw.or_valid) {
			snprintf(adapter->fw_version,
				 sizeof(adapter->fw_version),
				 "%d.%d, 0x%08x, %d.%d.%d",
				 fw.eep_major, fw.eep_minor, fw.etrack_id,
				 fw.or_major, fw.or_build, fw.or_patch);
		/* no option rom */
2126
		} else if (fw.etrack_id != 0X0000) {
2127
			snprintf(adapter->fw_version,
2128 2129 2130 2131 2132 2133 2134 2135
			    sizeof(adapter->fw_version),
			    "%d.%d, 0x%08x",
			    fw.eep_major, fw.eep_minor, fw.etrack_id);
		} else {
		snprintf(adapter->fw_version,
		    sizeof(adapter->fw_version),
		    "%d.%d.%d",
		    fw.eep_major, fw.eep_minor, fw.eep_build);
2136 2137
		}
		break;
2138 2139 2140
	}
}

2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192
/**
 * igb_init_mas - init Media Autosense feature if enabled in the NVM
 *
 * @adapter: adapter struct
 **/
static void igb_init_mas(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 eeprom_data;

	hw->nvm.ops.read(hw, NVM_COMPAT, 1, &eeprom_data);
	switch (hw->bus.func) {
	case E1000_FUNC_0:
		if (eeprom_data & IGB_MAS_ENABLE_0) {
			adapter->flags |= IGB_FLAG_MAS_ENABLE;
			netdev_info(adapter->netdev,
				"MAS: Enabling Media Autosense for port %d\n",
				hw->bus.func);
		}
		break;
	case E1000_FUNC_1:
		if (eeprom_data & IGB_MAS_ENABLE_1) {
			adapter->flags |= IGB_FLAG_MAS_ENABLE;
			netdev_info(adapter->netdev,
				"MAS: Enabling Media Autosense for port %d\n",
				hw->bus.func);
		}
		break;
	case E1000_FUNC_2:
		if (eeprom_data & IGB_MAS_ENABLE_2) {
			adapter->flags |= IGB_FLAG_MAS_ENABLE;
			netdev_info(adapter->netdev,
				"MAS: Enabling Media Autosense for port %d\n",
				hw->bus.func);
		}
		break;
	case E1000_FUNC_3:
		if (eeprom_data & IGB_MAS_ENABLE_3) {
			adapter->flags |= IGB_FLAG_MAS_ENABLE;
			netdev_info(adapter->netdev,
				"MAS: Enabling Media Autosense for port %d\n",
				hw->bus.func);
		}
		break;
	default:
		/* Shouldn't get here */
		netdev_err(adapter->netdev,
			"MAS: Invalid port configuration, returning\n");
		break;
	}
}

2193 2194
/**
 *  igb_init_i2c - Init I2C interface
C
Carolyn Wyborny 已提交
2195
 *  @adapter: pointer to adapter structure
2196
 **/
C
Carolyn Wyborny 已提交
2197 2198
static s32 igb_init_i2c(struct igb_adapter *adapter)
{
T
Todd Fujinaka 已提交
2199
	s32 status = 0;
C
Carolyn Wyborny 已提交
2200 2201 2202

	/* I2C interface supported on i350 devices */
	if (adapter->hw.mac.type != e1000_i350)
T
Todd Fujinaka 已提交
2203
		return 0;
C
Carolyn Wyborny 已提交
2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219

	/* Initialize the i2c bus which is controlled by the registers.
	 * This bus will use the i2c_algo_bit structue that implements
	 * the protocol through toggling of the 4 bits in the register.
	 */
	adapter->i2c_adap.owner = THIS_MODULE;
	adapter->i2c_algo = igb_i2c_algo;
	adapter->i2c_algo.data = adapter;
	adapter->i2c_adap.algo_data = &adapter->i2c_algo;
	adapter->i2c_adap.dev.parent = &adapter->pdev->dev;
	strlcpy(adapter->i2c_adap.name, "igb BB",
		sizeof(adapter->i2c_adap.name));
	status = i2c_bit_add_bus(&adapter->i2c_adap);
	return status;
}

2220
/**
2221 2222 2223
 *  igb_probe - Device Initialization Routine
 *  @pdev: PCI device information struct
 *  @ent: entry in igb_pci_tbl
2224
 *
2225
 *  Returns 0 on success, negative on failure
2226
 *
2227 2228 2229
 *  igb_probe initializes an adapter identified by a pci_dev structure.
 *  The OS initialization, configuring of the adapter private structure,
 *  and a hardware reset occur.
2230
 **/
2231
static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
2232 2233 2234 2235
{
	struct net_device *netdev;
	struct igb_adapter *adapter;
	struct e1000_hw *hw;
2236
	u16 eeprom_data = 0;
2237
	s32 ret_val;
2238
	static int global_quad_port_a; /* global quad port a indication */
2239
	const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
2240
	int err, pci_using_dac;
2241
	u8 part_str[E1000_PBANUM_LENGTH];
2242

2243 2244 2245 2246 2247
	/* Catch broken hardware that put the wrong VF device ID in
	 * the PCIe SR-IOV capability.
	 */
	if (pdev->is_virtfn) {
		WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
2248
			pci_name(pdev), pdev->vendor, pdev->device);
2249 2250 2251
		return -EINVAL;
	}

2252
	err = pci_enable_device_mem(pdev);
2253 2254 2255 2256
	if (err)
		return err;

	pci_using_dac = 0;
2257
	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
2258
	if (!err) {
2259
		pci_using_dac = 1;
2260
	} else {
2261
		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
2262
		if (err) {
2263 2264 2265
			dev_err(&pdev->dev,
				"No usable DMA configuration, aborting\n");
			goto err_dma;
2266 2267 2268
		}
	}

2269
	err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
2270 2271
					   IORESOURCE_MEM),
					   igb_driver_name);
2272 2273 2274
	if (err)
		goto err_pci_reg;

2275
	pci_enable_pcie_error_reporting(pdev);
2276

2277
	pci_set_master(pdev);
2278
	pci_save_state(pdev);
2279 2280

	err = -ENOMEM;
2281
	netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
2282
				   IGB_MAX_TX_QUEUES);
2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293
	if (!netdev)
		goto err_alloc_etherdev;

	SET_NETDEV_DEV(netdev, &pdev->dev);

	pci_set_drvdata(pdev, netdev);
	adapter = netdev_priv(netdev);
	adapter->netdev = netdev;
	adapter->pdev = pdev;
	hw = &adapter->hw;
	hw->back = adapter;
2294
	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
2295 2296

	err = -EIO;
2297
	hw->hw_addr = pci_iomap(pdev, 0, 0);
2298
	if (!hw->hw_addr)
2299 2300
		goto err_ioremap;

S
Stephen Hemminger 已提交
2301
	netdev->netdev_ops = &igb_netdev_ops;
2302 2303 2304 2305 2306
	igb_set_ethtool_ops(netdev);
	netdev->watchdog_timeo = 5 * HZ;

	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);

2307 2308
	netdev->mem_start = pci_resource_start(pdev, 0);
	netdev->mem_end = pci_resource_end(pdev, 0);
2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323

	/* PCI config space info */
	hw->vendor_id = pdev->vendor;
	hw->device_id = pdev->device;
	hw->revision_id = pdev->revision;
	hw->subsystem_vendor_id = pdev->subsystem_vendor;
	hw->subsystem_device_id = pdev->subsystem_device;

	/* Copy the default MAC, PHY and NVM function pointers */
	memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
	memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
	memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
	/* Initialize skew-specific constants */
	err = ei->get_invariants(hw);
	if (err)
2324
		goto err_sw_init;
2325

2326
	/* setup the private structure */
2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345
	err = igb_sw_init(adapter);
	if (err)
		goto err_sw_init;

	igb_get_bus_info_pcie(hw);

	hw->phy.autoneg_wait_to_complete = false;

	/* Copper options */
	if (hw->phy.media_type == e1000_media_type_copper) {
		hw->phy.mdix = AUTO_ALL_MODES;
		hw->phy.disable_polarity_correction = false;
		hw->phy.ms_type = e1000_ms_hw_default;
	}

	if (igb_check_reset_block(hw))
		dev_info(&pdev->dev,
			"PHY reset is blocked due to SOL/IDER session.\n");

2346
	/* features is initialized to 0 in allocation, it might have bits
2347 2348 2349 2350 2351 2352 2353 2354 2355 2356
	 * set by igb_sw_init so we should use an or instead of an
	 * assignment.
	 */
	netdev->features |= NETIF_F_SG |
			    NETIF_F_IP_CSUM |
			    NETIF_F_IPV6_CSUM |
			    NETIF_F_TSO |
			    NETIF_F_TSO6 |
			    NETIF_F_RXHASH |
			    NETIF_F_RXCSUM |
2357 2358
			    NETIF_F_HW_VLAN_CTAG_RX |
			    NETIF_F_HW_VLAN_CTAG_TX;
2359 2360 2361

	/* copy netdev features into list of user selectable features */
	netdev->hw_features |= netdev->features;
B
Ben Greear 已提交
2362
	netdev->hw_features |= NETIF_F_RXALL;
2363 2364

	/* set this bit last since it cannot be part of hw_features */
2365
	netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
2366 2367 2368 2369 2370 2371

	netdev->vlan_features |= NETIF_F_TSO |
				 NETIF_F_TSO6 |
				 NETIF_F_IP_CSUM |
				 NETIF_F_IPV6_CSUM |
				 NETIF_F_SG;
2372

2373 2374
	netdev->priv_flags |= IFF_SUPP_NOFCS;

2375
	if (pci_using_dac) {
2376
		netdev->features |= NETIF_F_HIGHDMA;
2377 2378
		netdev->vlan_features |= NETIF_F_HIGHDMA;
	}
2379

2380 2381
	if (hw->mac.type >= e1000_82576) {
		netdev->hw_features |= NETIF_F_SCTP_CSUM;
2382
		netdev->features |= NETIF_F_SCTP_CSUM;
2383
	}
2384

2385 2386
	netdev->priv_flags |= IFF_UNICAST_FLT;

2387
	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2388 2389

	/* before reading the NVM, reset the controller to put the device in a
2390 2391
	 * known good starting state
	 */
2392 2393
	hw->mac.ops.reset_hw(hw);

2394 2395
	/* make sure the NVM is good , i211/i210 parts can have special NVM
	 * that doesn't contain a checksum
2396
	 */
2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409
	switch (hw->mac.type) {
	case e1000_i210:
	case e1000_i211:
		if (igb_get_flash_presence_i210(hw)) {
			if (hw->nvm.ops.validate(hw) < 0) {
				dev_err(&pdev->dev,
					"The NVM Checksum Is Not Valid\n");
				err = -EIO;
				goto err_eeprom;
			}
		}
		break;
	default:
2410 2411 2412 2413 2414
		if (hw->nvm.ops.validate(hw) < 0) {
			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
			err = -EIO;
			goto err_eeprom;
		}
2415
		break;
2416 2417 2418 2419 2420 2421 2422 2423
	}

	/* copy the MAC address out of the NVM */
	if (hw->mac.ops.read_mac_addr(hw))
		dev_err(&pdev->dev, "NVM Read Error\n");

	memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);

2424
	if (!is_valid_ether_addr(netdev->dev_addr)) {
2425 2426 2427 2428 2429
		dev_err(&pdev->dev, "Invalid MAC Address\n");
		err = -EIO;
		goto err_eeprom;
	}

2430 2431 2432
	/* get firmware version for ethtool -i */
	igb_set_fw_version(adapter);

2433 2434 2435 2436 2437 2438
	/* configure RXPBSIZE and TXPBSIZE */
	if (hw->mac.type == e1000_i210) {
		wr32(E1000_RXPBS, I210_RXPBSIZE_DEFAULT);
		wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
	}

2439
	setup_timer(&adapter->watchdog_timer, igb_watchdog,
2440
		    (unsigned long) adapter);
2441
	setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2442
		    (unsigned long) adapter);
2443 2444 2445 2446

	INIT_WORK(&adapter->reset_task, igb_reset_task);
	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);

2447
	/* Initialize link properties that are user-changeable */
2448 2449 2450 2451
	adapter->fc_autoneg = true;
	hw->mac.autoneg = true;
	hw->phy.autoneg_advertised = 0x2f;

2452 2453
	hw->fc.requested_mode = e1000_fc_default;
	hw->fc.current_mode = e1000_fc_default;
2454 2455 2456

	igb_validate_mdi_setting(hw);

2457
	/* By default, support wake on port A */
2458
	if (hw->bus.func == 0)
2459 2460 2461 2462
		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;

	/* Check the NVM for wake support on non-port A ports */
	if (hw->mac.type >= e1000_82580)
2463
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2464 2465
				 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
				 &eeprom_data);
2466 2467
	else if (hw->bus.func == 1)
		hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2468

2469 2470
	if (eeprom_data & IGB_EEPROM_APME)
		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
2471 2472 2473

	/* now that we have the eeprom settings, apply the special cases where
	 * the eeprom may be wrong or the board simply won't support wake on
2474 2475
	 * lan on a particular port
	 */
2476 2477
	switch (pdev->device) {
	case E1000_DEV_ID_82575GB_QUAD_COPPER:
2478
		adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2479 2480
		break;
	case E1000_DEV_ID_82575EB_FIBER_SERDES:
A
Alexander Duyck 已提交
2481 2482
	case E1000_DEV_ID_82576_FIBER:
	case E1000_DEV_ID_82576_SERDES:
2483
		/* Wake events only supported on port A for dual fiber
2484 2485
		 * regardless of eeprom setting
		 */
2486
		if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2487
			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2488
		break;
2489
	case E1000_DEV_ID_82576_QUAD_COPPER:
2490
	case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2491 2492
		/* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
2493
			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2494 2495 2496 2497 2498 2499
		else
			adapter->flags |= IGB_FLAG_QUAD_PORT_A;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
		break;
2500 2501 2502 2503
	default:
		/* If the device can't wake, don't set software support */
		if (!device_can_wakeup(&adapter->pdev->dev))
			adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED;
2504 2505 2506
	}

	/* initialize the wol settings based on the eeprom settings */
2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518
	if (adapter->flags & IGB_FLAG_WOL_SUPPORTED)
		adapter->wol |= E1000_WUFC_MAG;

	/* Some vendors want WoL disabled by default, but still supported */
	if ((hw->mac.type == e1000_i350) &&
	    (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) {
		adapter->flags |= IGB_FLAG_WOL_SUPPORTED;
		adapter->wol = 0;
	}

	device_set_wakeup_enable(&adapter->pdev->dev,
				 adapter->flags & IGB_FLAG_WOL_SUPPORTED);
2519 2520 2521 2522

	/* reset the hardware with the new settings */
	igb_reset(adapter);

C
Carolyn Wyborny 已提交
2523 2524 2525 2526 2527 2528 2529
	/* Init the I2C interface */
	err = igb_init_i2c(adapter);
	if (err) {
		dev_err(&pdev->dev, "failed to init i2c interface\n");
		goto err_eeprom;
	}

2530
	/* let the f/w know that the h/w is now under the control of the
2531 2532
	 * driver.
	 */
2533 2534 2535 2536 2537 2538 2539
	igb_get_hw_control(adapter);

	strcpy(netdev->name, "eth%d");
	err = register_netdev(netdev);
	if (err)
		goto err_register;

2540 2541 2542
	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);

2543
#ifdef CONFIG_IGB_DCA
2544
	if (dca_add_requester(&pdev->dev) == 0) {
2545
		adapter->flags |= IGB_FLAG_DCA_ENABLED;
J
Jeb Cramer 已提交
2546 2547 2548 2549
		dev_info(&pdev->dev, "DCA enabled\n");
		igb_setup_dca(adapter);
	}

P
Patrick Ohly 已提交
2550
#endif
2551 2552 2553 2554
#ifdef CONFIG_IGB_HWMON
	/* Initialize the thermal sensor on i350 devices. */
	if (hw->mac.type == e1000_i350 && hw->bus.func == 0) {
		u16 ets_word;
2555

2556
		/* Read the NVM to determine if this i350 device supports an
2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570
		 * external thermal sensor.
		 */
		hw->nvm.ops.read(hw, NVM_ETS_CFG, 1, &ets_word);
		if (ets_word != 0x0000 && ets_word != 0xFFFF)
			adapter->ets = true;
		else
			adapter->ets = false;
		if (igb_sysfs_init(adapter))
			dev_err(&pdev->dev,
				"failed to allocate sysfs resources\n");
	} else {
		adapter->ets = false;
	}
#endif
2571 2572 2573 2574 2575
	/* Check if Media Autosense is enabled */
	adapter->ei = *ei;
	if (hw->dev_spec._82575.mas_capable)
		igb_init_mas(adapter);

A
Anders Berggren 已提交
2576
	/* do hw tstamp init after resetting */
2577
	igb_ptp_init(adapter);
A
Anders Berggren 已提交
2578

2579
	dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593
	/* print bus type/speed/width info, not applicable to i354 */
	if (hw->mac.type != e1000_i354) {
		dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
			 netdev->name,
			 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
			  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
			   "unknown"),
			 ((hw->bus.width == e1000_bus_width_pcie_x4) ?
			  "Width x4" :
			  (hw->bus.width == e1000_bus_width_pcie_x2) ?
			  "Width x2" :
			  (hw->bus.width == e1000_bus_width_pcie_x1) ?
			  "Width x1" : "unknown"), netdev->dev_addr);
	}
2594

2595 2596 2597 2598 2599 2600 2601 2602
	if ((hw->mac.type >= e1000_i210 ||
	     igb_get_flash_presence_i210(hw))) {
		ret_val = igb_read_part_string(hw, part_str,
					       E1000_PBANUM_LENGTH);
	} else {
		ret_val = -E1000_ERR_INVM_VALUE_NOT_FOUND;
	}

2603 2604 2605
	if (ret_val)
		strcpy(part_str, "Unknown");
	dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2606 2607
	dev_info(&pdev->dev,
		"Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2608
		(adapter->flags & IGB_FLAG_HAS_MSIX) ? "MSI-X" :
2609
		(adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2610
		adapter->num_rx_queues, adapter->num_tx_queues);
2611 2612 2613 2614 2615 2616
	if (hw->phy.media_type == e1000_media_type_copper) {
		switch (hw->mac.type) {
		case e1000_i350:
		case e1000_i210:
		case e1000_i211:
			/* Enable EEE for internal copper PHY devices */
2617
			err = igb_set_eee_i350(hw, true, true);
2618 2619 2620 2621 2622 2623 2624 2625
			if ((!err) &&
			    (!hw->dev_spec._82575.eee_disable)) {
				adapter->eee_advert =
					MDIO_EEE_100TX | MDIO_EEE_1000T;
				adapter->flags |= IGB_FLAG_EEE;
			}
			break;
		case e1000_i354:
2626
			if ((rd32(E1000_CTRL_EXT) &
2627
			    E1000_CTRL_EXT_LINK_MODE_SGMII)) {
2628
				err = igb_set_eee_i354(hw, true, true);
2629 2630 2631 2632 2633 2634 2635 2636 2637 2638
				if ((!err) &&
					(!hw->dev_spec._82575.eee_disable)) {
					adapter->eee_advert =
					   MDIO_EEE_100TX | MDIO_EEE_1000T;
					adapter->flags |= IGB_FLAG_EEE;
				}
			}
			break;
		default:
			break;
2639
		}
2640
	}
Y
Yan, Zheng 已提交
2641
	pm_runtime_put_noidle(&pdev->dev);
2642 2643 2644 2645
	return 0;

err_register:
	igb_release_hw_control(adapter);
C
Carolyn Wyborny 已提交
2646
	memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap));
2647 2648
err_eeprom:
	if (!igb_check_reset_block(hw))
2649
		igb_reset_phy(hw);
2650 2651 2652 2653

	if (hw->flash_address)
		iounmap(hw->flash_address);
err_sw_init:
J
Jia-Ju Bai 已提交
2654
	kfree(adapter->shadow_vfta);
2655
	igb_clear_interrupt_scheme(adapter);
2656 2657 2658
#ifdef CONFIG_PCI_IOV
	igb_disable_sriov(pdev);
#endif
2659
	pci_iounmap(pdev, hw->hw_addr);
2660 2661 2662
err_ioremap:
	free_netdev(netdev);
err_alloc_etherdev:
2663
	pci_release_selected_regions(pdev,
2664
				     pci_select_bars(pdev, IORESOURCE_MEM));
2665 2666 2667 2668 2669 2670
err_pci_reg:
err_dma:
	pci_disable_device(pdev);
	return err;
}

2671
#ifdef CONFIG_PCI_IOV
2672
static int igb_disable_sriov(struct pci_dev *pdev)
2673 2674 2675 2676 2677 2678 2679 2680
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
2681
		if (pci_vfs_assigned(pdev)) {
2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712
			dev_warn(&pdev->dev,
				 "Cannot deallocate SR-IOV virtual functions while they are assigned - VFs will not be deallocated\n");
			return -EPERM;
		} else {
			pci_disable_sriov(pdev);
			msleep(500);
		}

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		adapter->vfs_allocated_count = 0;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		wrfl();
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");

		/* Re-enable DMA Coalescing flag since IOV is turned off */
		adapter->flags |= IGB_FLAG_DMAC;
	}

	return 0;
}

static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	int old_vfs = pci_num_vf(pdev);
	int err = 0;
	int i;

2713
	if (!(adapter->flags & IGB_FLAG_HAS_MSIX) || num_vfs > 7) {
2714 2715 2716
		err = -EPERM;
		goto out;
	}
2717 2718 2719
	if (!num_vfs)
		goto out;

2720 2721 2722 2723 2724 2725
	if (old_vfs) {
		dev_info(&pdev->dev, "%d pre-allocated VFs found - override max_vfs setting of %d\n",
			 old_vfs, max_vfs);
		adapter->vfs_allocated_count = old_vfs;
	} else
		adapter->vfs_allocated_count = num_vfs;
2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738

	adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
				sizeof(struct vf_data_storage), GFP_KERNEL);

	/* if allocation failed then we do not support SR-IOV */
	if (!adapter->vf_data) {
		adapter->vfs_allocated_count = 0;
		dev_err(&pdev->dev,
			"Unable to allocate memory for VF Data Storage\n");
		err = -ENOMEM;
		goto out;
	}

2739 2740 2741 2742 2743 2744
	/* only call pci_enable_sriov() if no VFs are allocated already */
	if (!old_vfs) {
		err = pci_enable_sriov(pdev, adapter->vfs_allocated_count);
		if (err)
			goto err_out;
	}
2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762
	dev_info(&pdev->dev, "%d VFs allocated\n",
		 adapter->vfs_allocated_count);
	for (i = 0; i < adapter->vfs_allocated_count; i++)
		igb_vf_configure(adapter, i);

	/* DMA Coalescing is not supported in IOV mode. */
	adapter->flags &= ~IGB_FLAG_DMAC;
	goto out;

err_out:
	kfree(adapter->vf_data);
	adapter->vf_data = NULL;
	adapter->vfs_allocated_count = 0;
out:
	return err;
}

#endif
2763
/**
C
Carolyn Wyborny 已提交
2764 2765
 *  igb_remove_i2c - Cleanup  I2C interface
 *  @adapter: pointer to adapter structure
2766
 **/
C
Carolyn Wyborny 已提交
2767 2768 2769 2770 2771 2772
static void igb_remove_i2c(struct igb_adapter *adapter)
{
	/* free the adapter bus structure */
	i2c_del_adapter(&adapter->i2c_adap);
}

2773
/**
2774 2775
 *  igb_remove - Device Removal Routine
 *  @pdev: PCI device information struct
2776
 *
2777 2778 2779 2780
 *  igb_remove is called by the PCI subsystem to alert the driver
 *  that it should release a PCI device.  The could be caused by a
 *  Hot-Plug event, or because the driver is going to be removed from
 *  memory.
2781
 **/
2782
static void igb_remove(struct pci_dev *pdev)
2783 2784 2785
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
J
Jeb Cramer 已提交
2786
	struct e1000_hw *hw = &adapter->hw;
2787

Y
Yan, Zheng 已提交
2788
	pm_runtime_get_noresume(&pdev->dev);
2789 2790 2791
#ifdef CONFIG_IGB_HWMON
	igb_sysfs_exit(adapter);
#endif
C
Carolyn Wyborny 已提交
2792
	igb_remove_i2c(adapter);
2793
	igb_ptp_stop(adapter);
2794
	/* The watchdog timer may be rescheduled, so explicitly
2795 2796
	 * disable watchdog from being rescheduled.
	 */
2797 2798 2799 2800
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

2801 2802
	cancel_work_sync(&adapter->reset_task);
	cancel_work_sync(&adapter->watchdog_task);
2803

2804
#ifdef CONFIG_IGB_DCA
2805
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
J
Jeb Cramer 已提交
2806 2807
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
2808
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
A
Alexander Duyck 已提交
2809
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
J
Jeb Cramer 已提交
2810 2811 2812
	}
#endif

2813
	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
2814 2815
	 * would have already happened in close and is redundant.
	 */
2816 2817
	igb_release_hw_control(adapter);

2818
#ifdef CONFIG_PCI_IOV
2819
	igb_disable_sriov(pdev);
2820
#endif
2821

2822 2823 2824 2825
	unregister_netdev(netdev);

	igb_clear_interrupt_scheme(adapter);

2826
	pci_iounmap(pdev, hw->hw_addr);
2827 2828
	if (hw->flash_address)
		iounmap(hw->flash_address);
2829
	pci_release_selected_regions(pdev,
2830
				     pci_select_bars(pdev, IORESOURCE_MEM));
2831

2832
	kfree(adapter->shadow_vfta);
2833 2834
	free_netdev(netdev);

2835
	pci_disable_pcie_error_reporting(pdev);
2836

2837 2838 2839
	pci_disable_device(pdev);
}

2840
/**
 *  igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 *  @adapter: board private structure to initialize
 *
 *  Sets up the vf specific data storage and then tries to allocate the
 *  VFs.  The steps are ordered this way because disabling SR-IOV costs
 *  far more time than allocating and freeing the memory for the VFs.
 *  The function body is empty when CONFIG_PCI_IOV is not set.
 **/
static void igb_probe_vfs(struct igb_adapter *adapter)
{
#ifdef CONFIG_PCI_IOV
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_i210:
	case e1000_i211:
		/* Virtualization features not supported on i210 family. */
		return;
	default:
		break;
	}

	pci_sriov_set_totalvfs(pdev, 7);
	igb_enable_sriov(pdev, max_vfs);

#endif /* CONFIG_PCI_IOV */
}

2865
static void igb_init_queue_configuration(struct igb_adapter *adapter)
2866 2867
{
	struct e1000_hw *hw = &adapter->hw;
2868
	u32 max_rss_queues;
2869

2870
	/* Determine the maximum number of RSS queues supported. */
2871
	switch (hw->mac.type) {
2872 2873 2874 2875
	case e1000_i211:
		max_rss_queues = IGB_MAX_RX_QUEUES_I211;
		break;
	case e1000_82575:
2876
	case e1000_i210:
2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892
		max_rss_queues = IGB_MAX_RX_QUEUES_82575;
		break;
	case e1000_i350:
		/* I350 cannot do RSS and SR-IOV at the same time */
		if (!!adapter->vfs_allocated_count) {
			max_rss_queues = 1;
			break;
		}
		/* fall through */
	case e1000_82576:
		if (!!adapter->vfs_allocated_count) {
			max_rss_queues = 2;
			break;
		}
		/* fall through */
	case e1000_82580:
2893
	case e1000_i354:
2894 2895
	default:
		max_rss_queues = IGB_MAX_RX_QUEUES;
2896
		break;
2897 2898 2899 2900
	}

	adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());

2901 2902 2903 2904 2905 2906 2907 2908
	igb_set_flag_queue_pairs(adapter, max_rss_queues);
}

void igb_set_flag_queue_pairs(struct igb_adapter *adapter,
			      const u32 max_rss_queues)
{
	struct e1000_hw *hw = &adapter->hw;

2909 2910 2911
	/* Determine if we need to pair queues. */
	switch (hw->mac.type) {
	case e1000_82575:
2912
	case e1000_i211:
2913
		/* Device supports enough interrupts without queue pairing. */
2914
		break;
2915
	case e1000_82576:
2916
		/* If VFs are going to be allocated with RSS queues then we
2917 2918 2919 2920 2921 2922 2923 2924 2925
		 * should pair the queues in order to conserve interrupts due
		 * to limited supply.
		 */
		if ((adapter->rss_queues > 1) &&
		    (adapter->vfs_allocated_count > 6))
			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
		/* fall through */
	case e1000_82580:
	case e1000_i350:
2926
	case e1000_i354:
2927
	case e1000_i210:
2928
	default:
2929
		/* If rss_queues > half of max_rss_queues, pair the queues in
2930 2931 2932 2933
		 * order to conserve interrupts due to limited supply.
		 */
		if (adapter->rss_queues > (max_rss_queues / 2))
			adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2934 2935
		break;
	}
2936 2937 2938
}

/**
2939 2940
 *  igb_sw_init - Initialize general software structures (struct igb_adapter)
 *  @adapter: board private structure to initialize
2941
 *
2942 2943 2944
 *  igb_sw_init initializes the Adapter private data structure.
 *  Fields are initialized based on PCI device information and
 *  OS network device settings (MTU size).
2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976
 **/
static int igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	/* set default ring sizes */
	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;

	/* set default ITR values */
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;

	/* set default work limits */
	adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
				  VLAN_HLEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

	spin_lock_init(&adapter->stats64_lock);
#ifdef CONFIG_PCI_IOV
	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_i350:
		if (max_vfs > 7) {
			dev_warn(&pdev->dev,
				 "Maximum of 7 VFs per PF, using max\n");
2977
			max_vfs = adapter->vfs_allocated_count = 7;
2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988
		} else
			adapter->vfs_allocated_count = max_vfs;
		if (adapter->vfs_allocated_count)
			dev_warn(&pdev->dev,
				 "Enabling SR-IOV VFs using the module parameter is deprecated - please use the pci sysfs interface.\n");
		break;
	default:
		break;
	}
#endif /* CONFIG_PCI_IOV */

2989 2990 2991
	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
	adapter->flags |= IGB_FLAG_HAS_MSIX;

2992 2993
	igb_probe_vfs(adapter);

2994
	igb_init_queue_configuration(adapter);
2995

2996
	/* Setup and initialize a copy of the hw vlan table array */
2997 2998
	adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
				       GFP_ATOMIC);
2999

3000
	/* This call may decrease the number of queues */
3001
	if (igb_init_interrupt_scheme(adapter, true)) {
3002 3003 3004 3005 3006 3007 3008
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

3009
	if (hw->mac.type >= e1000_i350)
3010 3011
		adapter->flags &= ~IGB_FLAG_DMAC;

3012 3013 3014 3015 3016
	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}

/**
3017 3018
 *  igb_open - Called when a network interface is made active
 *  @netdev: network interface device structure
3019
 *
3020
 *  Returns 0 on success, negative value on failure
3021
 *
3022 3023 3024 3025 3026
 *  The open entry point is called when a network interface is made
 *  active by the system (IFF_UP).  At this point all resources needed
 *  for transmit and receive operations are allocated, the interrupt
 *  handler is registered with the OS, the watchdog timer is started,
 *  and the stack is notified that the interface is ready.
3027
 **/
Y
Yan, Zheng 已提交
3028
static int __igb_open(struct net_device *netdev, bool resuming)
3029 3030 3031
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
Y
Yan, Zheng 已提交
3032
	struct pci_dev *pdev = adapter->pdev;
3033 3034 3035 3036
	int err;
	int i;

	/* disallow open during test */
Y
Yan, Zheng 已提交
3037 3038
	if (test_bit(__IGB_TESTING, &adapter->state)) {
		WARN_ON(resuming);
3039
		return -EBUSY;
Y
Yan, Zheng 已提交
3040 3041 3042 3043
	}

	if (!resuming)
		pm_runtime_get_sync(&pdev->dev);
3044

3045 3046
	netif_carrier_off(netdev);

3047 3048 3049 3050 3051 3052 3053 3054 3055 3056
	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

3057
	igb_power_up_link(adapter);
3058 3059 3060 3061

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
3062 3063
	 * clean_rx handler before we do so.
	 */
3064 3065 3066 3067 3068 3069
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080
	/* Notify the stack of the actual queue counts. */
	err = netif_set_real_num_tx_queues(adapter->netdev,
					   adapter->num_tx_queues);
	if (err)
		goto err_set_queues;

	err = netif_set_real_num_rx_queues(adapter->netdev,
					   adapter->num_rx_queues);
	if (err)
		goto err_set_queues;

3081 3082 3083
	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

3084 3085
	for (i = 0; i < adapter->num_q_vectors; i++)
		napi_enable(&(adapter->q_vector[i]->napi));
3086 3087 3088

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
P
PJ Waskiewicz 已提交
3089 3090 3091

	igb_irq_enable(adapter);

3092 3093 3094
	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
3095

3096 3097 3098 3099
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

3100 3101
	netif_tx_start_all_queues(netdev);

Y
Yan, Zheng 已提交
3102 3103 3104
	if (!resuming)
		pm_runtime_put(&pdev->dev);

3105 3106 3107
	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);
3108 3109 3110

	return 0;

3111 3112
err_set_queues:
	igb_free_irq(adapter);
3113 3114
err_req_irq:
	igb_release_hw_control(adapter);
3115
	igb_power_down_link(adapter);
3116 3117 3118 3119 3120
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);
Y
Yan, Zheng 已提交
3121 3122
	if (!resuming)
		pm_runtime_put(&pdev->dev);
3123 3124 3125 3126

	return err;
}

Y
Yan, Zheng 已提交
3127 3128 3129 3130 3131
static int igb_open(struct net_device *netdev)
{
	return __igb_open(netdev, false);
}

3132
/**
3133 3134
 *  igb_close - Disables a network interface
 *  @netdev: network interface device structure
3135
 *
3136
 *  Returns 0, this is not allowed to fail
3137
 *
3138 3139 3140 3141
 *  The close entry point is called when an interface is de-activated
 *  by the OS.  The hardware is still under the driver's control, but
 *  needs to be disabled.  A global MAC reset is issued to stop the
 *  hardware, and all transmit and receive resources are freed.
3142
 **/
Y
Yan, Zheng 已提交
3143
static int __igb_close(struct net_device *netdev, bool suspending)
3144 3145
{
	struct igb_adapter *adapter = netdev_priv(netdev);
Y
Yan, Zheng 已提交
3146
	struct pci_dev *pdev = adapter->pdev;
3147 3148 3149

	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));

Y
Yan, Zheng 已提交
3150 3151 3152 3153
	if (!suspending)
		pm_runtime_get_sync(&pdev->dev);

	igb_down(adapter);
3154 3155 3156 3157 3158
	igb_free_irq(adapter);

	igb_free_all_tx_resources(adapter);
	igb_free_all_rx_resources(adapter);

Y
Yan, Zheng 已提交
3159 3160
	if (!suspending)
		pm_runtime_put_sync(&pdev->dev);
3161 3162 3163
	return 0;
}

Y
Yan, Zheng 已提交
3164 3165 3166 3167 3168
static int igb_close(struct net_device *netdev)
{
	return __igb_close(netdev, false);
}

3169
/**
3170 3171
 *  igb_setup_tx_resources - allocate Tx resources (Descriptors)
 *  @tx_ring: tx descriptor ring (for a specific queue) to setup
3172
 *
3173
 *  Return 0 on success, negative on failure
3174
 **/
3175
int igb_setup_tx_resources(struct igb_ring *tx_ring)
3176
{
3177
	struct device *dev = tx_ring->dev;
3178 3179
	int size;

3180
	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3181 3182

	tx_ring->tx_buffer_info = vzalloc(size);
3183
	if (!tx_ring->tx_buffer_info)
3184 3185 3186
		goto err;

	/* round up to nearest 4K */
3187
	tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
3188 3189
	tx_ring->size = ALIGN(tx_ring->size, 4096);

3190 3191
	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
					   &tx_ring->dma, GFP_KERNEL);
3192 3193 3194 3195 3196
	if (!tx_ring->desc)
		goto err;

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
3197

3198 3199 3200
	return 0;

err:
3201
	vfree(tx_ring->tx_buffer_info);
3202 3203
	tx_ring->tx_buffer_info = NULL;
	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
3204 3205 3206 3207
	return -ENOMEM;
}

/**
3208 3209 3210
 *  igb_setup_all_tx_resources - wrapper to allocate Tx resources
 *				 (Descriptors) for all queues
 *  @adapter: board private structure
3211
 *
3212
 *  Return 0 on success, negative on failure
3213 3214 3215
 **/
static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
{
3216
	struct pci_dev *pdev = adapter->pdev;
3217 3218 3219
	int i, err = 0;

	for (i = 0; i < adapter->num_tx_queues; i++) {
3220
		err = igb_setup_tx_resources(adapter->tx_ring[i]);
3221
		if (err) {
3222
			dev_err(&pdev->dev,
3223 3224
				"Allocation for Tx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
3225
				igb_free_tx_resources(adapter->tx_ring[i]);
3226 3227 3228 3229 3230 3231 3232 3233
			break;
		}
	}

	return err;
}

/**
3234 3235
 *  igb_setup_tctl - configure the transmit control registers
 *  @adapter: Board private structure
3236
 **/
3237
void igb_setup_tctl(struct igb_adapter *adapter)
3238 3239 3240 3241
{
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl;

3242 3243
	/* disable queue 0 which is enabled by default on 82575 and 82576 */
	wr32(E1000_TXDCTL(0), 0);
3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258

	/* Program the Transmit Control Register */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	igb_config_collision_dist(hw);

	/* Enable transmits */
	tctl |= E1000_TCTL_EN;

	wr32(E1000_TCTL, tctl);
}

3259
/**
3260 3261 3262
 *  igb_configure_tx_ring - Configure transmit ring after Reset
 *  @adapter: board private structure
 *  @ring: tx ring to configure
3263
 *
3264
 *  Configure a transmit ring after a reset.
3265
 **/
3266
void igb_configure_tx_ring(struct igb_adapter *adapter,
3267
			   struct igb_ring *ring)
3268 3269
{
	struct e1000_hw *hw = &adapter->hw;
3270
	u32 txdctl = 0;
3271 3272 3273 3274
	u64 tdba = ring->dma;
	int reg_idx = ring->reg_idx;

	/* disable the queue */
3275
	wr32(E1000_TXDCTL(reg_idx), 0);
3276 3277 3278 3279
	wrfl();
	mdelay(10);

	wr32(E1000_TDLEN(reg_idx),
3280
	     ring->count * sizeof(union e1000_adv_tx_desc));
3281
	wr32(E1000_TDBAL(reg_idx),
3282
	     tdba & 0x00000000ffffffffULL);
3283 3284
	wr32(E1000_TDBAH(reg_idx), tdba >> 32);

3285
	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
3286
	wr32(E1000_TDH(reg_idx), 0);
3287
	writel(0, ring->tail);
3288 3289 3290 3291 3292 3293 3294 3295 3296 3297

	txdctl |= IGB_TX_PTHRESH;
	txdctl |= IGB_TX_HTHRESH << 8;
	txdctl |= IGB_TX_WTHRESH << 16;

	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
	wr32(E1000_TXDCTL(reg_idx), txdctl);
}

/**
 *  igb_configure_tx - Configure transmit Unit after Reset
 *  @adapter: board private structure
 *
 *  Runs igb_configure_tx_ring() on each of the adapter's Tx queues.
 **/
static void igb_configure_tx(struct igb_adapter *adapter)
{
	int q = 0;

	while (q < adapter->num_tx_queues) {
		igb_configure_tx_ring(adapter, adapter->tx_ring[q]);
		q++;
	}
}

3311
/**
3312 3313
 *  igb_setup_rx_resources - allocate Rx resources (Descriptors)
 *  @rx_ring: Rx descriptor ring (for a specific queue) to setup
3314
 *
3315
 *  Returns 0 on success, negative on failure
3316
 **/
3317
int igb_setup_rx_resources(struct igb_ring *rx_ring)
3318
{
3319
	struct device *dev = rx_ring->dev;
3320
	int size;
3321

3322
	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3323 3324

	rx_ring->rx_buffer_info = vzalloc(size);
3325
	if (!rx_ring->rx_buffer_info)
3326 3327 3328
		goto err;

	/* Round up to nearest 4K */
3329
	rx_ring->size = rx_ring->count * sizeof(union e1000_adv_rx_desc);
3330 3331
	rx_ring->size = ALIGN(rx_ring->size, 4096);

3332 3333
	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
					   &rx_ring->dma, GFP_KERNEL);
3334 3335 3336
	if (!rx_ring->desc)
		goto err;

3337
	rx_ring->next_to_alloc = 0;
3338 3339 3340 3341 3342 3343
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;

	return 0;

err:
3344 3345
	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;
3346
	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
3347 3348 3349 3350
	return -ENOMEM;
}

/**
3351 3352 3353
 *  igb_setup_all_rx_resources - wrapper to allocate Rx resources
 *				 (Descriptors) for all queues
 *  @adapter: board private structure
3354
 *
3355
 *  Return 0 on success, negative on failure
3356 3357 3358
 **/
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
3359
	struct pci_dev *pdev = adapter->pdev;
3360 3361 3362
	int i, err = 0;

	for (i = 0; i < adapter->num_rx_queues; i++) {
3363
		err = igb_setup_rx_resources(adapter->rx_ring[i]);
3364
		if (err) {
3365
			dev_err(&pdev->dev,
3366 3367
				"Allocation for Rx Queue %u failed\n", i);
			for (i--; i >= 0; i--)
3368
				igb_free_rx_resources(adapter->rx_ring[i]);
3369 3370 3371 3372 3373 3374 3375
			break;
		}
	}

	return err;
}

3376
/**
3377 3378
 *  igb_setup_mrqc - configure the multiple receive queue control registers
 *  @adapter: Board private structure
3379 3380 3381 3382 3383
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mrqc, rxcsum;
3384
	u32 j, num_rx_queues;
3385
	u32 rss_key[10];
3386

3387
	netdev_rss_key_fill(rss_key, sizeof(rss_key));
3388
	for (j = 0; j < 10; j++)
3389
		wr32(E1000_RSSRK(j), rss_key[j]);
3390

3391
	num_rx_queues = adapter->rss_queues;
3392

3393 3394 3395
	switch (hw->mac.type) {
	case e1000_82576:
		/* 82576 supports 2 RSS queues for SR-IOV */
3396
		if (adapter->vfs_allocated_count)
3397
			num_rx_queues = 2;
3398 3399 3400
		break;
	default:
		break;
3401 3402
	}

3403 3404
	if (adapter->rss_indir_tbl_init != num_rx_queues) {
		for (j = 0; j < IGB_RETA_SIZE; j++)
3405 3406
			adapter->rss_indir_tbl[j] =
			(j * num_rx_queues) / IGB_RETA_SIZE;
3407
		adapter->rss_indir_tbl_init = num_rx_queues;
3408
	}
3409
	igb_write_rss_indir_tbl(adapter);
3410

3411
	/* Disable raw packet checksumming so that RSS hash is placed in
3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	if (adapter->hw.mac.type >= e1000_82576)
		/* Enable Receive Checksum Offload for SCTP */
		rxcsum |= E1000_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(E1000_RXCSUM, rxcsum);
3424

3425 3426 3427
	/* Generate RSS hash based on packet types, TCP/UDP
	 * port numbers and/or IPv4/v6 src and dst addresses
	 */
3428 3429 3430 3431 3432
	mrqc = E1000_MRQC_RSS_FIELD_IPV4 |
	       E1000_MRQC_RSS_FIELD_IPV4_TCP |
	       E1000_MRQC_RSS_FIELD_IPV6 |
	       E1000_MRQC_RSS_FIELD_IPV6_TCP |
	       E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
3433

3434 3435 3436 3437 3438
	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
	if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP)
		mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;

3439 3440
	/* If VMDq is enabled then we set the appropriate mode for that, else
	 * we default to RSS so that an RSS hash is calculated per packet even
3441 3442
	 * if we are only using one queue
	 */
3443 3444 3445 3446
	if (adapter->vfs_allocated_count) {
		if (hw->mac.type > e1000_82575) {
			/* Set the default pool for the PF's first queue */
			u32 vtctl = rd32(E1000_VT_CTL);
3447

3448 3449 3450 3451 3452 3453
			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
				   E1000_VT_CTL_DISABLE_DEF_POOL);
			vtctl |= adapter->vfs_allocated_count <<
				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
			wr32(E1000_VT_CTL, vtctl);
		}
3454
		if (adapter->rss_queues > 1)
3455
			mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
3456
		else
3457
			mrqc |= E1000_MRQC_ENABLE_VMDQ;
3458
	} else {
3459 3460
		if (hw->mac.type != e1000_i211)
			mrqc |= E1000_MRQC_ENABLE_RSS_4Q;
3461 3462 3463 3464 3465 3466
	}
	igb_vmm_control(adapter);

	wr32(E1000_MRQC, mrqc);
}

3467
/**
3468 3469
 *  igb_setup_rctl - configure the receive control registers
 *  @adapter: Board private structure
3470
 **/
3471
void igb_setup_rctl(struct igb_adapter *adapter)
3472 3473 3474 3475 3476 3477 3478
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(E1000_RCTL);

	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3479
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3480

3481
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3482
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3483

3484
	/* enable stripping of CRC. It's unlikely this will break BMC
3485 3486
	 * redirection as it did with e1000. Newer features require
	 * that the HW strips the CRC.
3487
	 */
3488
	rctl |= E1000_RCTL_SECRC;
3489

3490
	/* disable store bad packets and clear size bits. */
3491
	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3492

A
Alexander Duyck 已提交
3493 3494
	/* enable LPE to prevent packets larger than max_frame_size */
	rctl |= E1000_RCTL_LPE;
3495

3496 3497
	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(E1000_RXDCTL(0), 0);
3498

3499 3500 3501 3502 3503 3504 3505 3506 3507
	/* Attention!!!  For SR-IOV PF driver operations you must enable
	 * queue drop for all VF and PF queues to prevent head of line blocking
	 * if an un-trusted VF does not provide descriptors to hardware.
	 */
	if (adapter->vfs_allocated_count) {
		/* set all queue drop enable bits */
		wr32(E1000_QDE, ALL_QUEUES);
	}

B
Ben Greear 已提交
3508 3509 3510
	/* This is useful for sniffing bad packets. */
	if (adapter->netdev->features & NETIF_F_RXALL) {
		/* UPE and MPE will be handled by normal PROMISC logic
3511 3512
		 * in e1000e_set_rx_mode
		 */
B
Ben Greear 已提交
3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524
		rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
			 E1000_RCTL_BAM | /* RX All Bcast Pkts */
			 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */

		rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
			  E1000_RCTL_DPF | /* Allow filtered pause */
			  E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
		/* Do not mess with E1000_CTRL_VME, it affects transmit as well,
		 * and that breaks VLANs.
		 */
	}

3525 3526 3527
	wr32(E1000_RCTL, rctl);
}

3528
static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3529
				   int vfn)
3530 3531 3532 3533 3534
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/* if it isn't the PF check to see if VFs are enabled and
3535 3536
	 * increase the size to support vlan tags
	 */
3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548
	if (vfn < adapter->vfs_allocated_count &&
	    adapter->vf_data[vfn].vlans_enabled)
		size += VLAN_TAG_SIZE;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr &= ~E1000_VMOLR_RLPML_MASK;
	vmolr |= size | E1000_VMOLR_LPE;
	wr32(E1000_VMOLR(vfn), vmolr);

	return 0;
}

3549
/**
3550 3551
 *  igb_rlpml_set - set maximum receive packet size
 *  @adapter: board private structure
3552
 *
3553
 *  Configure maximum receivable packet size.
3554 3555 3556
 **/
static void igb_rlpml_set(struct igb_adapter *adapter)
{
3557
	u32 max_frame_size = adapter->max_frame_size;
3558 3559 3560 3561 3562
	struct e1000_hw *hw = &adapter->hw;
	u16 pf_id = adapter->vfs_allocated_count;

	if (pf_id) {
		igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3563
		/* If we're in VMDQ or SR-IOV mode, then set global RLPML
3564 3565 3566 3567 3568
		 * to our max jumbo frame size, in case we need to enable
		 * jumbo frames on one of the rings later.
		 * This will not pass over-length frames into the default
		 * queue because it's gated by the VMOLR.RLPML.
		 */
3569
		max_frame_size = MAX_JUMBO_FRAME_SIZE;
3570 3571 3572 3573 3574
	}

	wr32(E1000_RLPML, max_frame_size);
}

3575 3576
/* Configure the VMOLR register of pool @vfn: VLAN stripping, acceptance
 * of untagged packets (@aupe), RSS and broadcast acceptance.
 */
static inline void igb_set_vmolr(struct igb_adapter *adapter,
				 int vfn, bool aupe)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/* This register exists only on 82576 and newer, so there is
	 * nothing to do on older parts.
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr = rd32(E1000_VMOLR(vfn));

	/* Strip vlan tags */
	vmolr |= E1000_VMOLR_STRVLAN;
	if (hw->mac.type == e1000_i350) {
		/* i350 gets the matching strip bit in DVMOLR as well */
		u32 dvmolr = rd32(E1000_DVMOLR(vfn));

		dvmolr |= E1000_DVMOLR_STRVLAN;
		wr32(E1000_DVMOLR(vfn), dvmolr);
	}

	if (!aupe)
		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
	else
		vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */

	/* start from a clean slate for the bits decided below */
	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

	/* enable RSS on the PF pool when more than one RSS queue is used */
	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_RSSE;

	/* for VMDq only allow the VFs and pool 0 to accept broadcast and
	 * multicast packets
	 */
	if (vfn <= adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */

	wr32(E1000_VMOLR(vfn), vmolr);
}

3615
/**
3616 3617 3618
 *  igb_configure_rx_ring - Configure a receive ring after Reset
 *  @adapter: board private structure
 *  @ring: receive ring to be configured
3619
 *
3620
 *  Configure the Rx unit of the MAC after a reset.
3621
 **/
3622
void igb_configure_rx_ring(struct igb_adapter *adapter,
3623
			   struct igb_ring *ring)
3624 3625 3626 3627
{
	struct e1000_hw *hw = &adapter->hw;
	u64 rdba = ring->dma;
	int reg_idx = ring->reg_idx;
3628
	u32 srrctl = 0, rxdctl = 0;
3629 3630

	/* disable the queue */
3631
	wr32(E1000_RXDCTL(reg_idx), 0);
3632 3633 3634 3635 3636 3637

	/* Set DMA base address registers */
	wr32(E1000_RDBAL(reg_idx),
	     rdba & 0x00000000ffffffffULL);
	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
	wr32(E1000_RDLEN(reg_idx),
3638
	     ring->count * sizeof(union e1000_adv_rx_desc));
3639 3640

	/* initialize head and tail */
3641
	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3642
	wr32(E1000_RDH(reg_idx), 0);
3643
	writel(0, ring->tail);
3644

3645
	/* set descriptor configuration */
3646
	srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3647
	srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3648
	srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3649
	if (hw->mac.type >= e1000_82580)
N
Nick Nunley 已提交
3650
		srrctl |= E1000_SRRCTL_TIMESTAMP;
3651 3652 3653
	/* Only set Drop Enable if we are supporting multiple queues */
	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
		srrctl |= E1000_SRRCTL_DROP_EN;
3654 3655 3656

	wr32(E1000_SRRCTL(reg_idx), srrctl);

3657
	/* set filtering for VMDQ pools */
3658
	igb_set_vmolr(adapter, reg_idx & 0x7, true);
3659

3660 3661 3662
	rxdctl |= IGB_RX_PTHRESH;
	rxdctl |= IGB_RX_HTHRESH << 8;
	rxdctl |= IGB_RX_WTHRESH << 16;
3663 3664 3665

	/* enable receive descriptor fetching */
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3666 3667 3668
	wr32(E1000_RXDCTL(reg_idx), rxdctl);
}

3669
/**
3670 3671
 *  igb_configure_rx - Configure receive Unit after Reset
 *  @adapter: board private structure
3672
 *
3673
 *  Configure the Rx unit of the MAC after a reset.
3674 3675 3676
 **/
static void igb_configure_rx(struct igb_adapter *adapter)
{
3677
	int i;
3678

3679 3680 3681
	/* set UTA to appropriate mode */
	igb_set_uta(adapter);

3682 3683
	/* set the correct pool for the PF default MAC address in entry 0 */
	igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3684
			 adapter->vfs_allocated_count);
3685

3686
	/* Setup the HW Rx Head and Tail Descriptor Pointers and
3687 3688
	 * the Base and Length of the Rx Descriptor Ring
	 */
3689 3690
	for (i = 0; i < adapter->num_rx_queues; i++)
		igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3691 3692 3693
}

/**
3694 3695
 *  igb_free_tx_resources - Free Tx Resources per Queue
 *  @tx_ring: Tx descriptor ring for a specific queue
3696
 *
3697
 *  Free all transmit software resources
3698
 **/
3699
void igb_free_tx_resources(struct igb_ring *tx_ring)
3700
{
3701
	igb_clean_tx_ring(tx_ring);
3702

3703 3704
	vfree(tx_ring->tx_buffer_info);
	tx_ring->tx_buffer_info = NULL;
3705

3706 3707 3708 3709
	/* if not set, then don't free */
	if (!tx_ring->desc)
		return;

3710 3711
	dma_free_coherent(tx_ring->dev, tx_ring->size,
			  tx_ring->desc, tx_ring->dma);
3712 3713 3714 3715 3716

	tx_ring->desc = NULL;
}

/**
 *  igb_free_all_tx_resources - Free Tx Resources for All Queues
 *  @adapter: board private structure
 *
 *  Calls igb_free_tx_resources() on every Tx ring that is present.
 **/
static void igb_free_all_tx_resources(struct igb_adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_tx_queues; q++) {
		struct igb_ring *ring = adapter->tx_ring[q];

		if (!ring)
			continue;
		igb_free_tx_resources(ring);
	}
}

3731 3732 3733 3734 3735
void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
				    struct igb_tx_buffer *tx_buffer)
{
	if (tx_buffer->skb) {
		dev_kfree_skb_any(tx_buffer->skb);
3736
		if (dma_unmap_len(tx_buffer, len))
3737
			dma_unmap_single(ring->dev,
3738 3739
					 dma_unmap_addr(tx_buffer, dma),
					 dma_unmap_len(tx_buffer, len),
3740
					 DMA_TO_DEVICE);
3741
	} else if (dma_unmap_len(tx_buffer, len)) {
3742
		dma_unmap_page(ring->dev,
3743 3744
			       dma_unmap_addr(tx_buffer, dma),
			       dma_unmap_len(tx_buffer, len),
3745 3746 3747 3748
			       DMA_TO_DEVICE);
	}
	tx_buffer->next_to_watch = NULL;
	tx_buffer->skb = NULL;
3749
	dma_unmap_len_set(tx_buffer, len, 0);
3750
	/* buffer_info must be completely set up in the transmit path */
3751 3752 3753
}

/**
3754 3755
 *  igb_clean_tx_ring - Free Tx Buffers
 *  @tx_ring: ring to be cleaned
3756
 **/
3757
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3758
{
3759
	struct igb_tx_buffer *buffer_info;
3760
	unsigned long size;
3761
	u16 i;
3762

3763
	if (!tx_ring->tx_buffer_info)
3764 3765 3766 3767
		return;
	/* Free all the Tx ring sk_buffs */

	for (i = 0; i < tx_ring->count; i++) {
3768
		buffer_info = &tx_ring->tx_buffer_info[i];
3769
		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3770 3771
	}

3772 3773
	netdev_tx_reset_queue(txring_txq(tx_ring));

3774 3775
	size = sizeof(struct igb_tx_buffer) * tx_ring->count;
	memset(tx_ring->tx_buffer_info, 0, size);
3776 3777 3778 3779 3780 3781 3782 3783 3784

	/* Zero out the descriptor ring */
	memset(tx_ring->desc, 0, tx_ring->size);

	tx_ring->next_to_use = 0;
	tx_ring->next_to_clean = 0;
}

/**
 *  igb_clean_all_tx_rings - Free Tx Buffers for all queues
 *  @adapter: board private structure
 *
 *  Calls igb_clean_tx_ring() on every Tx ring that is present.
 **/
static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_tx_queues; q++) {
		struct igb_ring *ring = adapter->tx_ring[q];

		if (!ring)
			continue;
		igb_clean_tx_ring(ring);
	}
}

/**
3798 3799
 *  igb_free_rx_resources - Free Rx Resources
 *  @rx_ring: ring to clean the resources from
3800
 *
3801
 *  Free all receive software resources
3802
 **/
3803
void igb_free_rx_resources(struct igb_ring *rx_ring)
3804
{
3805
	igb_clean_rx_ring(rx_ring);
3806

3807 3808
	vfree(rx_ring->rx_buffer_info);
	rx_ring->rx_buffer_info = NULL;
3809

3810 3811 3812 3813
	/* if not set, then don't free */
	if (!rx_ring->desc)
		return;

3814 3815
	dma_free_coherent(rx_ring->dev, rx_ring->size,
			  rx_ring->desc, rx_ring->dma);
3816 3817 3818 3819 3820

	rx_ring->desc = NULL;
}

/**
 *  igb_free_all_rx_resources - Free Rx Resources for All Queues
 *  @adapter: board private structure
 *
 *  Calls igb_free_rx_resources() on every Rx ring that is present.
 **/
static void igb_free_all_rx_resources(struct igb_adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_rx_queues; q++) {
		struct igb_ring *ring = adapter->rx_ring[q];

		if (!ring)
			continue;
		igb_free_rx_resources(ring);
	}
}

/**
3836 3837
 *  igb_clean_rx_ring - Free Rx Buffers per Queue
 *  @rx_ring: ring to free buffers from
3838
 **/
3839
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3840 3841
{
	unsigned long size;
3842
	u16 i;
3843

3844 3845 3846 3847
	if (rx_ring->skb)
		dev_kfree_skb(rx_ring->skb);
	rx_ring->skb = NULL;

3848
	if (!rx_ring->rx_buffer_info)
3849
		return;
3850

3851 3852
	/* Free all the Rx ring sk_buffs */
	for (i = 0; i < rx_ring->count; i++) {
3853
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3854

3855 3856 3857 3858 3859 3860 3861 3862 3863
		if (!buffer_info->page)
			continue;

		dma_unmap_page(rx_ring->dev,
			       buffer_info->dma,
			       PAGE_SIZE,
			       DMA_FROM_DEVICE);
		__free_page(buffer_info->page);

3864
		buffer_info->page = NULL;
3865 3866
	}

3867 3868
	size = sizeof(struct igb_rx_buffer) * rx_ring->count;
	memset(rx_ring->rx_buffer_info, 0, size);
3869 3870 3871 3872

	/* Zero out the descriptor ring */
	memset(rx_ring->desc, 0, rx_ring->size);

3873
	rx_ring->next_to_alloc = 0;
3874 3875 3876 3877 3878
	rx_ring->next_to_clean = 0;
	rx_ring->next_to_use = 0;
}

/**
 *  igb_clean_all_rx_rings - Free Rx Buffers for all queues
 *  @adapter: board private structure
 *
 *  Runs igb_clean_rx_ring() on every Rx ring pointer that is set.
 **/
static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
{
	int q;

	for (q = 0; q < adapter->num_rx_queues; q++) {
		struct igb_ring *ring = adapter->rx_ring[q];

		if (!ring)
			continue;

		igb_clean_rx_ring(ring);
	}
}

/**
3892 3893 3894
 *  igb_set_mac - Change the Ethernet Address of the NIC
 *  @netdev: network interface device structure
 *  @p: pointer to an address structure
3895
 *
3896
 *  Returns 0 on success, negative on failure
3897 3898 3899 3900
 **/
static int igb_set_mac(struct net_device *netdev, void *p)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
3901
	struct e1000_hw *hw = &adapter->hw;
3902 3903 3904 3905 3906 3907
	struct sockaddr *addr = p;

	if (!is_valid_ether_addr(addr->sa_data))
		return -EADDRNOTAVAIL;

	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3908
	memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3909

3910 3911
	/* set the correct pool for the new PF MAC address in entry 0 */
	igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3912
			 adapter->vfs_allocated_count);
3913

3914 3915 3916 3917
	return 0;
}

/**
3918 3919
 *  igb_write_mc_addr_list - write multicast addresses to MTA
 *  @netdev: network interface device structure
3920
 *
3921 3922 3923 3924
 *  Writes multicast address list to the MTA hash table.
 *  Returns: -ENOMEM on failure
 *           0 on no addresses written
 *           X on writing X addresses to MTA
3925
 **/
3926
static int igb_write_mc_addr_list(struct net_device *netdev)
3927 3928 3929
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
3930
	struct netdev_hw_addr *ha;
3931
	u8  *mta_list;
3932 3933
	int i;

3934
	if (netdev_mc_empty(netdev)) {
3935 3936 3937 3938 3939
		/* nothing to program, so clear mc list */
		igb_update_mc_addr_list(hw, NULL, 0);
		igb_restore_vf_multicasts(adapter);
		return 0;
	}
3940

3941
	mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3942 3943
	if (!mta_list)
		return -ENOMEM;
3944

3945
	/* The shared function expects a packed array of only addresses. */
3946
	i = 0;
3947 3948
	netdev_for_each_mc_addr(ha, netdev)
		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3949 3950 3951 3952

	igb_update_mc_addr_list(hw, mta_list, i);
	kfree(mta_list);

3953
	return netdev_mc_count(netdev);
3954 3955 3956
}

/**
3957 3958
 *  igb_write_uc_addr_list - write unicast addresses to RAR table
 *  @netdev: network interface device structure
3959
 *
3960 3961 3962 3963
 *  Writes unicast address list to the RAR table.
 *  Returns: -ENOMEM on failure/insufficient address space
 *           0 on no addresses written
 *           X on writing X addresses to the RAR table
3964 3965 3966 3967 3968 3969 3970 3971 3972 3973
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
	int count = 0;

	/* return ENOMEM indicating insufficient memory for addresses */
3974
	if (netdev_uc_count(netdev) > rar_entries)
3975
		return -ENOMEM;
3976

3977
	if (!netdev_uc_empty(netdev) && rar_entries) {
3978
		struct netdev_hw_addr *ha;
3979 3980

		netdev_for_each_uc_addr(ha, netdev) {
3981 3982
			if (!rar_entries)
				break;
3983
			igb_rar_set_qsel(adapter, ha->addr,
3984 3985
					 rar_entries--,
					 vfn);
3986
			count++;
3987 3988 3989 3990 3991 3992 3993 3994 3995
		}
	}
	/* write the addresses in reverse order to avoid write combining */
	for (; rar_entries > 0 ; rar_entries--) {
		wr32(E1000_RAH(rar_entries), 0);
		wr32(E1000_RAL(rar_entries), 0);
	}
	wrfl();

3996 3997 3998 3999
	return count;
}

/**
4000 4001
 *  igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 *  @netdev: network interface device structure
4002
 *
4003 4004 4005 4006
 *  The set_rx_mode entry point is called whenever the unicast or multicast
 *  address lists or the network interface flags are updated.  This routine is
 *  responsible for configuring the hardware for proper unicast, multicast,
 *  promiscuous mode, and all-multi behavior.
4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	u32 rctl, vmolr = 0;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	rctl = rd32(E1000_RCTL);

	/* clear the effected bits */
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

	if (netdev->flags & IFF_PROMISC) {
4023
		/* retain VLAN HW filtering if in VT mode */
4024
		if (adapter->vfs_allocated_count)
4025
			rctl |= E1000_RCTL_VFE;
4026 4027 4028 4029 4030 4031 4032
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= E1000_RCTL_MPE;
			vmolr |= E1000_VMOLR_MPME;
		} else {
4033
			/* Write addresses to the MTA, if the attempt fails
L
Lucas De Marchi 已提交
4034
			 * then we should just turn on promiscuous mode so
4035 4036 4037 4038 4039 4040 4041 4042 4043 4044
			 * that we can at least receive multicast traffic
			 */
			count = igb_write_mc_addr_list(netdev);
			if (count < 0) {
				rctl |= E1000_RCTL_MPE;
				vmolr |= E1000_VMOLR_MPME;
			} else if (count) {
				vmolr |= E1000_VMOLR_ROMPE;
			}
		}
4045
		/* Write addresses to available RAR registers, if there is not
4046
		 * sufficient space to store all the addresses then enable
L
Lucas De Marchi 已提交
4047
		 * unicast promiscuous mode
4048 4049 4050 4051 4052 4053 4054
		 */
		count = igb_write_uc_addr_list(netdev);
		if (count < 0) {
			rctl |= E1000_RCTL_UPE;
			vmolr |= E1000_VMOLR_ROPE;
		}
		rctl |= E1000_RCTL_VFE;
4055
	}
4056
	wr32(E1000_RCTL, rctl);
4057

4058
	/* In order to support SR-IOV and eventually VMDq it is necessary to set
4059 4060 4061 4062
	 * the VMOLR to enable the appropriate modes.  Without this workaround
	 * we will have issues with VLAN tag stripping not being done for frames
	 * that are only arriving because we are the default pool
	 */
4063
	if ((hw->mac.type < e1000_82576) || (hw->mac.type > e1000_i350))
4064
		return;
4065

4066
	vmolr |= rd32(E1000_VMOLR(vfn)) &
4067
		 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
4068
	wr32(E1000_VMOLR(vfn), vmolr);
4069
	igb_restore_vf_multicasts(adapter);
4070 4071
}

G
Greg Rose 已提交
4072 4073 4074 4075 4076 4077 4078 4079
static void igb_check_wvbr(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 wvbr = 0;

	switch (hw->mac.type) {
	case e1000_82576:
	case e1000_i350:
4080 4081
		wvbr = rd32(E1000_WVBR);
		if (!wvbr)
G
Greg Rose 已提交
4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099
			return;
		break;
	default:
		break;
	}

	adapter->wvbr |= wvbr;
}

#define IGB_STAGGERED_QUEUE_OFFSET 8

static void igb_spoof_check(struct igb_adapter *adapter)
{
	int j;

	if (!adapter->wvbr)
		return;

4100
	for (j = 0; j < adapter->vfs_allocated_count; j++) {
G
Greg Rose 已提交
4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111
		if (adapter->wvbr & (1 << j) ||
		    adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
			dev_warn(&adapter->pdev->dev,
				"Spoof event(s) detected on VF %d\n", j);
			adapter->wvbr &=
				~((1 << j) |
				  (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
		}
	}
}

4112
/* Need to wait a few seconds after link up to get diagnostic information from
4113 4114
 * the phy
 */
4115 4116 4117
static void igb_update_phy_info(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *) data;
4118
	igb_get_phy_info(&adapter->hw);
4119 4120
}

A
Alexander Duyck 已提交
4121
/**
4122 4123
 *  igb_has_link - check shared code for link and determine up/down
 *  @adapter: pointer to driver private info
A
Alexander Duyck 已提交
4124
 **/
4125
bool igb_has_link(struct igb_adapter *adapter)
A
Alexander Duyck 已提交
4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136
{
	struct e1000_hw *hw = &adapter->hw;
	bool link_active = false;

	/* get_link_status is set on LSC (link status) interrupt or
	 * rx sequence error interrupt.  get_link_status will stay
	 * false until the e1000_check_for_link establishes link
	 * for copper adapters ONLY
	 */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
4137 4138
		if (!hw->mac.get_link_status)
			return true;
A
Alexander Duyck 已提交
4139
	case e1000_media_type_internal_serdes:
4140 4141
		hw->mac.ops.check_for_link(hw);
		link_active = !hw->mac.get_link_status;
A
Alexander Duyck 已提交
4142 4143 4144 4145 4146 4147
		break;
	default:
	case e1000_media_type_unknown:
		break;
	}

4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158
	if (((hw->mac.type == e1000_i210) ||
	     (hw->mac.type == e1000_i211)) &&
	     (hw->phy.id == I210_I_PHY_ID)) {
		if (!netif_carrier_ok(adapter->netdev)) {
			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
		} else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) {
			adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE;
			adapter->link_check_timeout = jiffies;
		}
	}

A
Alexander Duyck 已提交
4159 4160 4161
	return link_active;
}

4162 4163 4164 4165 4166
/* Return true when @event is set in THSTAT.  Only evaluated on i350 with
 * copper media and with the SGMII link mode bit clear in CTRL_EXT;
 * every other configuration returns false.
 */
static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
{
	u32 ctrl_ext, thstat;

	/* check for thermal sensor event on i350 copper only */
	if (hw->mac.type != e1000_i350)
		return false;

	thstat = rd32(E1000_THSTAT);
	ctrl_ext = rd32(E1000_CTRL_EXT);

	if (hw->phy.media_type != e1000_media_type_copper)
		return false;

	if (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)
		return false;

	return !!(thstat & event);
}

4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199
/**
 *  igb_check_lvmmc - check for malformed packets received
 *  and indicated in LVMMC register
 *  @adapter: pointer to adapter
 *
 *  Reads LVMMC and, when it is non-zero, emits a rate-limited warning
 *  containing the register value.
 **/
static void igb_check_lvmmc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 lvmmc = rd32(E1000_LVMMC);

	if (!lvmmc)
		return;

	if (unlikely(net_ratelimit()))
		netdev_warn(adapter->netdev,
			    "malformed Tx packet detected and dropped, LVMMC:0x%08x\n",
			    lvmmc);
}

4200
/**
 *  igb_watchdog - Timer Call-back
 *  @data: pointer to adapter cast into an unsigned long
 *
 *  Only queues the watchdog work item; the actual checks are done in
 *  igb_watchdog_task().
 **/
static void igb_watchdog(unsigned long data)
{
	struct igb_adapter *adapter = (struct igb_adapter *)data;

	/* Do the rest outside of interrupt context */
	schedule_work(&adapter->watchdog_task);
}

static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
4214 4215
						   struct igb_adapter,
						   watchdog_task);
4216
	struct e1000_hw *hw = &adapter->hw;
4217
	struct e1000_phy_info *phy = &hw->phy;
4218
	struct net_device *netdev = adapter->netdev;
4219
	u32 link;
4220
	int i;
4221
	u32 connsw;
4222

A
Alexander Duyck 已提交
4223
	link = igb_has_link(adapter);
4224 4225 4226 4227 4228 4229 4230 4231

	if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) {
		if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
			adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE;
		else
			link = false;
	}

4232 4233 4234 4235 4236 4237 4238 4239
	/* Force link down if we have fiber to swap to */
	if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
		if (hw->phy.media_type == e1000_media_type_copper) {
			connsw = rd32(E1000_CONNSW);
			if (!(connsw & E1000_CONNSW_AUTOSENSE_EN))
				link = 0;
		}
	}
4240
	if (link) {
4241 4242 4243 4244 4245 4246
		/* Perform a reset if the media type changed. */
		if (hw->dev_spec._82575.media_changed) {
			hw->dev_spec._82575.media_changed = false;
			adapter->flags |= IGB_FLAG_MEDIA_RESET;
			igb_reset(adapter);
		}
Y
Yan, Zheng 已提交
4247 4248 4249
		/* Cancel scheduled suspend requests. */
		pm_runtime_resume(netdev->dev.parent);

4250 4251
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;
4252

4253
			hw->mac.ops.get_speed_and_duplex(hw,
4254 4255
							 &adapter->link_speed,
							 &adapter->link_duplex);
4256 4257

			ctrl = rd32(E1000_CTRL);
4258
			/* Links status message must follow this format */
C
Carolyn Wyborny 已提交
4259 4260
			netdev_info(netdev,
			       "igb: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
4261 4262 4263
			       netdev->name,
			       adapter->link_speed,
			       adapter->link_duplex == FULL_DUPLEX ?
J
Jeff Kirsher 已提交
4264 4265 4266 4267 4268
			       "Full" : "Half",
			       (ctrl & E1000_CTRL_TFCE) &&
			       (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
			       (ctrl & E1000_CTRL_RFCE) ?  "RX" :
			       (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
4269

4270 4271 4272 4273 4274 4275 4276 4277 4278
			/* disable EEE if enabled */
			if ((adapter->flags & IGB_FLAG_EEE) &&
				(adapter->link_duplex == HALF_DUPLEX)) {
				dev_info(&adapter->pdev->dev,
				"EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex.\n");
				adapter->hw.dev_spec._82575.eee_disable = true;
				adapter->flags &= ~IGB_FLAG_EEE;
			}

4279 4280 4281 4282 4283
			/* check if SmartSpeed worked */
			igb_check_downshift(hw);
			if (phy->speed_downgraded)
				netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");

4284
			/* check for thermal sensor event */
J
Jeff Kirsher 已提交
4285
			if (igb_thermal_sensor_event(hw,
4286
			    E1000_THSTAT_LINK_THROTTLE))
C
Carolyn Wyborny 已提交
4287
				netdev_info(netdev, "The network adapter link speed was downshifted because it overheated\n");
4288

4289
			/* adjust timeout factor according to speed/duplex */
4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				/* maybe add some timeout factor ? */
				break;
			}

			netif_carrier_on(netdev);

4302
			igb_ping_all_vfs(adapter);
4303
			igb_check_vf_rate_limit(adapter);
4304

4305
			/* link state has changed, schedule phy info update */
4306 4307 4308 4309 4310 4311 4312 4313
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;
4314 4315

			/* check for thermal sensor event */
J
Jeff Kirsher 已提交
4316 4317
			if (igb_thermal_sensor_event(hw,
			    E1000_THSTAT_PWR_DOWN)) {
C
Carolyn Wyborny 已提交
4318
				netdev_err(netdev, "The network adapter was stopped because it overheated\n");
4319
			}
4320

4321
			/* Links status message must follow this format */
C
Carolyn Wyborny 已提交
4322
			netdev_info(netdev, "igb: %s NIC Link is Down\n",
4323
			       netdev->name);
4324
			netif_carrier_off(netdev);
4325

4326 4327
			igb_ping_all_vfs(adapter);

4328
			/* link state has changed, schedule phy info update */
4329 4330 4331
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
					  round_jiffies(jiffies + 2 * HZ));
Y
Yan, Zheng 已提交
4332

4333 4334 4335 4336 4337 4338 4339 4340 4341
			/* link is down, time to check for alternate media */
			if (adapter->flags & IGB_FLAG_MAS_ENABLE) {
				igb_check_swap_media(adapter);
				if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
					schedule_work(&adapter->reset_task);
					/* return immediately */
					return;
				}
			}
Y
Yan, Zheng 已提交
4342 4343
			pm_schedule_suspend(netdev->dev.parent,
					    MSEC_PER_SEC * 5);
4344 4345 4346 4347 4348 4349 4350 4351 4352 4353

		/* also check for alternate media here */
		} else if (!netif_carrier_ok(netdev) &&
			   (adapter->flags & IGB_FLAG_MAS_ENABLE)) {
			igb_check_swap_media(adapter);
			if (adapter->flags & IGB_FLAG_MEDIA_RESET) {
				schedule_work(&adapter->reset_task);
				/* return immediately */
				return;
			}
4354 4355 4356
		}
	}

E
Eric Dumazet 已提交
4357 4358 4359
	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	spin_unlock(&adapter->stats64_lock);
4360

4361
	for (i = 0; i < adapter->num_tx_queues; i++) {
4362
		struct igb_ring *tx_ring = adapter->tx_ring[i];
4363
		if (!netif_carrier_ok(netdev)) {
4364 4365 4366
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
4367 4368
			 * (Do the reset outside of interrupt context).
			 */
4369 4370 4371 4372 4373 4374
			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
				adapter->tx_timeout_count++;
				schedule_work(&adapter->reset_task);
				/* return immediately since reset is imminent */
				return;
			}
4375 4376
		}

4377
		/* Force detection of hung controller every watchdog period */
4378
		set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
4379
	}
4380

4381
	/* Cause software interrupt to ensure Rx ring is cleaned */
4382
	if (adapter->flags & IGB_FLAG_HAS_MSIX) {
4383
		u32 eics = 0;
4384

4385 4386
		for (i = 0; i < adapter->num_q_vectors; i++)
			eics |= adapter->q_vector[i]->eims_value;
4387 4388 4389 4390
		wr32(E1000_EICS, eics);
	} else {
		wr32(E1000_ICS, E1000_ICS_RXDMT0);
	}
4391

G
Greg Rose 已提交
4392
	igb_spoof_check(adapter);
4393
	igb_ptp_rx_hang(adapter);
G
Greg Rose 已提交
4394

4395 4396 4397 4398 4399
	/* Check LVMMC register on i350/i354 only */
	if ((adapter->hw.mac.type == e1000_i350) ||
	    (adapter->hw.mac.type == e1000_i354))
		igb_check_lvmmc(adapter);

4400
	/* Reset the timer */
4401 4402 4403 4404 4405 4406 4407 4408
	if (!test_bit(__IGB_DOWN, &adapter->state)) {
		if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)
			mod_timer(&adapter->watchdog_timer,
				  round_jiffies(jiffies +  HZ));
		else
			mod_timer(&adapter->watchdog_timer,
				  round_jiffies(jiffies + 2 * HZ));
	}
4409 4410 4411 4412 4413 4414 4415 4416 4417
}

enum latency_range {
	/* mapped to IGB_70K_ITR by igb_set_itr() */
	lowest_latency = 0,
	/* mapped to IGB_20K_ITR by igb_set_itr() */
	low_latency = 1,
	/* mapped to IGB_4K_ITR by igb_set_itr() */
	bulk_latency = 2,
	/* NOTE(review): not referenced in this part of the file; presumably a
	 * sentinel for "no valid range" - confirm against users elsewhere
	 */
	latency_invalid = 255
};

4418
/**
4419 4420
 *  igb_update_ring_itr - update the dynamic ITR value based on packet size
 *  @q_vector: pointer to q_vector
4421
 *
4422 4423 4424 4425 4426 4427 4428
 *  Stores a new ITR value based on strictly on packet size.  This
 *  algorithm is less sophisticated than that used in igb_update_itr,
 *  due to the difficulty of synchronizing statistics across multiple
 *  receive rings.  The divisors and thresholds used by this function
 *  were determined based on theoretical maximum wire speed and testing
 *  data, in order to minimize response time while increasing bulk
 *  throughput.
4429
 *  This functionality is controlled by ethtool's coalescing settings.
4430 4431
 *  NOTE:  This function is called only when operating in a multiqueue
 *         receive environment.
4432
 **/
4433
static void igb_update_ring_itr(struct igb_q_vector *q_vector)
4434
{
4435
	int new_val = q_vector->itr_val;
4436
	int avg_wire_size = 0;
4437
	struct igb_adapter *adapter = q_vector->adapter;
E
Eric Dumazet 已提交
4438
	unsigned int packets;
4439

4440 4441 4442 4443
	/* For non-gigabit speeds, just fix the interrupt rate at 4000
	 * ints/sec - ITR timer value of 120 ticks.
	 */
	if (adapter->link_speed != SPEED_1000) {
4444
		new_val = IGB_4K_ITR;
4445
		goto set_itr_val;
4446
	}
4447

4448 4449 4450
	packets = q_vector->rx.total_packets;
	if (packets)
		avg_wire_size = q_vector->rx.total_bytes / packets;
4451

4452 4453 4454 4455
	packets = q_vector->tx.total_packets;
	if (packets)
		avg_wire_size = max_t(u32, avg_wire_size,
				      q_vector->tx.total_bytes / packets);
4456 4457 4458 4459

	/* if avg_wire_size isn't set no work was done */
	if (!avg_wire_size)
		goto clear_counts;
4460

4461 4462 4463 4464 4465
	/* Add 24 bytes to size to account for CRC, preamble, and gap */
	avg_wire_size += 24;

	/* Don't starve jumbo frames */
	avg_wire_size = min(avg_wire_size, 3000);
4466

4467 4468 4469 4470 4471
	/* Give a little boost to mid-size frames */
	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
		new_val = avg_wire_size / 3;
	else
		new_val = avg_wire_size / 2;
4472

4473 4474 4475 4476 4477
	/* conservative mode (itr 3) eliminates the lowest_latency setting */
	if (new_val < IGB_20K_ITR &&
	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
		new_val = IGB_20K_ITR;
4478

4479
set_itr_val:
4480 4481 4482
	if (new_val != q_vector->itr_val) {
		q_vector->itr_val = new_val;
		q_vector->set_itr = 1;
4483
	}
4484
clear_counts:
4485 4486 4487 4488
	q_vector->rx.total_bytes = 0;
	q_vector->rx.total_packets = 0;
	q_vector->tx.total_bytes = 0;
	q_vector->tx.total_packets = 0;
4489 4490 4491
}

/**
4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502
 *  igb_update_itr - update the dynamic ITR value based on statistics
 *  @q_vector: pointer to q_vector
 *  @ring_container: ring info to update the itr for
 *
 *  Stores a new ITR value based on packets and byte
 *  counts during the last interrupt.  The advantage of per interrupt
 *  computation is faster updates and more accurate ITR for the current
 *  traffic pattern.  Constants in this function were computed
 *  based on theoretical maximum wire speed and thresholds were set based
 *  on testing data as well as attempting to minimize response time
 *  while increasing bulk throughput.
4503
 *  This functionality is controlled by ethtool's coalescing settings.
4504 4505
 *  NOTE:  These calculations are only valid when operating in a single-
 *         queue environment.
4506
 **/
4507 4508
static void igb_update_itr(struct igb_q_vector *q_vector,
			   struct igb_ring_container *ring_container)
4509
{
4510 4511 4512
	unsigned int packets = ring_container->total_packets;
	unsigned int bytes = ring_container->total_bytes;
	u8 itrval = ring_container->itr;
4513

4514
	/* no packets, exit with status unchanged */
4515
	if (packets == 0)
4516
		return;
4517

4518
	switch (itrval) {
4519 4520 4521
	case lowest_latency:
		/* handle TSO and jumbo frames */
		if (bytes/packets > 8000)
4522
			itrval = bulk_latency;
4523
		else if ((packets < 5) && (bytes > 512))
4524
			itrval = low_latency;
4525 4526 4527 4528
		break;
	case low_latency:  /* 50 usec aka 20000 ints/s */
		if (bytes > 10000) {
			/* this if handles the TSO accounting */
4529
			if (bytes/packets > 8000)
4530
				itrval = bulk_latency;
4531
			else if ((packets < 10) || ((bytes/packets) > 1200))
4532
				itrval = bulk_latency;
4533
			else if ((packets > 35))
4534
				itrval = lowest_latency;
4535
		} else if (bytes/packets > 2000) {
4536
			itrval = bulk_latency;
4537
		} else if (packets <= 2 && bytes < 512) {
4538
			itrval = lowest_latency;
4539 4540 4541 4542 4543
		}
		break;
	case bulk_latency: /* 250 usec aka 4000 ints/s */
		if (bytes > 25000) {
			if (packets > 35)
4544
				itrval = low_latency;
4545
		} else if (bytes < 1500) {
4546
			itrval = low_latency;
4547 4548 4549 4550
		}
		break;
	}

4551 4552 4553 4554 4555 4556
	/* clear work counters since we have the values we need */
	ring_container->total_bytes = 0;
	ring_container->total_packets = 0;

	/* write updated itr to ring container */
	ring_container->itr = itrval;
4557 4558
}

4559
static void igb_set_itr(struct igb_q_vector *q_vector)
4560
{
4561
	struct igb_adapter *adapter = q_vector->adapter;
4562
	u32 new_itr = q_vector->itr_val;
4563
	u8 current_itr = 0;
4564 4565 4566 4567

	/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
	if (adapter->link_speed != SPEED_1000) {
		current_itr = 0;
4568
		new_itr = IGB_4K_ITR;
4569 4570 4571
		goto set_itr_now;
	}

4572 4573
	igb_update_itr(q_vector, &q_vector->tx);
	igb_update_itr(q_vector, &q_vector->rx);
4574

4575
	current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4576

4577
	/* conservative mode (itr 3) eliminates the lowest_latency setting */
4578 4579 4580
	if (current_itr == lowest_latency &&
	    ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
	     (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4581 4582
		current_itr = low_latency;

4583 4584 4585
	switch (current_itr) {
	/* counts and packets in update_itr are dependent on these numbers */
	case lowest_latency:
4586
		new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4587 4588
		break;
	case low_latency:
4589
		new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4590 4591
		break;
	case bulk_latency:
4592
		new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4593 4594 4595 4596 4597 4598
		break;
	default:
		break;
	}

set_itr_now:
4599
	if (new_itr != q_vector->itr_val) {
4600 4601
		/* this attempts to bias the interrupt rate towards Bulk
		 * by adding intermediate steps when interrupt rate is
4602 4603
		 * increasing
		 */
4604
		new_itr = new_itr > q_vector->itr_val ?
4605 4606 4607
			  max((new_itr * q_vector->itr_val) /
			  (new_itr + (q_vector->itr_val >> 2)),
			  new_itr) : new_itr;
4608 4609 4610 4611 4612 4613
		/* Don't write the value here; it resets the adapter's
		 * internal timer, and causes us to delay far longer than
		 * we should between interrupts.  Instead, we write the ITR
		 * value at the beginning of the next interrupt so the timing
		 * ends up being correct.
		 */
4614 4615
		q_vector->itr_val = new_itr;
		q_vector->set_itr = 1;
4616 4617 4618
	}
}

4619 4620
/* Fill the advanced context descriptor at tx_ring->next_to_use with the
 * supplied fields and advance next_to_use by one slot (wrapping to 0 at
 * the end of the ring).
 */
static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
			    u32 type_tucmd, u32 mss_l4len_idx)
{
	u16 ntu = tx_ring->next_to_use;
	struct e1000_adv_tx_context_desc *ctx = IGB_TX_CTXTDESC(tx_ring, ntu);

	/* claim the slot: move next_to_use forward, wrapping at ring end */
	ntu++;
	if (ntu >= tx_ring->count)
		ntu = 0;
	tx_ring->next_to_use = ntu;

	/* set bits to identify this as an advanced context descriptor */
	type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	/* For 82575, context index must be unique per ring. */
	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	ctx->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
	ctx->seqnum_seed = 0;
	ctx->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
	ctx->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
}

4643 4644 4645
/* Prepare a TSO context descriptor for the frame described by @first.
 *
 * Returns 0 when the skb is not a CHECKSUM_PARTIAL GSO frame (nothing
 * done), a negative errno if skb_cow_head() fails, and 1 after a context
 * descriptor has been written.  On success *@hdr_len holds the transport
 * offset plus the TCP header length.
 */
static int igb_tso(struct igb_ring *tx_ring,
		   struct igb_tx_buffer *first,
		   u8 *hdr_len)
{
	struct sk_buff *skb = first->skb;
	u32 vlan_macip_lens, type_tucmd;
	u32 mss_l4len_idx, l4len;
	int err;

	if (skb->ip_summed != CHECKSUM_PARTIAL || !skb_is_gso(skb))
		return 0;

	/* the headers are modified below */
	err = skb_cow_head(skb, 0);
	if (err < 0)
		return err;

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;

	if (first->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);

		/* zero the length/checksum fields and seed the TCP checksum
		 * with the pseudo-header sum computed for a zero length
		 */
		iph->tot_len = 0;
		iph->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
							 iph->daddr, 0,
							 IPPROTO_TCP,
							 0);
		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM |
				   IGB_TX_FLAGS_IPV4;
	} else if (skb_is_gso_v6(skb)) {
		struct ipv6hdr *ip6h = ipv6_hdr(skb);

		ip6h->payload_len = 0;
		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ip6h->saddr,
						       &ip6h->daddr,
						       0, IPPROTO_TCP, 0);
		first->tx_flags |= IGB_TX_FLAGS_TSO |
				   IGB_TX_FLAGS_CSUM;
	}

	/* compute header lengths */
	l4len = tcp_hdrlen(skb);
	*hdr_len = skb_transport_offset(skb) + l4len;

	/* update gso size and bytecount with header size */
	first->gso_segs = skb_shinfo(skb)->gso_segs;
	first->bytecount += (first->gso_segs - 1) * *hdr_len;

	/* MSS L4LEN IDX */
	mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
	mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;

	/* VLAN MACLEN IPLEN */
	vlan_macip_lens = skb_network_header_len(skb);
	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;

	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);

	return 1;
}

4708
static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4709
{
4710
	struct sk_buff *skb = first->skb;
4711 4712 4713
	u32 vlan_macip_lens = 0;
	u32 mss_l4len_idx = 0;
	u32 type_tucmd = 0;
4714

4715
	if (skb->ip_summed != CHECKSUM_PARTIAL) {
4716 4717
		if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
			return;
4718 4719
	} else {
		u8 l4_hdr = 0;
4720

4721
		switch (first->protocol) {
4722
		case htons(ETH_P_IP):
4723 4724 4725 4726
			vlan_macip_lens |= skb_network_header_len(skb);
			type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
			l4_hdr = ip_hdr(skb)->protocol;
			break;
4727
		case htons(ETH_P_IPV6):
4728 4729 4730 4731 4732 4733
			vlan_macip_lens |= skb_network_header_len(skb);
			l4_hdr = ipv6_hdr(skb)->nexthdr;
			break;
		default:
			if (unlikely(net_ratelimit())) {
				dev_warn(tx_ring->dev,
4734 4735
					 "partial checksum but proto=%x!\n",
					 first->protocol);
4736
			}
4737 4738
			break;
		}
4739

4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757
		switch (l4_hdr) {
		case IPPROTO_TCP:
			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
			mss_l4len_idx = tcp_hdrlen(skb) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case IPPROTO_SCTP:
			type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
			mss_l4len_idx = sizeof(struct sctphdr) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		case IPPROTO_UDP:
			mss_l4len_idx = sizeof(struct udphdr) <<
					E1000_ADVTXD_L4LEN_SHIFT;
			break;
		default:
			if (unlikely(net_ratelimit())) {
				dev_warn(tx_ring->dev,
4758 4759
					 "partial checksum but l4 proto=%x!\n",
					 l4_hdr);
4760
			}
4761
			break;
4762
		}
4763 4764 4765

		/* update TX checksum flag */
		first->tx_flags |= IGB_TX_FLAGS_CSUM;
4766
	}
4767

4768
	vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4769
	vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4770

4771
	igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4772 4773
}

4774 4775 4776 4777 4778 4779
/* Translate one single-bit flag into another: yields _result when _flag is
 * set in _input and 0 otherwise, by scaling the masked bit up or down.
 * _flag and _result are expected to be single-bit constants.  Every
 * argument is parenthesized so expression arguments (e.g. "1 << 2")
 * expand with the intended precedence.
 */
#define IGB_SET_FLAG(_input, _flag, _result) \
	(((_flag) <= (_result)) ? \
	 ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \
	 ((u32)((_input) & (_flag)) / ((_flag) / (_result))))

/* Build the command/type bits of cmd_type_len that igb_tx_map() applies to
 * the data descriptors of one frame.
 *
 * @skb:      frame being sent; only its no_fcs bit is consulted
 * @tx_flags: IGB_TX_FLAGS_* bits recorded for the frame
 *
 * Returns the assembled command word (the buffer length is merged in by the
 * caller).
 */
static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
{
	/* advanced data descriptor, frame checksum insertion on by default */
	u32 cmd_type = E1000_ADVTXD_DTYP_DATA |
		       E1000_ADVTXD_DCMD_DEXT |
		       E1000_ADVTXD_DCMD_IFCS;

	/* HW VLAN insertion, TSO segmentation and Tx timestamping: each one
	 * maps a single tx_flags bit onto a single descriptor command bit
	 */
	cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN,
				 (E1000_ADVTXD_DCMD_VLE)) |
		    IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO,
				 (E1000_ADVTXD_DCMD_TSE)) |
		    IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP,
				 (E1000_ADVTXD_MAC_TSTAMP));

	/* IFCS is already set above, so the XOR clears it when the skb asks
	 * for no frame checksum
	 */
	cmd_type ^= IGB_SET_FLAG(skb->no_fcs, 1, E1000_ADVTXD_DCMD_IFCS);

	return cmd_type;
}

4804 4805 4806
/* Compose the olinfo_status word of a Tx descriptor and store it in
 * little-endian form.
 *
 * @tx_ring:  ring the descriptor belongs to
 * @tx_desc:  descriptor to update
 * @tx_flags: IGB_TX_FLAGS_* bits recorded for the frame
 * @paylen:   payload length, placed at E1000_ADVTXD_PAYLEN_SHIFT
 */
static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
				 union e1000_adv_tx_desc *tx_desc,
				 u32 tx_flags, unsigned int paylen)
{
	u32 status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;

	/* request L4 and IPv4 checksum insertion when flagged */
	status |= IGB_SET_FLAG(tx_flags,
			       IGB_TX_FLAGS_CSUM,
			       (E1000_TXD_POPTS_TXSM << 8)) |
		  IGB_SET_FLAG(tx_flags,
			       IGB_TX_FLAGS_IPV4,
			       (E1000_TXD_POPTS_IXSM << 8));

	/* 82575 requires a unique index per ring */
	if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
		status |= tx_ring->reg_idx << 4;

	tx_desc->read.olinfo_status = cpu_to_le32(status);
}

4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861
/* Slow path of igb_maybe_stop_tx(): stop the subqueue, then look at the ring
 * once more in case descriptors were released in the meantime.
 *
 * Returns -EBUSY when fewer than @size descriptors are unused (the queue is
 * left stopped), or 0 after the queue has been woken again.
 */
static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	struct net_device *dev = tx_ring->netdev;

	netif_stop_subqueue(dev, tx_ring->queue_index);

	/* Herbert's original patch had:
	 *  smp_mb__after_netif_stop_queue();
	 * but since that doesn't exist yet, just open code it.
	 */
	smp_mb();

	/* Another CPU may have made room between the caller's check and the
	 * stop above; only undo the stop when that is the case.
	 */
	if (igb_desc_unused(tx_ring) >= size) {
		/* A reprieve! */
		netif_wake_subqueue(dev, tx_ring->queue_index);

		u64_stats_update_begin(&tx_ring->tx_syncp2);
		tx_ring->tx_stats.restart_queue2++;
		u64_stats_update_end(&tx_ring->tx_syncp2);

		return 0;
	}

	return -EBUSY;
}

/* Make sure @size descriptors are available on @tx_ring.
 *
 * Returns 0 when there is room; otherwise defers to __igb_maybe_stop_tx(),
 * which stops the queue and returns its own result.
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}

4862 4863
static void igb_tx_map(struct igb_ring *tx_ring,
		       struct igb_tx_buffer *first,
4864
		       const u8 hdr_len)
4865
{
4866
	struct sk_buff *skb = first->skb;
4867
	struct igb_tx_buffer *tx_buffer;
4868
	union e1000_adv_tx_desc *tx_desc;
4869
	struct skb_frag_struct *frag;
4870
	dma_addr_t dma;
4871
	unsigned int data_len, size;
4872
	u32 tx_flags = first->tx_flags;
4873
	u32 cmd_type = igb_tx_cmd_type(skb, tx_flags);
4874 4875 4876 4877
	u16 i = tx_ring->next_to_use;

	tx_desc = IGB_TX_DESC(tx_ring, i);

4878 4879 4880 4881
	igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);

	size = skb_headlen(skb);
	data_len = skb->data_len;
4882 4883

	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4884

4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895
	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		tx_desc->read.buffer_addr = cpu_to_le64(dma);
4896 4897 4898

		while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
			tx_desc->read.cmd_type_len =
4899
				cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD);
4900 4901 4902 4903 4904 4905 4906

			i++;
			tx_desc++;
			if (i == tx_ring->count) {
				tx_desc = IGB_TX_DESC(tx_ring, 0);
				i = 0;
			}
4907
			tx_desc->read.olinfo_status = 0;
4908 4909 4910 4911 4912 4913 4914 4915 4916

			dma += IGB_MAX_DATA_PER_TXD;
			size -= IGB_MAX_DATA_PER_TXD;

			tx_desc->read.buffer_addr = cpu_to_le64(dma);
		}

		if (likely(!data_len))
			break;
4917

4918
		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
4919

4920
		i++;
4921 4922 4923
		tx_desc++;
		if (i == tx_ring->count) {
			tx_desc = IGB_TX_DESC(tx_ring, 0);
4924
			i = 0;
4925
		}
4926
		tx_desc->read.olinfo_status = 0;
4927

E
Eric Dumazet 已提交
4928
		size = skb_frag_size(frag);
4929 4930 4931
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4932
				       size, DMA_TO_DEVICE);
4933

4934
		tx_buffer = &tx_ring->tx_buffer_info[i];
4935 4936
	}

4937
	/* write last descriptor with RS and EOP bits */
4938 4939
	cmd_type |= size | IGB_TXD_DCMD;
	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
4940

4941 4942
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

4943 4944 4945
	/* set the timestamp */
	first->time_stamp = jiffies;

4946
	/* Force memory writes to complete before letting h/w know there
4947 4948 4949 4950 4951 4952 4953 4954
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

4955
	/* set next_to_watch value indicating a packet is present */
4956
	first->next_to_watch = tx_desc;
4957

4958 4959 4960
	i++;
	if (i == tx_ring->count)
		i = 0;
4961

4962
	tx_ring->next_to_use = i;
4963

4964 4965 4966 4967
	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);

	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
4968 4969 4970 4971 4972 4973 4974
		writel(i, tx_ring->tail);

		/* we need this if more than one processor can write to our tail
		 * at a time, it synchronizes IO on IA64/Altix systems
		 */
		mmiowb();
	}
4975 4976 4977 4978 4979 4980 4981
	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer_info map */
	for (;;) {
4982 4983 4984
		tx_buffer = &tx_ring->tx_buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, tx_buffer);
		if (tx_buffer == first)
4985
			break;
4986 4987
		if (i == 0)
			i = tx_ring->count;
4988 4989 4990
		i--;
	}

4991 4992 4993
	tx_ring->next_to_use = i;
}

4994 4995
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
				struct igb_ring *tx_ring)
4996
{
4997
	struct igb_tx_buffer *first;
4998
	int tso;
N
Nick Nunley 已提交
4999
	u32 tx_flags = 0;
5000
	unsigned short f;
5001
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
5002
	__be16 protocol = vlan_get_protocol(skb);
N
Nick Nunley 已提交
5003
	u8 hdr_len = 0;
5004

5005 5006
	/* need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD,
5007 5008
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for context descriptor,
5009 5010
	 * otherwise try next time
	 */
5011 5012
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
5013 5014

	if (igb_maybe_stop_tx(tx_ring, count + 3)) {
5015 5016 5017
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}
5018

5019 5020 5021 5022 5023 5024
	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
	first->skb = skb;
	first->bytecount = skb->len;
	first->gso_segs = 1;

5025 5026
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
		struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
5027

5028 5029
		if (!test_and_set_bit_lock(__IGB_PTP_TX_IN_PROGRESS,
					   &adapter->state)) {
5030 5031 5032 5033 5034 5035 5036 5037
			skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
			tx_flags |= IGB_TX_FLAGS_TSTAMP;

			adapter->ptp_tx_skb = skb_get(skb);
			adapter->ptp_tx_start = jiffies;
			if (adapter->hw.mac.type == e1000_82576)
				schedule_work(&adapter->ptp_tx_work);
		}
5038
	}
5039

5040 5041
	skb_tx_timestamp(skb);

5042
	if (skb_vlan_tag_present(skb)) {
5043
		tx_flags |= IGB_TX_FLAGS_VLAN;
5044
		tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
5045 5046
	}

5047 5048 5049
	/* record initial flags and protocol */
	first->tx_flags = tx_flags;
	first->protocol = protocol;
A
Alexander Duyck 已提交
5050

5051 5052
	tso = igb_tso(tx_ring, first, &hdr_len);
	if (tso < 0)
5053
		goto out_drop;
5054 5055
	else if (!tso)
		igb_tx_csum(tx_ring, first);
5056

5057
	igb_tx_map(tx_ring, first, hdr_len);
5058

5059
	return NETDEV_TX_OK;
5060 5061

out_drop:
5062 5063
	igb_unmap_and_free_tx_resource(tx_ring, first);

5064
	return NETDEV_TX_OK;
5065 5066
}

5067 5068
static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
						    struct sk_buff *skb)
5069
{
5070 5071
	unsigned int r_idx = skb->queue_mapping;

5072 5073 5074 5075 5076 5077
	if (r_idx >= adapter->num_tx_queues)
		r_idx = r_idx % adapter->num_tx_queues;

	return adapter->tx_ring[r_idx];
}

5078 5079
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
				  struct net_device *netdev)
5080 5081
{
	struct igb_adapter *adapter = netdev_priv(netdev);
5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	if (skb->len <= 0) {
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

5093
	/* The minimum packet size with TCTL.PSP set is 17 so pad the skb
5094 5095
	 * in order to meet this minimum size requirement.
	 */
5096 5097
	if (skb_put_padto(skb, 17))
		return NETDEV_TX_OK;
5098

5099
	return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
5100 5101 5102
}

/**
5103 5104
 *  igb_tx_timeout - Respond to a Tx Hang
 *  @netdev: network interface device structure
5105 5106 5107 5108 5109 5110 5111 5112
 **/
static void igb_tx_timeout(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* Do the reset outside of interrupt context */
	adapter->tx_timeout_count++;
5113

5114
	if (hw->mac.type >= e1000_82580)
5115 5116
		hw->dev_spec._82575.global_device_reset = true;

5117
	schedule_work(&adapter->reset_task);
5118 5119
	wr32(E1000_EICS,
	     (adapter->eims_enable_mask & ~adapter->eims_other));
5120 5121 5122 5123 5124 5125 5126
}

/* Work handler for adapter->reset_task: dump the adapter state, log the
 * reset and reinitialize the adapter.
 */
static void igb_reset_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work, struct igb_adapter,
						   reset_task);

	igb_dump(adapter);
	netdev_err(adapter->netdev, "Reset adapter\n");
	igb_reinit_locked(adapter);
}

/**
5133 5134 5135
 *  igb_get_stats64 - Get System Network Statistics
 *  @netdev: network interface device structure
 *  @stats: rtnl_link_stats64 pointer
5136
 **/
E
Eric Dumazet 已提交
5137
static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
5138
						struct rtnl_link_stats64 *stats)
5139
{
E
Eric Dumazet 已提交
5140 5141 5142 5143 5144 5145 5146 5147
	struct igb_adapter *adapter = netdev_priv(netdev);

	spin_lock(&adapter->stats64_lock);
	igb_update_stats(adapter, &adapter->stats64);
	memcpy(stats, &adapter->stats64, sizeof(*stats));
	spin_unlock(&adapter->stats64_lock);

	return stats;
5148 5149 5150
}

/**
5151 5152 5153
 *  igb_change_mtu - Change the Maximum Transfer Unit
 *  @netdev: network interface device structure
 *  @new_mtu: new value for maximum frame size
5154
 *
5155
 *  Returns 0 on success, negative on failure
5156 5157 5158 5159
 **/
static int igb_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
5160
	struct pci_dev *pdev = adapter->pdev;
5161
	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5162

5163
	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
5164
		dev_err(&pdev->dev, "Invalid MTU setting\n");
5165 5166 5167
		return -EINVAL;
	}

5168
#define MAX_STD_JUMBO_FRAME_SIZE 9238
5169
	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
5170
		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
5171 5172 5173
		return -EINVAL;
	}

5174 5175 5176 5177
	/* adjust max frame to be at least the size of a standard frame */
	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;

5178
	while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
5179
		usleep_range(1000, 2000);
5180

5181 5182
	/* igb_down has a dependency on max_frame_size */
	adapter->max_frame_size = max_frame;
5183

5184 5185
	if (netif_running(netdev))
		igb_down(adapter);
5186

5187
	dev_info(&pdev->dev, "changing MTU from %d to %d\n",
5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201
		 netdev->mtu, new_mtu);
	netdev->mtu = new_mtu;

	if (netif_running(netdev))
		igb_up(adapter);
	else
		igb_reset(adapter);

	clear_bit(__IGB_RESETTING, &adapter->state);

	return 0;
}

/**
5202 5203
 *  igb_update_stats - Update the board statistics counters
 *  @adapter: board private structure
5204
 **/
E
Eric Dumazet 已提交
5205 5206
void igb_update_stats(struct igb_adapter *adapter,
		      struct rtnl_link_stats64 *net_stats)
5207 5208 5209
{
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
5210
	u32 reg, mpc;
5211 5212
	int i;
	u64 bytes, packets;
E
Eric Dumazet 已提交
5213 5214
	unsigned int start;
	u64 _bytes, _packets;
5215

5216
	/* Prevent stats update while adapter is being reset, or if the pci
5217 5218 5219 5220 5221 5222 5223
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

5224 5225
	bytes = 0;
	packets = 0;
5226 5227

	rcu_read_lock();
5228
	for (i = 0; i < adapter->num_rx_queues; i++) {
5229
		struct igb_ring *ring = adapter->rx_ring[i];
5230 5231 5232
		u32 rqdpc = rd32(E1000_RQDPC(i));
		if (hw->mac.type >= e1000_i210)
			wr32(E1000_RQDPC(i), 0);
E
Eric Dumazet 已提交
5233

5234 5235 5236 5237
		if (rqdpc) {
			ring->rx_stats.drops += rqdpc;
			net_stats->rx_fifo_errors += rqdpc;
		}
E
Eric Dumazet 已提交
5238 5239

		do {
5240
			start = u64_stats_fetch_begin_irq(&ring->rx_syncp);
E
Eric Dumazet 已提交
5241 5242
			_bytes = ring->rx_stats.bytes;
			_packets = ring->rx_stats.packets;
5243
		} while (u64_stats_fetch_retry_irq(&ring->rx_syncp, start));
E
Eric Dumazet 已提交
5244 5245
		bytes += _bytes;
		packets += _packets;
5246 5247
	}

5248 5249
	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;
5250 5251 5252 5253

	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
5254
		struct igb_ring *ring = adapter->tx_ring[i];
E
Eric Dumazet 已提交
5255
		do {
5256
			start = u64_stats_fetch_begin_irq(&ring->tx_syncp);
E
Eric Dumazet 已提交
5257 5258
			_bytes = ring->tx_stats.bytes;
			_packets = ring->tx_stats.packets;
5259
		} while (u64_stats_fetch_retry_irq(&ring->tx_syncp, start));
E
Eric Dumazet 已提交
5260 5261
		bytes += _bytes;
		packets += _packets;
5262
	}
5263 5264
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;
5265
	rcu_read_unlock();
5266 5267

	/* read stats registers */
5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

5285 5286 5287
	mpc = rd32(E1000_MPC);
	adapter->stats.mpc += mpc;
	net_stats->rx_fifo_errors += mpc;
5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
5302
	adapter->stats.rnbc += rd32(E1000_RNBC);
5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

5320 5321
	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);
5322 5323

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
5324 5325 5326 5327
	/* read internal phy specific stats */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
5328 5329 5330 5331 5332

		/* this stat has invalid values on i210/i211 */
		if ((hw->mac.type != e1000_i210) &&
		    (hw->mac.type != e1000_i211))
			adapter->stats.tncrs += rd32(E1000_TNCRS);
5333 5334
	}

5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348
	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
5349 5350
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;
5351 5352 5353 5354

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
5355 5356
	 * our own version based on RUC and ROC
	 */
5357
	net_stats->rx_errors = adapter->stats.rxerrc +
5358 5359 5360
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
5361 5362 5363 5364 5365
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;
5366 5367

	/* Tx Errors */
5368 5369 5370 5371 5372
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;
5373 5374 5375 5376 5377 5378 5379

	/* Tx Dropped needs to be maintained elsewhere */

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
5380 5381 5382 5383 5384 5385 5386 5387 5388

	/* OS2BMC Stats */
	reg = rd32(E1000_MANC);
	if (reg & E1000_MANC_EN_BMC2OS) {
		adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
		adapter->stats.o2bspc += rd32(E1000_O2BSPC);
		adapter->stats.b2ospc += rd32(E1000_B2OSPC);
		adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
	}
5389 5390
}

5391 5392 5393
static void igb_tsync_interrupt(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
5394
	struct ptp_clock_event event;
A
Arnd Bergmann 已提交
5395
	struct timespec64 ts;
5396
	u32 ack = 0, tsauxc, sec, nsec, tsicr = rd32(E1000_TSICR);
5397 5398 5399 5400 5401 5402 5403 5404 5405

	if (tsicr & TSINTR_SYS_WRAP) {
		event.type = PTP_CLOCK_PPS;
		if (adapter->ptp_caps.pps)
			ptp_clock_event(adapter->ptp_clock, &event);
		else
			dev_err(&adapter->pdev->dev, "unexpected SYS WRAP");
		ack |= TSINTR_SYS_WRAP;
	}
5406 5407 5408 5409

	if (tsicr & E1000_TSICR_TXTS) {
		/* retrieve hardware timestamp */
		schedule_work(&adapter->ptp_tx_work);
5410
		ack |= E1000_TSICR_TXTS;
5411
	}
5412

5413 5414
	if (tsicr & TSINTR_TT0) {
		spin_lock(&adapter->tmreg_lock);
A
Arnd Bergmann 已提交
5415 5416 5417
		ts = timespec64_add(adapter->perout[0].start,
				    adapter->perout[0].period);
		/* u32 conversion of tv_sec is safe until y2106 */
5418
		wr32(E1000_TRGTTIML0, ts.tv_nsec);
A
Arnd Bergmann 已提交
5419
		wr32(E1000_TRGTTIMH0, (u32)ts.tv_sec);
5420 5421 5422 5423 5424 5425 5426 5427 5428 5429
		tsauxc = rd32(E1000_TSAUXC);
		tsauxc |= TSAUXC_EN_TT0;
		wr32(E1000_TSAUXC, tsauxc);
		adapter->perout[0].start = ts;
		spin_unlock(&adapter->tmreg_lock);
		ack |= TSINTR_TT0;
	}

	if (tsicr & TSINTR_TT1) {
		spin_lock(&adapter->tmreg_lock);
A
Arnd Bergmann 已提交
5430 5431
		ts = timespec64_add(adapter->perout[1].start,
				    adapter->perout[1].period);
5432
		wr32(E1000_TRGTTIML1, ts.tv_nsec);
A
Arnd Bergmann 已提交
5433
		wr32(E1000_TRGTTIMH1, (u32)ts.tv_sec);
5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461
		tsauxc = rd32(E1000_TSAUXC);
		tsauxc |= TSAUXC_EN_TT1;
		wr32(E1000_TSAUXC, tsauxc);
		adapter->perout[1].start = ts;
		spin_unlock(&adapter->tmreg_lock);
		ack |= TSINTR_TT1;
	}

	if (tsicr & TSINTR_AUTT0) {
		nsec = rd32(E1000_AUXSTMPL0);
		sec  = rd32(E1000_AUXSTMPH0);
		event.type = PTP_CLOCK_EXTTS;
		event.index = 0;
		event.timestamp = sec * 1000000000ULL + nsec;
		ptp_clock_event(adapter->ptp_clock, &event);
		ack |= TSINTR_AUTT0;
	}

	if (tsicr & TSINTR_AUTT1) {
		nsec = rd32(E1000_AUXSTMPL1);
		sec  = rd32(E1000_AUXSTMPH1);
		event.type = PTP_CLOCK_EXTTS;
		event.index = 1;
		event.timestamp = sec * 1000000000ULL + nsec;
		ptp_clock_event(adapter->ptp_clock, &event);
		ack |= TSINTR_AUTT1;
	}

5462 5463
	/* acknowledge the interrupts */
	wr32(E1000_TSICR, ack);
5464 5465
}

5466 5467
static irqreturn_t igb_msix_other(int irq, void *data)
{
5468
	struct igb_adapter *adapter = data;
5469
	struct e1000_hw *hw = &adapter->hw;
P
PJ Waskiewicz 已提交
5470 5471
	u32 icr = rd32(E1000_ICR);
	/* reading ICR causes bit 31 of EICR to be cleared */
5472

5473 5474 5475
	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

5476
	if (icr & E1000_ICR_DOUTSYNC) {
5477 5478
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
G
Greg Rose 已提交
5479 5480
		/* The DMA Out of Sync is also indication of a spoof event
		 * in IOV mode. Check the Wrong VM Behavior register to
5481 5482
		 * see if it is really a spoof event.
		 */
G
Greg Rose 已提交
5483
		igb_check_wvbr(adapter);
5484
	}
5485

5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496
	/* Check for a mailbox event */
	if (icr & E1000_ICR_VMMB)
		igb_msg_task(adapter);

	if (icr & E1000_ICR_LSC) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

5497 5498
	if (icr & E1000_ICR_TS)
		igb_tsync_interrupt(adapter);
5499

P
PJ Waskiewicz 已提交
5500
	wr32(E1000_EIMS, adapter->eims_other);
5501 5502 5503 5504

	return IRQ_HANDLED;
}

5505
static void igb_write_itr(struct igb_q_vector *q_vector)
5506
{
5507
	struct igb_adapter *adapter = q_vector->adapter;
5508
	u32 itr_val = q_vector->itr_val & 0x7FFC;
5509

5510 5511
	if (!q_vector->set_itr)
		return;
5512

5513 5514
	if (!itr_val)
		itr_val = 0x4;
5515

5516 5517
	if (adapter->hw.mac.type == e1000_82575)
		itr_val |= itr_val << 16;
5518
	else
5519
		itr_val |= E1000_EITR_CNT_IGNR;
5520

5521 5522
	writel(itr_val, q_vector->itr_register);
	q_vector->set_itr = 0;
5523 5524
}

5525
static irqreturn_t igb_msix_ring(int irq, void *data)
5526
{
5527
	struct igb_q_vector *q_vector = data;
5528

5529 5530
	/* Write the ITR value calculated from the previous interrupt. */
	igb_write_itr(q_vector);
5531

5532
	napi_schedule(&q_vector->napi);
P
PJ Waskiewicz 已提交
5533

5534
	return IRQ_HANDLED;
J
Jeb Cramer 已提交
5535 5536
}

5537
#ifdef CONFIG_IGB_DCA
5538 5539 5540 5541 5542 5543 5544 5545 5546 5547
static void igb_update_tx_dca(struct igb_adapter *adapter,
			      struct igb_ring *tx_ring,
			      int cpu)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 txctrl = dca3_get_tag(tx_ring->dev, cpu);

	if (hw->mac.type != e1000_82575)
		txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT;

5548
	/* We can enable relaxed ordering for reads, but not writes when
5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568
	 * DCA is enabled.  This is due to a known issue in some chipsets
	 * which will cause the DCA tag to be cleared.
	 */
	txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN |
		  E1000_DCA_TXCTRL_DATA_RRO_EN |
		  E1000_DCA_TXCTRL_DESC_DCA_EN;

	wr32(E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl);
}

static void igb_update_rx_dca(struct igb_adapter *adapter,
			      struct igb_ring *rx_ring,
			      int cpu)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu);

	if (hw->mac.type != e1000_82575)
		rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT;

5569
	/* We can enable relaxed ordering for reads, but not writes when
5570 5571 5572 5573 5574 5575 5576 5577 5578
	 * DCA is enabled.  This is due to a known issue in some chipsets
	 * which will cause the DCA tag to be cleared.
	 */
	rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN |
		  E1000_DCA_RXCTRL_DESC_DCA_EN;

	wr32(E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl);
}

5579
static void igb_update_dca(struct igb_q_vector *q_vector)
J
Jeb Cramer 已提交
5580
{
5581
	struct igb_adapter *adapter = q_vector->adapter;
J
Jeb Cramer 已提交
5582 5583
	int cpu = get_cpu();

5584 5585 5586
	if (q_vector->cpu == cpu)
		goto out_no_update;

5587 5588 5589 5590 5591 5592
	if (q_vector->tx.ring)
		igb_update_tx_dca(adapter, q_vector->tx.ring, cpu);

	if (q_vector->rx.ring)
		igb_update_rx_dca(adapter, q_vector->rx.ring, cpu);

5593 5594
	q_vector->cpu = cpu;
out_no_update:
J
Jeb Cramer 已提交
5595 5596 5597 5598 5599
	put_cpu();
}

static void igb_setup_dca(struct igb_adapter *adapter)
{
5600
	struct e1000_hw *hw = &adapter->hw;
J
Jeb Cramer 已提交
5601 5602
	int i;

5603
	if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
J
Jeb Cramer 已提交
5604 5605
		return;

5606 5607 5608
	/* Always use CB2 mode, difference is masked in the CB driver. */
	wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);

5609
	for (i = 0; i < adapter->num_q_vectors; i++) {
5610 5611
		adapter->q_vector[i]->cpu = -1;
		igb_update_dca(adapter->q_vector[i]);
J
Jeb Cramer 已提交
5612 5613 5614 5615 5616 5617 5618
	}
}

static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
5619
	struct pci_dev *pdev = adapter->pdev;
J
Jeb Cramer 已提交
5620 5621 5622 5623 5624 5625
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
5626
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
J
Jeb Cramer 已提交
5627 5628
			break;
		if (dca_add_requester(dev) == 0) {
5629
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
5630
			dev_info(&pdev->dev, "DCA enabled\n");
J
Jeb Cramer 已提交
5631 5632 5633 5634 5635
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
5636
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
J
Jeb Cramer 已提交
5637
			/* without this a class_device is left
5638 5639
			 * hanging around in the sysfs model
			 */
J
Jeb Cramer 已提交
5640
			dca_remove_requester(dev);
5641
			dev_info(&pdev->dev, "DCA disabled\n");
5642
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
A
Alexander Duyck 已提交
5643
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
J
Jeb Cramer 已提交
5644 5645 5646
		}
		break;
	}
5647

J
Jeb Cramer 已提交
5648
	return 0;
5649 5650
}

J
Jeb Cramer 已提交
5651
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
5652
			  void *p)
J
Jeb Cramer 已提交
5653 5654 5655 5656
{
	int ret_val;

	ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
5657
					 __igb_notify_dca);
J
Jeb Cramer 已提交
5658 5659 5660

	return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
}
5661
#endif /* CONFIG_IGB_DCA */
5662

5663 5664 5665 5666 5667
#ifdef CONFIG_PCI_IOV
static int igb_vf_configure(struct igb_adapter *adapter, int vf)
{
	unsigned char mac_addr[ETH_ALEN];

5668
	eth_zero_addr(mac_addr);
5669 5670
	igb_set_vf_mac(adapter, vf, mac_addr);

L
Lior Levy 已提交
5671 5672 5673
	/* By default spoof check is enabled for all VFs */
	adapter->vf_data[vf].spoofchk_enabled = true;

5674
	return 0;
5675 5676 5677
}

#endif
5678 5679 5680 5681 5682 5683 5684 5685
static void igb_ping_all_vfs(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 ping;
	int i;

	for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
		ping = E1000_PF_CONTROL_MSG;
5686
		if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5687 5688 5689 5690 5691
			ping |= E1000_VT_MSGTYPE_CTS;
		igb_write_mbx(hw, &ping, 1, i);
	}
}

5692 5693 5694 5695 5696 5697
/* Apply a VF's promiscuous-mode request to its VMOLR register.
 *
 * @msgbuf: request word; the multicast-promiscuous bit is consumed (cleared)
 *          when handled
 * @vf:     index of the requesting VF
 *
 * Returns -EINVAL when bits covered by E1000_VT_MSGINFO_MASK remain set
 * after processing (VMOLR has already been written by then), 0 otherwise.
 */
static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	/* start from "no promiscuous mode of any kind" */
	vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
			    IGB_VF_FLAG_MULTI_PROMISC);
	vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

	if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
		vmolr |= E1000_VMOLR_MPME;
		vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
		*msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
	} else if (vf_data->num_vf_mc_hashes > 30) {
		/* more hashes were requested than are stored: stay in
		 * multicast promiscuous mode
		 */
		vmolr |= E1000_VMOLR_MPME;
	} else if (vf_data->num_vf_mc_hashes) {
		int j;

		/* if we have hashes and we are clearing a multicast promisc
		 * flag we need to write the hashes to the MTA as this step
		 * was previously skipped
		 */
		vmolr |= E1000_VMOLR_ROMPE;
		for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
			igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
	}

	wr32(E1000_VMOLR(vf), vmolr);

	/* there are flags left unprocessed, likely not supported */
	return (*msgbuf & E1000_VT_MSGINFO_MASK) ? -EINVAL : 0;
}

5731 5732 5733 5734 5735 5736 5737 5738
/* Record the multicast hash list sent by VF @vf and refresh the Rx mode.
 *
 * The hash count is taken from the MSGINFO field of msgbuf[0]; the hashes
 * themselves are read as u16 values starting at msgbuf[1].  Always
 * returns 0.
 */
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
				  u32 *msgbuf, u32 vf)
{
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u16 *hash_list = (u16 *)&msgbuf[1];
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multi cast
	 * list changes
	 */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported; the unclamped count stored
	 * above is how other code detects the overflow
	 */
	n = (n > 30) ? 30 : n;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}

static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data;
	int i, j;

	for (i = 0; i < adapter->vfs_allocated_count; i++) {
5766
		u32 vmolr = rd32(E1000_VMOLR(i));
5767

5768 5769
		vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);

5770
		vf_data = &adapter->vf_data[i];
5771 5772 5773 5774 5775 5776 5777 5778 5779 5780

		if ((vf_data->num_vf_mc_hashes > 30) ||
		    (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
			vmolr |= E1000_VMOLR_MPME;
		} else if (vf_data->num_vf_mc_hashes) {
			vmolr |= E1000_VMOLR_ROMPE;
			for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
				igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
		}
		wr32(E1000_VMOLR(i), vmolr);
5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808
	}
}

/* Remove a VF from every VLVF pool and drop VLAN filters left without users.
 *
 * Walks the whole VLVF array, clears the pool-select bit belonging to @vf in
 * each entry and, when an enabled entry ends up with no pools selected,
 * clears the entry and removes its VLAN id from the VFTA.  Finally resets the
 * VF's vlans_enabled counter.
 */
static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 pool_mask, reg, vid;
	int i;

	pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));

		/* remove the vf from the pool */
		reg &= ~pool_mask;

		/* if pool is empty then remove entry from vfta */
		if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
		    (reg & E1000_VLVF_VLANID_ENABLE)) {
			/* the VLAN id must be extracted before the entry is
			 * zeroed, otherwise VLAN 0 would be removed from the
			 * VFTA instead of the VLAN this filter was tracking
			 */
			vid = reg & E1000_VLVF_VLANID_MASK;
			reg = 0;
			igb_vfta_set(hw, vid, false);
		}

		wr32(E1000_VLVF(i), reg);
	}

	adapter->vf_data[vf].vlans_enabled = 0;
}

/* Grow or shrink the RLPML field of a VF's VMOLR register by 4 bytes. */
static void igb_vlvf_resize_vf_rlpml(struct igb_adapter *adapter, u32 vf,
				     bool grow)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr = rd32(E1000_VMOLR(vf));
	u32 size = vmolr & E1000_VMOLR_RLPML_MASK;

	if (grow)
		size += 4;
	else
		size -= 4;

	vmolr &= ~E1000_VMOLR_RLPML_MASK;
	vmolr |= size;
	wr32(E1000_VMOLR(vf), vmolr);
}

/* Add or remove the pool selected by @vf from the VLVF entry for @vid.
 *
 * Returns -1 when the MAC is older than 82576 or no VFs are allocated,
 * 0 otherwise (including when no matching or free filter entry exists).
 * When @vf is a real VF (below vfs_allocated_count) its vlans_enabled
 * counter is updated, and the VMOLR RLPML field is grown by 4 when the
 * first VLAN is added and shrunk by 4 when the last one is removed.
 */
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vlvf, idx;

	/* The vlvf table only exists on 82576 hardware and newer, and we
	 * only need to do this if VMDq is enabled
	 */
	if (hw->mac.type < e1000_82576 || !adapter->vfs_allocated_count)
		return -1;

	/* look for an enabled filter already carrying this VLAN id */
	for (idx = 0; idx < E1000_VLVF_ARRAY_SIZE; idx++) {
		vlvf = rd32(E1000_VLVF(idx));
		if ((vlvf & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (vlvf & E1000_VLVF_VLANID_MASK))
			break;
	}

	/* Adding without an enabled match: search for a free filter entry,
	 * i.e. one without the enable bit set
	 */
	if (add && idx == E1000_VLVF_ARRAY_SIZE) {
		for (idx = 0; idx < E1000_VLVF_ARRAY_SIZE; idx++) {
			vlvf = rd32(E1000_VLVF(idx));
			if (!(vlvf & E1000_VLVF_VLANID_ENABLE))
				break;
		}
	}

	/* no entry to work on: nothing matched and nothing was free */
	if (idx >= E1000_VLVF_ARRAY_SIZE)
		return 0;

	if (add) {
		vlvf |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

		/* if !enabled we need to set this up in vfta */
		if (!(vlvf & E1000_VLVF_VLANID_ENABLE)) {
			/* add VID to filter table */
			igb_vfta_set(hw, vid, true);
			vlvf |= E1000_VLVF_VLANID_ENABLE;
		}
		vlvf &= ~E1000_VLVF_VLANID_MASK;
		vlvf |= vid;
	} else {
		/* remove vf from the pool */
		vlvf &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));

		/* if pool is empty then remove entry from vfta */
		if (!(vlvf & E1000_VLVF_POOLSEL_MASK)) {
			vlvf = 0;
			igb_vfta_set(hw, vid, false);
		}
	}
	wr32(E1000_VLVF(idx), vlvf);

	/* do not modify RLPML for PF devices */
	if (vf >= adapter->vfs_allocated_count)
		return 0;

	if (add) {
		if (!adapter->vf_data[vf].vlans_enabled)
			igb_vlvf_resize_vf_rlpml(adapter, vf, true);
		adapter->vf_data[vf].vlans_enabled++;
	} else {
		adapter->vf_data[vf].vlans_enabled--;
		if (!adapter->vf_data[vf].vlans_enabled)
			igb_vlvf_resize_vf_rlpml(adapter, vf, false);
	}

	return 0;
}

/* Program the VMVIR register of @vf: a non-zero @vid is written together
 * with E1000_VMVIR_VLANA_DEFAULT, a zero @vid clears the register.
 */
static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmvir = 0;

	if (vid)
		vmvir = vid | E1000_VMVIR_VLANA_DEFAULT;

	wr32(E1000_VMVIR(vf), vmvir);
}

/* Set or clear the administratively assigned VLAN/QoS of a VF.
 *
 * Returns -EINVAL when @vf, @vlan (> 4095) or @qos (> 7) is out of range,
 * the error from igb_vlvf_set() when installing the filter fails, and 0
 * otherwise.  Passing @vlan == 0 and @qos == 0 removes the current setting.
 */
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
			       int vf, u16 vlan, u8 qos)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct vf_data_storage *vf_data;
	int err;

	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
		return -EINVAL;

	vf_data = &adapter->vf_data[vf];

	/* neither a VLAN nor a priority: drop the previous assignment */
	if (!vlan && !qos) {
		igb_vlvf_set(adapter, vf_data->pf_vlan, false, vf);
		igb_set_vmvir(adapter, vlan, vf);
		igb_set_vmolr(adapter, vf, true);
		vf_data->pf_vlan = 0;
		vf_data->pf_qos = 0;
		return 0;
	}

	err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
	if (err)
		return err;

	igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
	igb_set_vmolr(adapter, vf, !vlan);
	vf_data->pf_vlan = vlan;
	vf_data->pf_qos = qos;
	dev_info(&adapter->pdev->dev,
		 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);

	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF VLAN has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}

	return 0;
}

5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973
/* Return the index of the enabled VLVF entry whose VLAN id equals @vid,
 * or -1 when no such entry exists.
 */
static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid)
{
	struct e1000_hw *hw = &adapter->hw;
	int idx;

	for (idx = 0; idx < E1000_VLVF_ARRAY_SIZE; idx++) {
		u32 vlvf = rd32(E1000_VLVF(idx));

		if (!(vlvf & E1000_VLVF_VLANID_ENABLE))
			continue;

		if (vid == (vlvf & E1000_VLVF_VLANID_MASK))
			return idx;
	}

	return -1;
}

5974 5975
/* Handle a VLAN add/remove request received from a VF.
 *
 * The add/remove selector comes from the MSGINFO field of msgbuf[0] and the
 * VLAN id from msgbuf[1].  Returns the first error reported by
 * igb_vlvf_set(), or 0.
 */
static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
	u32 vlvf, pf_bit;
	int regndx;
	int err = 0;

	/* If in promiscuous mode we need to make sure the PF also has
	 * the VLAN filter set.
	 */
	if (add && (adapter->netdev->flags & IFF_PROMISC))
		err = igb_vlvf_set(adapter, vid, add,
				   adapter->vfs_allocated_count);
	if (err)
		return err;

	err = igb_vlvf_set(adapter, vid, add, vf);
	if (err)
		return err;

	/* The remaining checks only matter when a filter was removed while
	 * the PF is promiscuous: see whether the filter should be wiped
	 * completely.
	 */
	if (add || !(adapter->netdev->flags & IFF_PROMISC))
		return err;

	regndx = igb_find_vlvf_entry(adapter, vid);
	if (regndx < 0)
		return err;

	/* isolate the PF pool bit of this VLAN filter entry */
	vlvf = rd32(E1000_VLVF(regndx));
	pf_bit = vlvf;
	pf_bit &= 1 << (E1000_VLVF_POOLSEL_SHIFT +
			adapter->vfs_allocated_count);

	/* If the filter was removed then ensure PF pool bit
	 * is cleared if the PF only added itself to the pool
	 * because the PF is in promiscuous mode.
	 */
	if ((vlvf & VLAN_VID_MASK) == vid &&
	    !test_bit(vid, adapter->active_vlans) &&
	    !pf_bit)
		igb_vlvf_set(adapter, vid, add,
			     adapter->vfs_allocated_count);

	return err;
}

6025
/* Bring the PF-side state of a VF back to its defaults.
 *
 * Clears all VF flags except IGB_VF_FLAG_PF_SET_MAC, restarts the NACK
 * timer, resets the VMOLR offloads, removes the VF from all VLAN pools
 * (re-applying the administratively set VLAN if there is one), forgets the
 * VF's multicast hashes and finally rebuilds the receive filters.
 */
static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
{
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	/* clear flags - except flag that indicates PF has set the MAC */
	vf_data->flags &= IGB_VF_FLAG_PF_SET_MAC;
	vf_data->last_nack = jiffies;

	/* reset offloads to defaults */
	igb_set_vmolr(adapter, vf, true);

	/* reset vlans for device; a single sweep of the VLVF table is
	 * enough, so it is not repeated when no PF VLAN is configured
	 */
	igb_clear_vf_vfta(adapter, vf);
	if (vf_data->pf_vlan)
		igb_ndo_set_vf_vlan(adapter->netdev, vf,
				    vf_data->pf_vlan,
				    vf_data->pf_qos);

	/* reset multicast table array for vf */
	vf_data->num_vf_mc_hashes = 0;

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);
}

6050 6051 6052 6053
/* Handle a reset event signalled for @vf: forget its MAC address unless the
 * PF assigned it, then run the common VF reset.
 */
static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
{
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];

	/* clear mac address as we were hotplug removed/added */
	if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
		eth_zero_addr(vf_data->vf_mac_addresses);

	/* process remaining reset events */
	igb_vf_reset(adapter, vf);
}

/* Answer a reset request that a VF sent through the mailbox.
 *
 * Performs the common VF reset, programs the VF's MAC address into its
 * receive address register, enables transmit and receive for the VF, marks
 * it clear-to-send and replies with a three-word message: ACK plus the MAC
 * address when one is set, NACK otherwise.
 */
static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
	/* All three words are written to the VF mailbox below, so start
	 * from zero: the MAC only fills six of the eight payload bytes and
	 * the NACK reply fills none of them.
	 */
	u32 msgbuf[3] = { 0 };
	u8 *addr = (u8 *)(&msgbuf[1]);
	u32 reg;

	/* process all the same items cleared in a function level reset */
	igb_vf_reset(adapter, vf);

	/* set vf mac address */
	igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);

	/* enable transmit and receive for vf */
	reg = rd32(E1000_VFTE);
	wr32(E1000_VFTE, reg | (1 << vf));
	reg = rd32(E1000_VFRE);
	wr32(E1000_VFRE, reg | (1 << vf));

	adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;

	/* reply to reset with ack and vf mac address */
	if (!is_zero_ether_addr(vf_mac)) {
		msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
		memcpy(addr, vf_mac, ETH_ALEN);
	} else {
		msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK;
	}
	igb_write_mbx(hw, msgbuf, 3, vf);
}

/* Apply the MAC address a VF requested through the mailbox.
 *
 * Returns -1 when the address is not a valid unicast Ethernet address,
 * otherwise the result of igb_set_vf_mac().
 */
static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
{
	/* The VF MAC Address is stored in a packed array of bytes
	 * starting at the second 32 bit word of the msg array
	 */
	unsigned char *addr = (unsigned char *)&msg[1];

	if (!is_valid_ether_addr(addr))
		return -1;

	return igb_set_vf_mac(adapter, vf, addr);
}

/* React to an ACK from a VF: a VF that is not clear-to-send gets a NACK,
 * sent at most once every two seconds.
 */
static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	u32 nack = E1000_VT_MSGTYPE_NACK;

	/* a VF that is clear to send needs no reminder */
	if (vf_data->flags & IGB_VF_FLAG_CTS)
		return;

	/* if device isn't clear to send it shouldn't be reading either,
	 * but do not NACK more often than every two seconds
	 */
	if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
		return;

	igb_write_mbx(hw, &nack, 1, vf);
	vf_data->last_nack = jiffies;
}

6122
/* Read one mailbox message from @vf, dispatch it and reply with ACK/NACK.
 *
 * Messages already carrying ACK or NACK are ignored.  A reset request is
 * handed to igb_vf_reset_msg(), which sends its own reply.  Any other
 * request is only honoured once the VF is clear-to-send; before that it is
 * NACKed, at most once every two seconds.
 */
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	/* Zero the buffer so that the NACK built on the failure path never
	 * carries uninitialized stack contents should igb_read_mbx() fail
	 * before filling it.
	 */
	u32 msgbuf[E1000_VFMAILBOX_SIZE] = { 0 };
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval) {
		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		goto out;
	}

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/* until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */
	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		retval = -1;
		goto out;
	}

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		retval = -EINVAL;
		if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
			retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		else
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively set MAC address\nReload the VF driver to resume operations\n",
				 vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		retval = -1;
		if (vf_data->pf_vlan)
			dev_warn(&pdev->dev,
				 "VF %d attempted to override administratively set VLAN tag\nReload the VF driver to resume operations\n",
				 vf);
		else
			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	/* only requests that reached the dispatcher are marked CTS */
	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);
}
6204

6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222
/* Service the mailbox of every allocated VF: pending resets first, then
 * pending messages, then pending acks.  Each igb_check_for_*() helper
 * reports a pending item by returning zero.
 */
static void igb_msg_task(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vf = 0;

	while (vf < adapter->vfs_allocated_count) {
		/* reset requests */
		if (igb_check_for_rst(hw, vf) == 0)
			igb_vf_reset_event(adapter, vf);

		/* messages waiting to be read */
		if (igb_check_for_msg(hw, vf) == 0)
			igb_rcv_msg_from_vf(adapter, vf);

		/* acknowledgements */
		if (igb_check_for_ack(hw, vf) == 0)
			igb_rcv_ack_from_vf(adapter, vf);

		vf++;
	}
}

6225 6226 6227 6228 6229 6230 6231
/**
 *  igb_set_uta - Set unicast filter table address
 *  @adapter: board private structure
 *
 *  The unicast table address is a register array of 32-bit registers.
 *  It is meant to be used much like the MTA, but because of certain
 *  hardware limitations every hash bit has to be set to 1, and the VMOLR
 *  ROPE bit is used as the promiscuous enable bit so that vlan tag
 *  stripping still works when promiscuous mode is enabled.
 **/
static void igb_set_uta(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int reg_idx;

	/* The UTA table only exists on 82576 hardware and newer, and it is
	 * only needed while VMDq is enabled
	 */
	if (hw->mac.type < e1000_82576 || !adapter->vfs_allocated_count)
		return;

	/* set every bit in every UTA register */
	for (reg_idx = 0; reg_idx < hw->mac.uta_reg_count; reg_idx++)
		array_wr32(E1000_UTA, reg_idx, ~0);
}

6252
/**
6253 6254 6255
 *  igb_intr_msi - Interrupt Handler
 *  @irq: interrupt number
 *  @data: pointer to a network interface device structure
6256 6257 6258
 **/
static irqreturn_t igb_intr_msi(int irq, void *data)
{
6259 6260
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
6261 6262 6263 6264
	struct e1000_hw *hw = &adapter->hw;
	/* read ICR disables interrupts using IAM */
	u32 icr = rd32(E1000_ICR);

6265
	igb_write_itr(q_vector);
6266

6267 6268 6269
	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

6270
	if (icr & E1000_ICR_DOUTSYNC) {
6271 6272 6273 6274
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

6275 6276 6277 6278 6279 6280
	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

6281 6282
	if (icr & E1000_ICR_TS)
		igb_tsync_interrupt(adapter);
6283

6284
	napi_schedule(&q_vector->napi);
6285 6286 6287 6288 6289

	return IRQ_HANDLED;
}

/**
6290 6291 6292
 *  igb_intr - Legacy Interrupt Handler
 *  @irq: interrupt number
 *  @data: pointer to a network interface device structure
6293 6294 6295
 **/
static irqreturn_t igb_intr(int irq, void *data)
{
6296 6297
	struct igb_adapter *adapter = data;
	struct igb_q_vector *q_vector = adapter->q_vector[0];
6298 6299
	struct e1000_hw *hw = &adapter->hw;
	/* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
6300 6301
	 * need for the IMC write
	 */
6302 6303 6304
	u32 icr = rd32(E1000_ICR);

	/* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
6305 6306
	 * not set, then the adapter didn't send an interrupt
	 */
6307 6308 6309
	if (!(icr & E1000_ICR_INT_ASSERTED))
		return IRQ_NONE;

6310 6311
	igb_write_itr(q_vector);

6312 6313 6314
	if (icr & E1000_ICR_DRSTA)
		schedule_work(&adapter->reset_task);

6315
	if (icr & E1000_ICR_DOUTSYNC) {
6316 6317 6318 6319
		/* HW is reporting DMA is out of sync */
		adapter->stats.doosync++;
	}

6320 6321 6322 6323 6324 6325 6326
	if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		hw->mac.get_link_status = 1;
		/* guard against interrupt when we're going down */
		if (!test_bit(__IGB_DOWN, &adapter->state))
			mod_timer(&adapter->watchdog_timer, jiffies + 1);
	}

6327 6328
	if (icr & E1000_ICR_TS)
		igb_tsync_interrupt(adapter);
6329

6330
	napi_schedule(&q_vector->napi);
6331 6332 6333 6334

	return IRQ_HANDLED;
}

6335
static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
6336
{
6337
	struct igb_adapter *adapter = q_vector->adapter;
6338
	struct e1000_hw *hw = &adapter->hw;
6339

6340 6341 6342 6343
	if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
	    (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
		if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
			igb_set_itr(q_vector);
6344
		else
6345
			igb_update_ring_itr(q_vector);
6346 6347
	}

6348
	if (!test_bit(__IGB_DOWN, &adapter->state)) {
6349
		if (adapter->flags & IGB_FLAG_HAS_MSIX)
6350
			wr32(E1000_EIMS, q_vector->eims_value);
6351 6352 6353
		else
			igb_irq_enable(adapter);
	}
6354 6355
}

6356
/**
6357 6358 6359
 *  igb_poll - NAPI Rx polling callback
 *  @napi: napi polling structure
 *  @budget: count of how many packets we should handle
6360 6361
 **/
static int igb_poll(struct napi_struct *napi, int budget)
6362
{
6363
	struct igb_q_vector *q_vector = container_of(napi,
6364 6365
						     struct igb_q_vector,
						     napi);
6366
	bool clean_complete = true;
6367

6368
#ifdef CONFIG_IGB_DCA
6369 6370
	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
		igb_update_dca(q_vector);
J
Jeb Cramer 已提交
6371
#endif
6372
	if (q_vector->tx.ring)
6373
		clean_complete = igb_clean_tx_irq(q_vector);
6374

6375
	if (q_vector->rx.ring)
6376
		clean_complete &= igb_clean_rx_irq(q_vector, budget);
6377

6378 6379 6380
	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;
6381

6382
	/* If not enough Rx work done, exit the polling mode */
6383 6384
	napi_complete(napi);
	igb_ring_irq_enable(q_vector);
6385

6386
	return 0;
6387
}
A
Al Viro 已提交
6388

6389
/**
6390 6391
 *  igb_clean_tx_irq - Reclaim resources after transmit completes
 *  @q_vector: pointer to q_vector containing needed info
6392
 *
6393
 *  returns true if ring is completely cleaned
6394
 **/
6395
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
6396
{
6397
	struct igb_adapter *adapter = q_vector->adapter;
6398
	struct igb_ring *tx_ring = q_vector->tx.ring;
6399
	struct igb_tx_buffer *tx_buffer;
6400
	union e1000_adv_tx_desc *tx_desc;
6401
	unsigned int total_bytes = 0, total_packets = 0;
6402
	unsigned int budget = q_vector->tx.work_limit;
6403
	unsigned int i = tx_ring->next_to_clean;
6404

6405 6406
	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;
A
Alexander Duyck 已提交
6407

6408
	tx_buffer = &tx_ring->tx_buffer_info[i];
6409
	tx_desc = IGB_TX_DESC(tx_ring, i);
6410
	i -= tx_ring->count;
6411

6412 6413
	do {
		union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
6414 6415 6416 6417

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;
6418

6419
		/* prevent any other reads prior to eop_desc */
6420
		read_barrier_depends();
6421

6422 6423 6424 6425
		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

6426 6427
		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;
6428

6429 6430 6431
		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;
6432

6433
		/* free the skb */
6434
		dev_consume_skb_any(tx_buffer->skb);
6435

6436 6437
		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
6438 6439
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
6440 6441
				 DMA_TO_DEVICE);

6442 6443 6444 6445
		/* clear tx_buffer data */
		tx_buffer->skb = NULL;
		dma_unmap_len_set(tx_buffer, len, 0);

6446 6447
		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
6448 6449
			tx_buffer++;
			tx_desc++;
6450
			i++;
6451 6452
			if (unlikely(!i)) {
				i -= tx_ring->count;
6453
				tx_buffer = tx_ring->tx_buffer_info;
6454 6455
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}
6456 6457

			/* unmap any remaining paged data */
6458
			if (dma_unmap_len(tx_buffer, len)) {
6459
				dma_unmap_page(tx_ring->dev,
6460 6461
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
6462
					       DMA_TO_DEVICE);
6463
				dma_unmap_len_set(tx_buffer, len, 0);
6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
6476 6477 6478 6479 6480 6481 6482

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));
A
Alexander Duyck 已提交
6483

6484 6485
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
6486
	i += tx_ring->count;
6487
	tx_ring->next_to_clean = i;
6488 6489 6490 6491
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
6492 6493
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;
6494

6495
	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
6496
		struct e1000_hw *hw = &adapter->hw;
E
Eric Dumazet 已提交
6497

6498
		/* Detect a transmit hang in hardware, this serializes the
6499 6500
		 * check with the clearing of time_stamp and movement of i
		 */
6501
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
6502
		if (tx_buffer->next_to_watch &&
6503
		    time_after(jiffies, tx_buffer->time_stamp +
6504 6505
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
6506 6507

			/* detected Tx unit hang */
6508
			dev_err(tx_ring->dev,
6509
				"Detected Tx Unit Hang\n"
A
Alexander Duyck 已提交
6510
				"  Tx Queue             <%d>\n"
6511 6512 6513 6514 6515 6516
				"  TDH                  <%x>\n"
				"  TDT                  <%x>\n"
				"  next_to_use          <%x>\n"
				"  next_to_clean        <%x>\n"
				"buffer_info[next_to_clean]\n"
				"  time_stamp           <%lx>\n"
6517
				"  next_to_watch        <%p>\n"
6518 6519
				"  jiffies              <%lx>\n"
				"  desc.status          <%x>\n",
A
Alexander Duyck 已提交
6520
				tx_ring->queue_index,
6521
				rd32(E1000_TDH(tx_ring->reg_idx)),
6522
				readl(tx_ring->tail),
6523 6524
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
6525
				tx_buffer->time_stamp,
6526
				tx_buffer->next_to_watch,
6527
				jiffies,
6528
				tx_buffer->next_to_watch->wb.status);
6529 6530 6531 6532 6533
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
6534 6535
		}
	}
6536

6537
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
6538
	if (unlikely(total_packets &&
6539 6540
	    netif_carrier_ok(tx_ring->netdev) &&
	    igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	return !!budget;
6558 6559
}

6560
/**
6561 6562 6563
 *  igb_reuse_rx_page - page flip buffer and store it back on the ring
 *  @rx_ring: rx descriptor ring to store buffers on
 *  @old_buff: donor buffer to have page reused
6564
 *
6565
 *  Synchronizes page for reuse by the adapter
6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579
 **/
static void igb_reuse_rx_page(struct igb_ring *rx_ring,
			      struct igb_rx_buffer *old_buff)
{
	struct igb_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_buffer_info[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
6580
	*new_buff = *old_buff;
6581 6582 6583 6584

	/* sync the buffer for use by the device */
	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
					 old_buff->page_offset,
6585
					 IGB_RX_BUFSZ,
6586 6587 6588
					 DMA_FROM_DEVICE);
}

A
Alexander Duyck 已提交
6589 6590
static inline bool igb_page_is_reserved(struct page *page)
{
6591
	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
A
Alexander Duyck 已提交
6592 6593
}

6594 6595 6596 6597 6598
static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
				  struct page *page,
				  unsigned int truesize)
{
	/* avoid re-using remote pages */
A
Alexander Duyck 已提交
6599
	if (unlikely(igb_page_is_reserved(page)))
6600 6601
		return false;

6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616
#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= IGB_RX_BUFSZ;
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ))
		return false;
#endif

A
Alexander Duyck 已提交
6617 6618 6619 6620 6621
	/* Even if we own the page, we are not allowed to use atomic_set()
	 * This would break get_page_unless_zero() users.
	 */
	atomic_inc(&page->_count);

6622 6623 6624
	return true;
}

6625
/**
6626 6627 6628 6629 6630
 *  igb_add_rx_frag - Add contents of Rx buffer to sk_buff
 *  @rx_ring: rx descriptor ring to transact packets on
 *  @rx_buffer: buffer containing page to add
 *  @rx_desc: descriptor containing length of buffer written by hardware
 *  @skb: sk_buff to place the data into
6631
 *
6632 6633 6634 6635
 *  This function will add the data contained in rx_buffer->page to the skb.
 *  This is done either through a direct copy if the data in the buffer is
 *  less than the skb header size, otherwise it will just attach the page as
 *  a frag to the skb.
6636
 *
6637 6638
 *  The function will then update the page offset if necessary and return
 *  true if the buffer can be reused by the adapter.
6639 6640 6641 6642 6643 6644 6645
 **/
static bool igb_add_rx_frag(struct igb_ring *rx_ring,
			    struct igb_rx_buffer *rx_buffer,
			    union e1000_adv_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
6646
	unsigned char *va = page_address(page) + rx_buffer->page_offset;
6647
	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
6648 6649 6650
#if (PAGE_SIZE < 8192)
	unsigned int truesize = IGB_RX_BUFSZ;
#else
6651
	unsigned int truesize = SKB_DATA_ALIGN(size);
6652
#endif
6653
	unsigned int pull_len;
6654

6655 6656
	if (unlikely(skb_is_nonlinear(skb)))
		goto add_tail_frag;
6657

6658 6659 6660 6661 6662
	if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
		igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
		va += IGB_TS_HDR_LEN;
		size -= IGB_TS_HDR_LEN;
	}
6663

6664
	if (likely(size <= IGB_RX_HDR_LEN)) {
6665 6666
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

A
Alexander Duyck 已提交
6667 6668
		/* page is not reserved, we can reuse buffer as-is */
		if (likely(!igb_page_is_reserved(page)))
6669 6670 6671
			return true;

		/* this page cannot be reused so discard it */
A
Alexander Duyck 已提交
6672
		__free_page(page);
6673 6674 6675
		return false;
	}

6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688
	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, IGB_RX_HDR_LEN);

	/* align pull length to size of long to optimize memcpy performance */
	memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	va += pull_len;
	size -= pull_len;

add_tail_frag:
6689
	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
6690
			(unsigned long)va & ~PAGE_MASK, size, truesize);
6691

6692 6693
	return igb_can_reuse_rx_page(rx_buffer, page, truesize);
}
6694

6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716
static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
					   union e1000_adv_rx_desc *rx_desc,
					   struct sk_buff *skb)
{
	struct igb_rx_buffer *rx_buffer;
	struct page *page;

	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
	page = rx_buffer->page;
	prefetchw(page);

	if (likely(!skb)) {
		void *page_addr = page_address(page) +
				  rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
6717
		skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGB_RX_HDR_LEN);
6718 6719 6720 6721 6722
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_failed++;
			return NULL;
		}

6723
		/* we will be copying header into skb->data in
6724 6725 6726 6727 6728 6729 6730 6731 6732 6733
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
6734
				      IGB_RX_BUFSZ,
6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		igb_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
			       PAGE_SIZE, DMA_FROM_DEVICE);
	}

	/* clear contents of rx_buffer */
	rx_buffer->page = NULL;

	return skb;
}

6753
static inline void igb_rx_checksum(struct igb_ring *ring,
6754 6755
				   union e1000_adv_rx_desc *rx_desc,
				   struct sk_buff *skb)
6756
{
6757
	skb_checksum_none_assert(skb);
6758

6759
	/* Ignore Checksum bit is set */
6760
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
6761 6762 6763 6764
		return;

	/* Rx checksum disabled via ethtool */
	if (!(ring->netdev->features & NETIF_F_RXCSUM))
6765
		return;
6766

6767
	/* TCP/UDP checksum error bit is set */
6768 6769 6770
	if (igb_test_staterr(rx_desc,
			     E1000_RXDEXT_STATERR_TCPE |
			     E1000_RXDEXT_STATERR_IPE)) {
6771
		/* work around errata with sctp packets where the TCPE aka
6772 6773 6774
		 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
		 * packets, (aka let the stack check the crc32c)
		 */
6775 6776
		if (!((skb->len == 60) &&
		      test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
E
Eric Dumazet 已提交
6777
			u64_stats_update_begin(&ring->rx_syncp);
6778
			ring->rx_stats.csum_err++;
E
Eric Dumazet 已提交
6779 6780
			u64_stats_update_end(&ring->rx_syncp);
		}
6781 6782 6783 6784
		/* let the stack verify checksum errors */
		return;
	}
	/* It must be a TCP or UDP packet with a valid checksum */
6785 6786
	if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
				      E1000_RXD_STAT_UDPCS))
6787 6788
		skb->ip_summed = CHECKSUM_UNNECESSARY;

6789 6790
	dev_dbg(ring->dev, "cksum success: bits %08X\n",
		le32_to_cpu(rx_desc->wb.upper.status_error));
6791 6792
}

6793 6794 6795 6796 6797
static inline void igb_rx_hash(struct igb_ring *ring,
			       union e1000_adv_rx_desc *rx_desc,
			       struct sk_buff *skb)
{
	if (ring->netdev->features & NETIF_F_RXHASH)
T
Tom Herbert 已提交
6798 6799 6800
		skb_set_hash(skb,
			     le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
			     PKT_HASH_TYPE_L3);
6801 6802
}

6803
/**
 *  igb_is_non_eop - process handling of non-EOP buffers
 *  @rx_ring: Rx ring being processed
 *  @rx_desc: Rx descriptor for current buffer
 *
 *  This function advances next_to_clean (wrapping to 0 at the end of the
 *  ring) and prefetches the descriptor it now points at.  It returns
 *  false if @rx_desc has the EOP status bit set, i.e. the buffer ends a
 *  frame, and true otherwise to indicate that more buffers belong to the
 *  current frame.
 **/
static bool igb_is_non_eop(struct igb_ring *rx_ring,
			   union e1000_adv_rx_desc *rx_desc)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	/* start pulling in the descriptor that will be examined next */
	prefetch(IGB_RX_DESC(rx_ring, ntc));

	/* EOP set means this buffer completes the frame */
	if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)))
		return false;

	return true;
}

6831
/**
 *  igb_cleanup_headers - Drop errored frames and pad short ones
 *  @rx_ring: rx descriptor ring packet is being transacted on
 *  @rx_desc: pointer to the EOP Rx descriptor
 *  @skb: pointer to current skb being fixed
 *
 *  Frames whose descriptor reports a frame error are freed unless
 *  NETIF_F_RXALL is enabled on the netdev.  Remaining frames are passed
 *  through eth_skb_pad() so that they are large enough to qualify as a
 *  valid Ethernet frame.
 *
 *  Returns true if an error was encountered and skb was freed.
 **/
static bool igb_cleanup_headers(struct igb_ring *rx_ring,
				union e1000_adv_rx_desc *rx_desc,
				struct sk_buff *skb)
{
	/* discard errored frames unless the user asked to receive them */
	if (unlikely(igb_test_staterr(rx_desc,
				      E1000_RXDEXT_ERR_FRAME_ERR_MASK)) &&
	    !(rx_ring->netdev->features & NETIF_F_RXALL)) {
		dev_kfree_skb_any(skb);
		return true;
	}

	/* if eth_skb_pad returns an error the skb was freed */
	return eth_skb_pad(skb) != 0;
}

6865
/**
6866 6867 6868 6869
 *  igb_process_skb_fields - Populate skb header fields from Rx descriptor
 *  @rx_ring: rx descriptor ring packet is being transacted on
 *  @rx_desc: pointer to the EOP Rx descriptor
 *  @skb: pointer to current skb being populated
6870
 *
6871 6872 6873
 *  This function checks the ring, descriptor, and packet information in
 *  order to populate the hash, checksum, VLAN, timestamp, protocol, and
 *  other fields within the skb.
6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884
 **/
static void igb_process_skb_fields(struct igb_ring *rx_ring,
				   union e1000_adv_rx_desc *rx_desc,
				   struct sk_buff *skb)
{
	struct net_device *dev = rx_ring->netdev;

	igb_rx_hash(rx_ring, rx_desc, skb);

	igb_rx_checksum(rx_ring, rx_desc, skb);

6885 6886 6887
	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS) &&
	    !igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))
		igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
6888

6889
	if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
6890 6891
	    igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
		u16 vid;
6892

6893 6894 6895 6896 6897 6898
		if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
		    test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
			vid = be16_to_cpu(rx_desc->wb.upper.vlan);
		else
			vid = le16_to_cpu(rx_desc->wb.upper.vlan);

6899
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
6900 6901 6902 6903 6904 6905 6906
	}

	skb_record_rx_queue(skb, rx_ring->queue_index);

	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
}

6907
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
6908
{
6909
	struct igb_ring *rx_ring = q_vector->rx.ring;
6910
	struct sk_buff *skb = rx_ring->skb;
6911
	unsigned int total_bytes = 0, total_packets = 0;
6912
	u16 cleaned_count = igb_desc_unused(rx_ring);
6913

6914
	while (likely(total_packets < budget)) {
6915
		union e1000_adv_rx_desc *rx_desc;
6916

6917 6918 6919 6920 6921
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}
6922

6923
		rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean);
6924

6925
		if (!rx_desc->wb.upper.status_error)
6926
			break;
6927

6928 6929
		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
6930
		 * descriptor has been written back
6931
		 */
6932
		dma_rmb();
6933

6934
		/* retrieve a buffer from the ring */
6935
		skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb);
6936

6937 6938 6939
		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;
6940

6941
		cleaned_count++;
6942

6943 6944 6945
		/* fetch next buffer in frame if non-eop */
		if (igb_is_non_eop(rx_ring, rx_desc))
			continue;
6946 6947 6948 6949 6950

		/* verify the packet layout is correct */
		if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
			skb = NULL;
			continue;
6951 6952
		}

6953
		/* probably a little skewed due to removing CRC */
6954 6955
		total_bytes += skb->len;

6956 6957
		/* populate checksum, timestamp, VLAN, and protocol */
		igb_process_skb_fields(rx_ring, rx_desc, skb);
6958

J
Jiri Pirko 已提交
6959
		napi_gro_receive(&q_vector->napi, skb);
6960

6961 6962 6963
		/* reset skb pointer */
		skb = NULL;

6964 6965
		/* update budget accounting */
		total_packets++;
6966
	}
6967

6968 6969 6970
	/* place incomplete frames back on ring for completion */
	rx_ring->skb = skb;

E
Eric Dumazet 已提交
6971
	u64_stats_update_begin(&rx_ring->rx_syncp);
6972 6973
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
E
Eric Dumazet 已提交
6974
	u64_stats_update_end(&rx_ring->rx_syncp);
6975 6976
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;
6977 6978

	if (cleaned_count)
6979
		igb_alloc_rx_buffers(rx_ring, cleaned_count);
6980

6981
	return total_packets < budget;
6982 6983
}

6984
static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6985
				  struct igb_rx_buffer *bi)
6986 6987
{
	struct page *page = bi->page;
6988
	dma_addr_t dma;
6989

6990 6991
	/* since we are recycling buffers we should seldom need to alloc */
	if (likely(page))
6992 6993
		return true;

6994
	/* alloc new page for storage */
6995
	page = dev_alloc_page();
6996 6997 6998
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
6999 7000
	}

7001 7002
	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
7003

7004
	/* if mapping failed free memory back to system since
7005 7006
	 * there isn't much point in holding memory we can't use
	 */
7007
	if (dma_mapping_error(rx_ring->dev, dma)) {
7008 7009
		__free_page(page);

7010 7011 7012 7013
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

7014
	bi->dma = dma;
7015 7016
	bi->page = page;
	bi->page_offset = 0;
7017

7018 7019 7020
	return true;
}

7021
/**
7022 7023
 *  igb_alloc_rx_buffers - Replace used receive buffers; packet split
 *  @adapter: address of board private structure
7024
 **/
7025
void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
7026 7027
{
	union e1000_adv_rx_desc *rx_desc;
7028
	struct igb_rx_buffer *bi;
7029
	u16 i = rx_ring->next_to_use;
7030

7031 7032 7033 7034
	/* nothing to do */
	if (!cleaned_count)
		return;

7035
	rx_desc = IGB_RX_DESC(rx_ring, i);
7036
	bi = &rx_ring->rx_buffer_info[i];
7037
	i -= rx_ring->count;
7038

7039
	do {
7040
		if (!igb_alloc_mapped_page(rx_ring, bi))
7041
			break;
7042

7043
		/* Refresh the desc even if buffer_addrs didn't change
7044 7045
		 * because each write-back erases this info.
		 */
7046
		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
7047

7048 7049
		rx_desc++;
		bi++;
7050
		i++;
7051
		if (unlikely(!i)) {
7052
			rx_desc = IGB_RX_DESC(rx_ring, 0);
7053
			bi = rx_ring->rx_buffer_info;
7054 7055 7056
			i -= rx_ring->count;
		}

A
Alexander Duyck 已提交
7057 7058
		/* clear the status bits for the next_to_use descriptor */
		rx_desc->wb.upper.status_error = 0;
7059 7060 7061

		cleaned_count--;
	} while (cleaned_count);
7062

7063 7064
	i += rx_ring->count;

7065
	if (rx_ring->next_to_use != i) {
7066
		/* record the next descriptor to use */
7067 7068
		rx_ring->next_to_use = i;

7069 7070 7071
		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

7072
		/* Force memory writes to complete before letting h/w
7073 7074
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
7075 7076
		 * such as IA-64).
		 */
7077
		wmb();
7078
		writel(i, rx_ring->tail);
7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100
	}
}

/**
 * igb_mii_ioctl -
 * @netdev:
 * @ifreq:
 * @cmd:
 **/
static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct mii_ioctl_data *data = if_mii(ifr);

	if (adapter->hw.phy.media_type != e1000_media_type_copper)
		return -EOPNOTSUPP;

	switch (cmd) {
	case SIOCGMIIPHY:
		data->phy_id = adapter->hw.phy.addr;
		break;
	case SIOCGMIIREG:
7101
		if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
7102
				     &data->val_out))
7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124
			return -EIO;
		break;
	case SIOCSMIIREG:
	default:
		return -EOPNOTSUPP;
	}
	return 0;
}

/**
 * igb_ioctl -
 * @netdev:
 * @ifreq:
 * @cmd:
 **/
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSMIIREG:
		return igb_mii_ioctl(netdev, ifr, cmd);
7125 7126
	case SIOCGHWTSTAMP:
		return igb_ptp_get_ts_config(netdev, ifr);
7127
	case SIOCSHWTSTAMP:
7128
		return igb_ptp_set_ts_config(netdev, ifr);
7129 7130 7131 7132 7133
	default:
		return -EOPNOTSUPP;
	}
}

7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147
/* Read a 16-bit word at offset @reg of the adapter's PCI config space
 * into @value.
 */
void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter;

	adapter = hw->back;
	pci_read_config_word(adapter->pdev, reg, value);
}

/* Write the 16-bit word pointed to by @value to offset @reg of the
 * adapter's PCI config space.
 */
void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter;

	adapter = hw->back;
	pci_write_config_word(adapter->pdev, reg, *value);
}

7148 7149 7150 7151
/* Read a 16-bit word at offset @reg of the PCIe capability into @value.
 * Returns 0 on success or -E1000_ERR_CONFIG if the read fails.
 */
s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	int failed = pcie_capability_read_word(adapter->pdev, reg, value);

	return failed ? -E1000_ERR_CONFIG : 0;
}

/* Write the 16-bit word pointed to by @value to offset @reg of the PCIe
 * capability.  Returns 0 on success or -E1000_ERR_CONFIG if the write
 * fails.
 */
s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
	struct igb_adapter *adapter = hw->back;
	int failed = pcie_capability_write_word(adapter->pdev, reg, *value);

	return failed ? -E1000_ERR_CONFIG : 0;
}

7168
/**
 *  igb_vlan_mode - enable or disable VLAN tag insert/strip
 *  @netdev: network interface device structure
 *  @features: feature set to apply
 *
 *  Sets or clears CTRL.VME according to NETIF_F_HW_VLAN_CTAG_RX in
 *  @features.  When enabling, the CFI check in RCTL is also turned off.
 *  Finishes by calling igb_rlpml_set().
 **/
static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
	u32 ctrl, rctl;

	/* switch VLAN tag insert/strip on or off */
	ctrl = rd32(E1000_CTRL);
	if (enable)
		ctrl |= E1000_CTRL_VME;
	else
		ctrl &= ~E1000_CTRL_VME;
	wr32(E1000_CTRL, ctrl);

	if (enable) {
		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	}

	igb_rlpml_set(adapter);
}

7195 7196
/**
 *  igb_vlan_rx_add_vid - start receiving a VLAN
 *  @netdev: network interface device structure
 *  @proto: VLAN protocol (unused)
 *  @vid: VLAN id to add
 *
 *  Adds @vid to the VLVF array for the PF pool and to the VFTA, and
 *  records it in active_vlans.  Always returns 0.
 **/
static int igb_vlan_rx_add_vid(struct net_device *netdev,
			       __be16 proto, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	/* attempt to add filter to vlvf array; the PF pool index is the
	 * number of allocated VFs
	 */
	igb_vlvf_set(adapter, vid, true, adapter->vfs_allocated_count);

	/* add the filter since PF can receive vlans w/o entry in vlvf */
	igb_vfta_set(&adapter->hw, vid, true);

	set_bit(vid, adapter->active_vlans);

	return 0;
}

7213 7214
/**
 *  igb_vlan_rx_kill_vid - stop receiving a VLAN
 *  @netdev: network interface device structure
 *  @proto: VLAN protocol (unused)
 *  @vid: VLAN id to remove
 *
 *  Removes @vid from the VLVF array for the PF pool; if that reports an
 *  error the VFTA entry is cleared directly.  The id is then dropped
 *  from active_vlans.  Always returns 0.
 **/
static int igb_vlan_rx_kill_vid(struct net_device *netdev,
				__be16 proto, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	/* remove vlan from VLVF table array; if vid was not present in
	 * VLVF just remove it from table
	 */
	if (igb_vlvf_set(adapter, vid, false, adapter->vfs_allocated_count))
		igb_vfta_set(&adapter->hw, vid, false);

	clear_bit(vid, adapter->active_vlans);

	return 0;
}

static void igb_restore_vlan(struct igb_adapter *adapter)
{
J
Jiri Pirko 已提交
7235
	u16 vid;
7236

7237 7238
	igb_vlan_mode(adapter->netdev, adapter->netdev->features);

J
Jiri Pirko 已提交
7239
	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
7240
		igb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
7241 7242
}

7243
/**
 *  igb_set_spd_dplx - apply a forced speed/duplex request
 *  @adapter: board private structure
 *  @spd: requested speed (SPEED_*)
 *  @dplx: requested duplex (DUPLEX_*)
 *
 *  Autonegotiation is switched off first; it is switched back on only
 *  for 1000/full, which is requested through the advertisement mask.
 *
 *  Returns 0 on success, or -EINVAL (after logging) for an unsupported
 *  combination.
 **/
int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_mac_info *mac = &adapter->hw.mac;
	u32 mode;

	mac->autoneg = 0;

	/* Make sure dplx is at most 1 bit and lsb of speed is not set
	 * for the switch() below to work
	 */
	if ((spd & 1) || (dplx & ~1))
		goto err_inval;

	mode = spd + dplx;

	/* Fiber NICs only allow 1000 Mbps full duplex
	 * and 100 Mbps full duplex for 100baseFx sfp
	 */
	if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		switch (mode) {
		case SPEED_10 + DUPLEX_HALF:
		case SPEED_10 + DUPLEX_FULL:
		case SPEED_100 + DUPLEX_HALF:
			goto err_inval;
		default:
			break;
		}
	}

	switch (mode) {
	case SPEED_10 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	case SPEED_10 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_10_FULL;
		break;
	case SPEED_100 + DUPLEX_HALF:
		mac->forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case SPEED_100 + DUPLEX_FULL:
		mac->forced_speed_duplex = ADVERTISE_100_FULL;
		break;
	case SPEED_1000 + DUPLEX_FULL:
		mac->autoneg = 1;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case SPEED_1000 + DUPLEX_HALF: /* not supported */
	default:
		goto err_inval;
	}

	/* clear MDI, MDI(-X) override is only allowed when autoneg enabled */
	adapter->hw.phy.mdix = AUTO_ALL_MODES;

	return 0;

err_inval:
	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
	return -EINVAL;
}

Y
Yan, Zheng 已提交
7302 7303
/**
 *  __igb_shutdown - common suspend/shutdown path
 *  @pdev: PCI device being shut down
 *  @enable_wake: set to whether wake-up should stay armed
 *  @runtime: true for runtime suspend, false for system suspend/shutdown
 *
 *  Detaches and closes the interface, releases the interrupt scheme,
 *  programs the wake-up filters, powers the link up or down accordingly,
 *  releases hardware control and disables the PCI device.
 *
 *  Returns 0, or the pci_save_state() error when CONFIG_PM is enabled;
 *  in the error case @enable_wake is not written.
 **/
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
			  bool runtime)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	/* runtime suspend wakes on link change, otherwise use the WoL mask */
	u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		__igb_close(netdev, true);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	/* drop the link-change wake source while link is reported up */
	status = rd32(E1000_STATUS);
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (!wufc) {
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	} else {
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	}

	/* keep the link alive if anything may need to wake or manage us */
	*enable_wake = wufc || adapter->en_mng_pt;
	if (*enable_wake)
		igb_power_up_link(adapter);
	else
		igb_power_down_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant.
	 */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}

#ifdef CONFIG_PM
7377
#ifdef CONFIG_PM_SLEEP
Y
Yan, Zheng 已提交
7378
static int igb_suspend(struct device *dev)
7379 7380 7381
{
	int retval;
	bool wake;
Y
Yan, Zheng 已提交
7382
	struct pci_dev *pdev = to_pci_dev(dev);
7383

Y
Yan, Zheng 已提交
7384
	retval = __igb_shutdown(pdev, &wake, 0);
7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396
	if (retval)
		return retval;

	if (wake) {
		pci_prepare_to_sleep(pdev);
	} else {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	}

	return 0;
}
7397
#endif /* CONFIG_PM_SLEEP */
7398

Y
Yan, Zheng 已提交
7399
static int igb_resume(struct device *dev)
7400
{
Y
Yan, Zheng 已提交
7401
	struct pci_dev *pdev = to_pci_dev(dev);
7402 7403 7404 7405 7406 7407 7408
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 err;

	pci_set_power_state(pdev, PCI_D0);
	pci_restore_state(pdev);
7409
	pci_save_state(pdev);
T
Taku Izumi 已提交
7410

7411 7412
	if (!pci_device_is_present(pdev))
		return -ENODEV;
7413
	err = pci_enable_device_mem(pdev);
7414 7415 7416 7417 7418 7419 7420 7421 7422 7423
	if (err) {
		dev_err(&pdev->dev,
			"igb: Cannot enable PCI device from suspend\n");
		return err;
	}
	pci_set_master(pdev);

	pci_enable_wake(pdev, PCI_D3hot, 0);
	pci_enable_wake(pdev, PCI_D3cold, 0);

7424
	if (igb_init_interrupt_scheme(adapter, true)) {
A
Alexander Duyck 已提交
7425
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
7426
		rtnl_unlock();
A
Alexander Duyck 已提交
7427
		return -ENOMEM;
7428 7429 7430
	}

	igb_reset(adapter);
7431 7432

	/* let the f/w know that the h/w is now under the control of the
7433 7434
	 * driver.
	 */
7435 7436
	igb_get_hw_control(adapter);

7437 7438
	wr32(E1000_WUS, ~0);

Y
Yan, Zheng 已提交
7439
	if (netdev->flags & IFF_UP) {
7440
		rtnl_lock();
Y
Yan, Zheng 已提交
7441
		err = __igb_open(netdev, true);
7442
		rtnl_unlock();
A
Alexander Duyck 已提交
7443 7444 7445
		if (err)
			return err;
	}
7446 7447

	netif_device_attach(netdev);
Y
Yan, Zheng 已提交
7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478
	return 0;
}

static int igb_runtime_idle(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!igb_has_link(adapter))
		pm_schedule_suspend(dev, MSEC_PER_SEC * 5);

	return -EBUSY;
}

/* Runtime-PM suspend callback: same sequence as system suspend, but
 * __igb_shutdown() is asked for the runtime wake-up configuration.
 */
static int igb_runtime_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	bool wake;
	int retval;

	retval = __igb_shutdown(pdev, &wake, 1);
	if (retval)
		return retval;

	if (!wake) {
		pci_wake_from_d3(pdev, false);
		pci_set_power_state(pdev, PCI_D3hot);
	} else {
		pci_prepare_to_sleep(pdev);
	}

	return 0;
}
Y
Yan, Zheng 已提交
7482 7483 7484 7485 7486

/* Runtime-PM resume callback; identical to the system resume path. */
static int igb_runtime_resume(struct device *dev)
{
	int ret = igb_resume(dev);

	return ret;
}
7487
#endif /* CONFIG_PM */
7488 7489 7490

static void igb_shutdown(struct pci_dev *pdev)
{
7491 7492
	bool wake;

Y
Yan, Zheng 已提交
7493
	__igb_shutdown(pdev, &wake, 0);
7494 7495 7496 7497 7498

	if (system_state == SYSTEM_POWER_OFF) {
		pci_wake_from_d3(pdev, wake);
		pci_set_power_state(pdev, PCI_D3hot);
	}
7499 7500
}

7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511
#ifdef CONFIG_PCI_IOV
static int igb_sriov_reinit(struct pci_dev *dev)
{
	struct net_device *netdev = pci_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;

	rtnl_lock();

	if (netif_running(netdev))
		igb_close(netdev);
7512 7513
	else
		igb_reset(adapter);
7514 7515 7516 7517 7518 7519

	igb_clear_interrupt_scheme(adapter);

	igb_init_queue_configuration(adapter);

	if (igb_init_interrupt_scheme(adapter, true)) {
7520
		rtnl_unlock();
7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	if (netif_running(netdev))
		igb_open(netdev);

	rtnl_unlock();

	return 0;
}

/* Disable SR-IOV and, if that succeeded, reinitialise the PF.  Returns
 * the first error encountered, or 0.
 */
static int igb_pci_disable_sriov(struct pci_dev *dev)
{
	int err;

	err = igb_disable_sriov(dev);
	if (err)
		return err;

	return igb_sriov_reinit(dev);
}

/* Enable @num_vfs virtual functions and reinitialise the PF.  Returns
 * @num_vfs on success or the error from whichever step failed.
 */
static int igb_pci_enable_sriov(struct pci_dev *dev, int num_vfs)
{
	int err;

	err = igb_enable_sriov(dev, num_vfs);
	if (err)
		return err;

	err = igb_sriov_reinit(dev);

	return err ? err : num_vfs;
}

#endif
/* SR-IOV configure callback: @num_vfs of zero disables SR-IOV, any other
 * value enables that many VFs.  Without CONFIG_PCI_IOV this does nothing
 * and returns 0.
 */
static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
{
#ifdef CONFIG_PCI_IOV
	if (num_vfs != 0)
		return igb_pci_enable_sriov(dev, num_vfs);

	return igb_pci_disable_sriov(dev);
#else
	return 0;
#endif
}

7570
#ifdef CONFIG_NET_POLL_CONTROLLER
7571
/* Polling 'interrupt' - used by things like netconsole to send skbs
7572 7573 7574 7575 7576 7577
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
7578
	struct e1000_hw *hw = &adapter->hw;
7579
	struct igb_q_vector *q_vector;
7580 7581
	int i;

7582
	for (i = 0; i < adapter->num_q_vectors; i++) {
7583
		q_vector = adapter->q_vector[i];
7584
		if (adapter->flags & IGB_FLAG_HAS_MSIX)
7585 7586 7587
			wr32(E1000_EIMC, q_vector->eims_value);
		else
			igb_irq_disable(adapter);
7588
		napi_schedule(&q_vector->napi);
7589
	}
7590 7591 7592 7593
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/**
7594 7595 7596
 *  igb_io_error_detected - called when PCI error is detected
 *  @pdev: Pointer to PCI device
 *  @state: The current pci connection state
7597
 *
7598 7599 7600
 *  This function is called after a PCI bus error affecting
 *  this device has been detected.
 **/
7601 7602 7603 7604 7605 7606 7607 7608
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

7609 7610 7611
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

7612 7613 7614 7615 7616 7617 7618 7619 7620
	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}

/**
7621 7622
 *  igb_io_slot_reset - called after the pci bus has been reset.
 *  @pdev: Pointer to PCI device
7623
 *
7624 7625 7626
 *  Restart the card from scratch, as if from a cold-boot. Implementation
 *  resembles the first-half of the igb_resume routine.
 **/
7627 7628 7629 7630 7631
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
7632
	pci_ers_result_t result;
T
Taku Izumi 已提交
7633
	int err;
7634

7635
	if (pci_enable_device_mem(pdev)) {
7636 7637
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
7638 7639 7640 7641
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
7642
		pci_save_state(pdev);
7643

7644 7645
		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);
7646

7647 7648 7649 7650
		igb_reset(adapter);
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}
7651

7652 7653
	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
7654 7655 7656
		dev_err(&pdev->dev,
			"pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
			err);
7657 7658
		/* non-fatal, continue */
	}
7659 7660

	return result;
7661 7662 7663
}

/**
 *  igb_io_resume - called when traffic can start flowing again.
 *  @pdev: Pointer to PCI device
 *
 *  This callback is called when the error recovery driver tells us that
 *  its OK to resume normal operation. Implementation resembles the
 *  second-half of the igb_resume routine.  If the interface is running
 *  and cannot be brought up, the netdev stays detached.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev) && igb_up(adapter)) {
		dev_err(&pdev->dev, "igb_up failed after reset\n");
		return;
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver.
	 */
	igb_get_hw_control(adapter);
}

7691
/**
 *  igb_rar_set_qsel - program a receive address register with pool select
 *  @adapter: board private structure
 *  @addr: six byte MAC address
 *  @index: receive address register to write
 *  @qsel: pool/queue selector
 *
 *  On 82575 the selector is encoded as a multiple of E1000_RAH_POOL_1,
 *  on every other MAC type as a shift of it.
 **/
static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
			     u8 qsel)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rar_low, rar_high;

	/* HW expects these in little endian so we reverse the byte order
	 * from network order (big endian) to little endian
	 */
	rar_low = (u32)addr[0];
	rar_low |= (u32)addr[1] << 8;
	rar_low |= (u32)addr[2] << 16;
	rar_low |= (u32)addr[3] << 24;

	rar_high = (u32)addr[4];
	rar_high |= (u32)addr[5] << 8;

	/* Indicate to hardware the Address is Valid. */
	rar_high |= E1000_RAH_AV;

	if (hw->mac.type != e1000_82575)
		rar_high |= E1000_RAH_POOL_1 << qsel;
	else
		rar_high |= E1000_RAH_POOL_1 * qsel;

	/* each half is flushed before the next write */
	wr32(E1000_RAL(index), rar_low);
	wrfl();
	wr32(E1000_RAH(index), rar_high);
	wrfl();
}

7718
static int igb_set_vf_mac(struct igb_adapter *adapter,
7719
			  int vf, unsigned char *mac_addr)
7720 7721
{
	struct e1000_hw *hw = &adapter->hw;
7722
	/* VF MAC addresses start at end of receive addresses and moves
7723 7724
	 * towards the first, as a result a collision should not be possible
	 */
7725
	int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7726

7727
	memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7728

7729
	igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7730 7731 7732 7733

	return 0;
}

7734 7735 7736 7737 7738 7739 7740
/**
 *  igb_ndo_set_vf_mac - administratively set the MAC address of a VF
 *  @netdev: network interface device structure of the PF
 *  @vf: VF index
 *  @mac: MAC address to assign
 *
 *  Flags the VF as having a PF-assigned MAC, logs the change and
 *  programs the address.
 *
 *  Returns -EINVAL for an invalid address or VF index, otherwise the
 *  result of igb_set_vf_mac().
 **/
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
		return -EINVAL;
	adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
	dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
	/* newline-terminated like the other messages in this function */
	dev_info(&adapter->pdev->dev,
		 "Reload the VF driver to make this change effective.\n");
	if (test_bit(__IGB_DOWN, &adapter->state)) {
		dev_warn(&adapter->pdev->dev,
			 "The VF MAC address has been set, but the PF device is not up.\n");
		dev_warn(&adapter->pdev->dev,
			 "Bring the PF device up before attempting to use the VF device.\n");
	}
	return igb_set_vf_mac(adapter, vf, mac);
}

7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773
/* Map an internal link speed to Mbps.  Only SPEED_100 and SPEED_1000 are
 * recognised; anything else yields 0.
 */
static int igb_link_mbps(int internal_link_speed)
{
	if (internal_link_speed == SPEED_100)
		return 100;

	if (internal_link_speed == SPEED_1000)
		return 1000;

	return 0;
}

static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
				  int link_speed)
{
	int rf_dec, rf_int;
	u32 bcnrc_val;

	if (tx_rate != 0) {
		/* Calculate the rate factor values to set */
		rf_int = link_speed / tx_rate;
		rf_dec = (link_speed - (rf_int * tx_rate));
7774 7775
		rf_dec = (rf_dec * (1 << E1000_RTTBCNRC_RF_INT_SHIFT)) /
			 tx_rate;
7776 7777

		bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7778 7779
		bcnrc_val |= ((rf_int << E1000_RTTBCNRC_RF_INT_SHIFT) &
			      E1000_RTTBCNRC_RF_INT_MASK);
7780 7781 7782 7783 7784 7785
		bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
	} else {
		bcnrc_val = 0;
	}

	wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7786
	/* Set global transmit compensation time to the MMW_SIZE in RTTBCNRM
L
Lior Levy 已提交
7787 7788 7789
	 * register. MMW_SIZE=0x014 if 9728-byte jumbo is supported.
	 */
	wr32(E1000_RTTBCNRM, 0x14);
7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807
	wr32(E1000_RTTBCNRC, bcnrc_val);
}

/**
 *  igb_check_vf_rate_limit - revalidate VF rate limits against link speed
 *  @adapter: board private structure
 *
 *  Does nothing unless a VF rate limit was configured on an 82576.  If
 *  the link speed differs from the one the limits were set at, all
 *  limits are cleared; in every case the limiter of each VF is then
 *  reprogrammed.
 **/
static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
{
	bool reset_rate = false;
	int actual_link_speed;
	int vf;

	/* VF TX rate limit was not set or not supported */
	if (adapter->vf_rate_link_speed == 0 ||
	    adapter->hw.mac.type != e1000_82576)
		return;

	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if (actual_link_speed != adapter->vf_rate_link_speed) {
		reset_rate = true;
		adapter->vf_rate_link_speed = 0;
		dev_info(&adapter->pdev->dev,
			 "Link speed has been changed. VF Transmit rate is disabled\n");
	}

	for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
		if (reset_rate)
			adapter->vf_data[vf].tx_rate = 0;

		igb_set_vf_rate_limit(&adapter->hw, vf,
				      adapter->vf_data[vf].tx_rate,
				      actual_link_speed);
	}
}

7821 7822
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
			     int min_tx_rate, int max_tx_rate)
7823
{
7824 7825 7826 7827 7828 7829 7830
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int actual_link_speed;

	if (hw->mac.type != e1000_82576)
		return -EOPNOTSUPP;

7831 7832 7833
	if (min_tx_rate)
		return -EINVAL;

7834 7835 7836
	actual_link_speed = igb_link_mbps(adapter->link_speed);
	if ((vf >= adapter->vfs_allocated_count) ||
	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7837 7838
	    (max_tx_rate < 0) ||
	    (max_tx_rate > actual_link_speed))
7839 7840 7841
		return -EINVAL;

	adapter->vf_rate_link_speed = actual_link_speed;
7842 7843
	adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
	igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
7844 7845

	return 0;
7846 7847
}

L
Lior Levy 已提交
7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871
/**
 *  igb_ndo_set_vf_spoofchk - enable or disable spoof checking for a VF
 *  @netdev: network interface device structure
 *  @vf: VF index
 *  @setting: true to set the VF's spoof-check bits, false to clear them
 *
 *  Updates DTXSWC on 82576 and TXSWC on every other MAC, then records the
 *  setting in the VF's data.  Returns 0 on success, -EOPNOTSUPP when no VFs
 *  are allocated and -EINVAL when @vf is out of range.
 **/
static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
				   bool setting)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 reg_offset, reg_val, spoof_bits;

	if (!adapter->vfs_allocated_count)
		return -EOPNOTSUPP;

	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;

	/* Bit 'vf' plus the matching bit in the VLAN spoof field */
	spoof_bits = (1 << vf) | (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT));

	if (hw->mac.type == e1000_82576)
		reg_offset = E1000_DTXSWC;
	else
		reg_offset = E1000_TXSWC;

	reg_val = rd32(reg_offset);
	reg_val = setting ? (reg_val | spoof_bits) : (reg_val & ~spoof_bits);
	wr32(reg_offset, reg_val);

	adapter->vf_data[vf].spoofchk_enabled = setting;

	return 0;
}

7875 7876 7877 7878 7879 7880 7881 7882
/**
 *  igb_ndo_get_vf_config - report the stored configuration of a VF
 *  @netdev: network interface device structure
 *  @vf: VF index
 *  @ivi: structure filled in with the VF's settings
 *
 *  Copies the MAC address, Tx rate, VLAN, QoS and spoof-check state kept in
 *  the adapter's per-VF data.  min_tx_rate is always reported as 0.
 *  Returns 0 on success or -EINVAL when @vf is out of range.
 **/
static int igb_ndo_get_vf_config(struct net_device *netdev,
				 int vf, struct ifla_vf_info *ivi)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (vf >= adapter->vfs_allocated_count)
		return -EINVAL;

	ivi->vf = vf;
	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);

	/* Only a maximum rate is tracked per VF */
	ivi->min_tx_rate = 0;
	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;

	ivi->vlan = adapter->vf_data[vf].pf_vlan;
	ivi->qos = adapter->vf_data[vf].pf_qos;
	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;

	return 0;
}

7891 7892 7893
static void igb_vmm_control(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
7894
	u32 reg;
7895

7896 7897
	switch (hw->mac.type) {
	case e1000_82575:
7898 7899
	case e1000_i210:
	case e1000_i211:
7900
	case e1000_i354:
7901 7902
	default:
		/* replication is not supported for 82575 */
7903
		return;
7904 7905 7906 7907 7908
	case e1000_82576:
		/* notify HW that the MAC is adding vlan tags */
		reg = rd32(E1000_DTXCTL);
		reg |= E1000_DTXCTL_VLAN_ADDED;
		wr32(E1000_DTXCTL, reg);
7909
		/* Fall through */
7910 7911 7912 7913 7914
	case e1000_82580:
		/* enable replication vlan tag stripping */
		reg = rd32(E1000_RPLOLR);
		reg |= E1000_RPLOLR_STRVLAN;
		wr32(E1000_RPLOLR, reg);
7915
		/* Fall through */
7916 7917
	case e1000_i350:
		/* none of the above registers are supported by i350 */
7918 7919
		break;
	}
7920

7921 7922 7923
	if (adapter->vfs_allocated_count) {
		igb_vmdq_set_loopback_pf(hw, true);
		igb_vmdq_set_replication_pf(hw, true);
G
Greg Rose 已提交
7924
		igb_vmdq_set_anti_spoofing_pf(hw, true,
7925
					      adapter->vfs_allocated_count);
7926 7927 7928 7929
	} else {
		igb_vmdq_set_loopback_pf(hw, false);
		igb_vmdq_set_replication_pf(hw, false);
	}
7930 7931
}

7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944
/**
 *  igb_init_dmac - configure DMA coalescing
 *  @adapter: board private structure
 *  @pba: packet buffer allocation; the thresholds below treat it as a size
 *	  in KB
 *
 *  On 82580 the PCIEMISC LX decision bit is cleared and DMACR is zeroed.
 *  On MAC types above 82580 the coalescing registers are programmed, but
 *  only when IGB_FLAG_DMAC is set.  Older MAC types are left untouched.
 **/
static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 hwm_floor, thr_floor;
	u32 dmac_thr;
	u32 reg;
	u16 hwm;

	if (hw->mac.type == e1000_82580) {
		reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
		wr32(E1000_DMACR, 0);
		return;
	}

	/* nothing to do on older MACs, or when DMA coalescing is not enabled */
	if (hw->mac.type < e1000_82580 || !(adapter->flags & IGB_FLAG_DMAC))
		return;

	/* force threshold to 0. */
	wr32(E1000_DMCTXTH, 0);

	/* DMA Coalescing high water mark needs to be greater than the Rx
	 * threshold. Set hwm to PBA - max frame size in 16B units, but never
	 * below PBA - 6KB.
	 */
	hwm_floor = 64 * (pba - 6);
	hwm = 64 * pba - adapter->max_frame_size / 16;
	if (hwm < hwm_floor)
		hwm = hwm_floor;

	reg = rd32(E1000_FCRTC);
	reg &= ~E1000_FCRTC_RTH_COAL_MASK;
	reg |= (hwm << E1000_FCRTC_RTH_COAL_SHIFT) & E1000_FCRTC_RTH_COAL_MASK;
	wr32(E1000_FCRTC, reg);

	/* Set the DMA Coalescing Rx threshold to PBA - 2 * max frame size,
	 * but never below PBA - 10KB.
	 */
	thr_floor = pba - 10;
	dmac_thr = pba - adapter->max_frame_size / 512;
	if (dmac_thr < thr_floor)
		dmac_thr = thr_floor;

	reg = rd32(E1000_DMACR);
	reg &= ~E1000_DMACR_DMACTHR_MASK;
	reg |= (dmac_thr << E1000_DMACR_DMACTHR_SHIFT) &
	       E1000_DMACR_DMACTHR_MASK;

	/* transition to L0x or L1 if available; watchdog timer of about
	 * 1000 usec expressed in 32 usec intervals
	 */
	reg |= E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK | (1000 >> 5);

	/* Disable BMC-to-OS Watchdog Enable */
	if (hw->mac.type != e1000_i354)
		reg &= ~E1000_DMACR_DC_BMC2OSW_EN;

	wr32(E1000_DMACR, reg);

	/* no lower threshold to disable coalescing (UTRESH = 0) */
	wr32(E1000_DMCRTRH, 0);

	wr32(E1000_DMCTLX, IGB_DMCTLX_DCFLUSH_DIS | 0x4);

	/* free space in tx packet buffer to wake from DMA coal */
	wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
	     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);

	/* make low power state decision controlled by DMA coal */
	reg = rd32(E1000_PCIEMISC);
	reg &= ~E1000_PCIEMISC_LX_DECISION;
	wr32(E1000_PCIEMISC, reg);
}

8011 8012
/**
 *  igb_read_i2c_byte - Reads 8 bit word over I2C
C
Carolyn Wyborny 已提交
8013 8014 8015 8016 8017 8018 8019
 *  @hw: pointer to hardware structure
 *  @byte_offset: byte offset to read
 *  @dev_addr: device address
 *  @data: value read
 *
 *  Performs byte read operation over I2C interface at
 *  a specified device address.
8020
 **/
C
Carolyn Wyborny 已提交
8021
s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
8022
		      u8 dev_addr, u8 *data)
C
Carolyn Wyborny 已提交
8023 8024
{
	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
8025
	struct i2c_client *this_client = adapter->i2c_client;
C
Carolyn Wyborny 已提交
8026 8027 8028 8029 8030 8031 8032 8033
	s32 status;
	u16 swfw_mask = 0;

	if (!this_client)
		return E1000_ERR_I2C;

	swfw_mask = E1000_SWFW_PHY0_SM;

T
Todd Fujinaka 已提交
8034
	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
C
Carolyn Wyborny 已提交
8035 8036 8037 8038 8039 8040 8041 8042 8043
		return E1000_ERR_SWFW_SYNC;

	status = i2c_smbus_read_byte_data(this_client, byte_offset);
	hw->mac.ops.release_swfw_sync(hw, swfw_mask);

	if (status < 0)
		return E1000_ERR_I2C;
	else {
		*data = status;
T
Todd Fujinaka 已提交
8044
		return 0;
C
Carolyn Wyborny 已提交
8045 8046 8047
	}
}

8048 8049
/**
 *  igb_write_i2c_byte - Writes 8 bit word over I2C
C
Carolyn Wyborny 已提交
8050 8051 8052 8053 8054 8055 8056
 *  @hw: pointer to hardware structure
 *  @byte_offset: byte offset to write
 *  @dev_addr: device address
 *  @data: value to write
 *
 *  Performs byte write operation over I2C interface at
 *  a specified device address.
8057
 **/
C
Carolyn Wyborny 已提交
8058
s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset,
8059
		       u8 dev_addr, u8 data)
C
Carolyn Wyborny 已提交
8060 8061
{
	struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw);
8062
	struct i2c_client *this_client = adapter->i2c_client;
C
Carolyn Wyborny 已提交
8063 8064 8065 8066 8067 8068
	s32 status;
	u16 swfw_mask = E1000_SWFW_PHY0_SM;

	if (!this_client)
		return E1000_ERR_I2C;

T
Todd Fujinaka 已提交
8069
	if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask))
C
Carolyn Wyborny 已提交
8070 8071 8072 8073 8074 8075 8076
		return E1000_ERR_SWFW_SYNC;
	status = i2c_smbus_write_byte_data(this_client, byte_offset, data);
	hw->mac.ops.release_swfw_sync(hw, swfw_mask);

	if (status)
		return E1000_ERR_I2C;
	else
T
Todd Fujinaka 已提交
8077
		return 0;
C
Carolyn Wyborny 已提交
8078 8079

}
8080 8081 8082 8083 8084 8085 8086 8087 8088 8089

int igb_reinit_queues(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (netif_running(netdev))
		igb_close(netdev);

8090
	igb_reset_interrupt_capability(adapter);
8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101

	if (igb_init_interrupt_scheme(adapter, true)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	if (netif_running(netdev))
		err = igb_open(netdev);

	return err;
}
8102
/* igb_main.c */