/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

/* FIXME: MTU in config. */
#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

#define VIRTNET_DRIVER_VERSION "1.0.0"

struct virtnet_stats {
	struct u64_stats_sync tx_syncp;
	struct u64_stats_sync rx_syncp;
	u64 tx_bytes;
	u64 tx_packets;

	u64 rx_bytes;
	u64 rx_packets;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[40];
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	/* Number of input buffers, and max we've ever had. */
	unsigned int num, max;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of this receive queue: input.$index */
	char name[40];
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Has control virtqueue */
	bool has_cvq;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* enable config space updates */
	bool config_enable;

	/* Active statistics */
	struct virtnet_stats __percpu *stats;

	/* Work struct for refilling if we run low on memory. */
	struct delayed_work refill;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Lock for config space updates */
	struct mutex config_lock;

	/* Page_frag for GFP_KERNEL packet buffer allocation when we run
	 * low on memory.
	 */
	struct page_frag alloc_frag;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* Per-cpu variable to show the mapping from CPU to virtqueue */
	int __percpu *vq_index;

	/* CPU hot plug notifier */
	struct notifier_block nb;
};

struct skb_vnet_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf mhdr;
	};
};

struct padded_vnet_hdr {
	struct virtio_net_hdr hdr;
	/*
	 * virtio_net_hdr should be in a separate sg buffer because of a
	 * QEMU bug, and the data sg buffer shares the same page with this
	 * header sg.  This padding makes the next sg 16 byte aligned after
	 * virtio_net_hdr.
	 */
	char padding[6];
};
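
/* Editor's illustration (not in the original file), assuming the 10-byte
 * legacy struct virtio_net_hdr: the first page of a big-packets receive
 * buffer is laid out so the data sg starts 16 bytes in:
 *
 *   offset  0..9   virtio_net_hdr   (rq->sg[0])
 *   offset 10..15  padding
 *   offset 16..    packet data      (rq->sg[1])
 *
 * add_recvbuf_big() below builds exactly this layout.
 */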

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct skb_vnet_hdr *)skb->cb;
}

/*
 * page->private is used to chain pages for big packets; put the whole
 * most recently used list at the front for reuse.
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	/* We were probably waiting for more output buffers. */
	netif_wake_subqueue(vi->dev, vq2txq(vq));
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct sk_buff *skb;
	struct skb_vnet_hdr *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	char *p;

	p = page_address(page) + offset;

	/* copy small packet so we can reuse these pages for small data */
	skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	hdr = skb_vnet_hdr(skb);

	if (vi->mergeable_rx_bufs) {
		hdr_len = sizeof hdr->mhdr;
		hdr_padded_len = sizeof hdr->mhdr;
	} else {
		hdr_len = sizeof hdr->hdr;
		hdr_padded_len = sizeof(struct padded_vnet_hdr);
	}

	memcpy(hdr, p, hdr_len);

	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;

	copy = len;
	if (copy > skb_tailroom(skb))
		copy = skb_tailroom(skb);
	memcpy(skb_put(skb, copy), p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			put_page(page);
		return skb;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

	return skb;
}

static int receive_mergeable(struct receive_queue *rq, struct sk_buff *head_skb)
{
	struct skb_vnet_hdr *hdr = skb_vnet_hdr(head_skb);
	struct sk_buff *curr_skb = head_skb;
	char *buf;
	struct page *page;
	int num_buf, len;

	num_buf = hdr->mhdr.num_buffers;
	while (--num_buf) {
		int num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
		buf = virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!buf)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 head_skb->dev->name, hdr->mhdr.num_buffers);
			head_skb->dev->stats.rx_length_errors++;
			return -EINVAL;
		}
		if (unlikely(len > MAX_PACKET_LEN)) {
			pr_debug("%s: rx error: merge buffer too long\n",
				 head_skb->dev->name);
			len = MAX_PACKET_LEN;
		}
		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);
			if (unlikely(!nskb)) {
				head_skb->dev->stats.rx_dropped++;
				return -ENOMEM;
			}
			if (curr_skb == head_skb)
				skb_shinfo(curr_skb)->frag_list = nskb;
			else
				curr_skb->next = nskb;
			curr_skb = nskb;
			head_skb->truesize += nskb->truesize;
			num_skb_frags = 0;
		}
		if (curr_skb != head_skb) {
			head_skb->data_len += len;
			head_skb->len += len;
			head_skb->truesize += MAX_PACKET_LEN;
		}
		page = virt_to_head_page(buf);
		skb_add_rx_frag(curr_skb, num_skb_frags, page,
				buf - (char *)page_address(page), len,
				MAX_PACKET_LEN);
		--rq->num;
	}
	return 0;
}
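
/* Editor's illustration (not in the original file): with mergeable rx
 * buffers the device may split one packet across num_buffers descriptors.
 * The first buffer already became head_skb in receive_buf(); each
 * remaining buffer is attached as a page fragment, spilling into a
 * frag_list skb once MAX_SKB_FRAGS fragments are in use:
 *
 *   head_skb: [frag 0][frag 1]...[frag MAX_SKB_FRAGS-1]
 *     frag_list -> nskb: [frag 0][frag 1]...
 */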

static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct net_device *dev = vi->dev;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
	struct sk_buff *skb;
	struct page *page;
	struct skb_vnet_hdr *hdr;

	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		if (vi->big_packets)
			give_pages(rq, buf);
		else if (vi->mergeable_rx_bufs)
			put_page(virt_to_head_page(buf));
		else
			dev_kfree_skb(buf);
		return;
	}

	if (!vi->mergeable_rx_bufs && !vi->big_packets) {
		skb = buf;
		len -= sizeof(struct virtio_net_hdr);
		skb_trim(skb, len);
	} else if (vi->mergeable_rx_bufs) {
		struct page *page = virt_to_head_page(buf);
		skb = page_to_skb(rq, page,
				  (char *)buf - (char *)page_address(page),
				  len, MAX_PACKET_LEN);
		if (unlikely(!skb)) {
			dev->stats.rx_dropped++;
			put_page(page);
			return;
		}
		if (receive_mergeable(rq, skb)) {
			dev_kfree_skb(skb);
			return;
		}
	} else {
		page = buf;
		skb = page_to_skb(rq, page, 0, len, PAGE_SIZE);
		if (unlikely(!skb)) {
			dev->stats.rx_dropped++;
			give_pages(rq, page);
			return;
		}
	}

	hdr = skb_vnet_hdr(skb);

	u64_stats_update_begin(&stats->rx_syncp);
	stats->rx_bytes += skb->len;
	stats->rx_packets++;
	u64_stats_update_end(&stats->rx_syncp);

	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		pr_debug("Needs csum!\n");
		if (!skb_partial_csum_set(skb,
					  hdr->hdr.csum_start,
					  hdr->hdr.csum_offset))
			goto frame_err;
	} else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			net_warn_ratelimited("%s: bad gso type %u.\n",
					     dev->name, hdr->hdr.gso_type);
			goto frame_err;
		}

		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
			net_warn_ratelimited("%s: zero gso size.\n", dev->name);
			goto frame_err;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_receive_skb(skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
	dev_kfree_skb(skb);
}

static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct sk_buff *skb;
	struct skb_vnet_hdr *hdr;
	int err;

	skb = __netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN, gfp);
	if (unlikely(!skb))
		return -ENOMEM;

	skb_put(skb, MAX_PACKET_LEN);

	hdr = skb_vnet_hdr(skb);
	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);

	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);

	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
	if (err < 0)
		dev_kfree_skb(skb);

	return err;
}

static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
{
	struct page *first, *list = NULL;
	char *p;
	int i, err, offset;

	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

		/* chain new page in list head to match sg */
		first->private = (unsigned long)list;
		list = first;
	}

	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
		return -ENOMEM;
	}
	p = page_address(first);

	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separate rq->sg[0] for virtio_net_hdr only, due to QEMU bug */
	sg_set_buf(&rq->sg[0], p, sizeof(struct virtio_net_hdr));

	/* rq->sg[1] for data packet, from offset */
	offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);

	return err;
}

static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	char *buf = NULL;
	int err;

	if (gfp & __GFP_WAIT) {
		if (skb_page_frag_refill(MAX_PACKET_LEN, &vi->alloc_frag,
					 gfp)) {
			buf = (char *)page_address(vi->alloc_frag.page) +
			      vi->alloc_frag.offset;
			get_page(vi->alloc_frag.page);
			vi->alloc_frag.offset += MAX_PACKET_LEN;
		}
	} else {
		buf = netdev_alloc_frag(MAX_PACKET_LEN);
	}
	if (!buf)
		return -ENOMEM;

	sg_init_one(rq->sg, buf, MAX_PACKET_LEN);
	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
	if (err < 0)
		put_page(virt_to_head_page(buf));

	return err;
}

/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	int err;
	bool oom;

	do {
		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(rq, gfp);
		else
			err = add_recvbuf_small(rq, gfp);

		oom = err == -ENOMEM;
		if (err)
			break;
		++rq->num;
	} while (rq->vq->num_free);
	if (unlikely(rq->num > rq->max))
		rq->max = rq->num;
	virtqueue_kick(rq->vq);
	return !oom;
}

static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	/* Schedule NAPI; suppress further interrupts if successful. */
	if (napi_schedule_prep(&rq->napi)) {
		virtqueue_disable_cb(rvq);
		__napi_schedule(&rq->napi);
	}
}

static void virtnet_napi_enable(struct receive_queue *rq)
{
	napi_enable(&rq->napi);

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets
	 * now.  virtnet_poll wants to re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED.
	 */
	if (napi_schedule_prep(&rq->napi)) {
		virtqueue_disable_cb(rq->vq);
		local_bh_disable();
		__napi_schedule(&rq->napi);
		local_bh_enable();
	}
}

static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);
	bool still_empty;
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(rq, GFP_KERNEL);
		virtnet_napi_enable(rq);

		/* In theory, this can happen: if we don't get any buffers in
		 * we will *never* try to fill again.
		 */
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ/2);
	}
}

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	void *buf;
	unsigned int r, len, received = 0;

again:
	while (received < budget &&
	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
		receive_buf(rq, buf, len);
		--rq->num;
		received++;
	}

	if (rq->num < rq->max / 2) {
		if (!try_fill_recv(rq, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}

	/* Out of packets? */
	if (received < budget) {
		r = virtqueue_enable_cb_prepare(rq->vq);
		napi_complete(napi);
		if (unlikely(virtqueue_poll(rq->vq, r)) &&
		    napi_schedule_prep(napi)) {
			virtqueue_disable_cb(rq->vq);
			__napi_schedule(napi);
			goto again;
		}
	}

	return received;
}

static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if oom use wq. */
			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);
		virtnet_napi_enable(&vi->rq[i]);
	}

	return 0;
}

static void free_old_xmit_skbs(struct send_queue *sq)
{
	struct sk_buff *skb;
	unsigned int len;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);

		u64_stats_update_begin(&stats->tx_syncp);
		stats->tx_bytes += skb->len;
		stats->tx_packets++;
		u64_stats_update_end(&stats->tx_syncp);

		dev_kfree_skb_any(skb);
	}
}

static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
{
	struct skb_vnet_hdr *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned num_sg;
	unsigned hdr_len;
	bool can_push;

	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
	if (vi->mergeable_rx_bufs)
		hdr_len = sizeof hdr->mhdr;
	else
		hdr_len = sizeof hdr->hdr;

	can_push = vi->any_header_sg &&
		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */
	if (can_push)
		hdr = (struct skb_vnet_hdr *)(skb->data - hdr_len);
	else
		hdr = skb_vnet_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->hdr.csum_start = skb_checksum_start_offset(skb);
		hdr->hdr.csum_offset = skb->csum_offset;
	} else {
		hdr->hdr.flags = 0;
		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
	}

	if (skb_is_gso(skb)) {
		hdr->hdr.hdr_len = skb_headlen(skb);
		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
			hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else {
		hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
	}

	if (vi->mergeable_rx_bufs)
		hdr->mhdr.num_buffers = 0;

	if (can_push) {
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
}
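
/* Editor's illustration (not in the original file): with VIRTIO_F_ANY_LAYOUT
 * the virtio header is pushed into the skb headroom, so header and linear
 * data leave as one scatterlist entry; otherwise the header gets its own
 * sg entry ahead of the packet:
 *
 *   can_push:   sg[0] = [vnet hdr | packet data], ...   (num_sg entries)
 *   otherwise:  sg[0] = [vnet hdr], sg[1..] = data      (num_sg + 1 entries)
 */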

static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int qnum = skb_get_queue_mapping(skb);
	struct send_queue *sq = &vi->sq[qnum];
	int err;

	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(sq);

	/* Try to transmit */
	err = xmit_skb(sq, skb);

	/* This should not happen! */
	if (unlikely(err)) {
		dev->stats.tx_fifo_errors++;
		if (net_ratelimit())
			dev_warn(&dev->dev,
				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return NETDEV_TX_OK;
	}
	virtqueue_kick(sq->vq);

	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	/* Apparently nice girls don't return TX_BUSY; stop the queue
	 * before it gets out of hand.  Naturally, this wastes entries. */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit_skbs(sq);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}

	return NETDEV_TX_OK;
}

/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *out,
				 struct scatterlist *in)
{
	struct scatterlist *sgs[4], hdr, stat;
	struct virtio_net_ctrl_hdr ctrl;
	virtio_net_ctrl_ack status = ~0;
	unsigned out_num = 0, in_num = 0, tmp;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	ctrl.class = class;
	ctrl.cmd = cmd;
	/* Add header */
	sg_init_one(&hdr, &ctrl, sizeof(ctrl));
	sgs[out_num++] = &hdr;

	if (out)
		sgs[out_num++] = out;
	if (in)
		sgs[out_num + in_num++] = in;

	/* Add return status. */
	sg_init_one(&stat, &status, sizeof(status));
	sgs[out_num + in_num++] = &stat;

	BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
	BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
	       < 0);

	virtqueue_kick(vi->cvq);

	/* Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp))
		cpu_relax();

	return status == VIRTIO_NET_OK;
}
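
/* Editor's illustration (not in the original file): each control command
 * goes out as a single descriptor chain on the control vq, laid out as
 *
 *   [ctrl hdr: class, cmd]   driver -> device
 *   [command payload]        driver -> device  (optional "out" sg)
 *   [response payload]       device -> driver  (optional "in" sg)
 *   [status byte]            device -> driver
 *
 * e.g. virtnet_set_mac_address() below passes the MAC as the "out" sg.
 */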

static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;
	int ret;
	struct sockaddr *addr = p;
	struct scatterlist sg;

	ret = eth_prepare_mac_addr_change(dev, p);
	if (ret)
		return ret;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		sg_init_one(&sg, addr->sa_data, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET,
					  &sg, NULL)) {
			dev_warn(&vdev->dev,
				 "Failed to set mac address by vq command.\n");
			return -EINVAL;
		}
	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
		vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
				  addr->sa_data, dev->addr_len);
	}

	eth_commit_mac_addr_change(dev, p);

	return 0;
}

static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
					       struct rtnl_link_stats64 *tot)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int cpu;
	unsigned int start;

	for_each_possible_cpu(cpu) {
		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
		u64 tpackets, tbytes, rpackets, rbytes;

		do {
			start = u64_stats_fetch_begin_bh(&stats->tx_syncp);
			tpackets = stats->tx_packets;
			tbytes   = stats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&stats->tx_syncp, start));

		do {
			start = u64_stats_fetch_begin_bh(&stats->rx_syncp);
			rpackets = stats->rx_packets;
			rbytes   = stats->rx_bytes;
		} while (u64_stats_fetch_retry_bh(&stats->rx_syncp, start));

		tot->rx_packets += rpackets;
		tot->tx_packets += tpackets;
		tot->rx_bytes   += rbytes;
		tot->tx_bytes   += tbytes;
	}

	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->rx_dropped = dev->stats.rx_dropped;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;

	return tot;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++)
		napi_schedule(&vi->rq[i].napi);
}
#endif

static void virtnet_ack_link_announce(struct virtnet_info *vi)
{
	rtnl_lock();
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL))
		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
	rtnl_unlock();
}

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	struct scatterlist sg;
	struct virtio_net_ctrl_mq s;
	struct net_device *dev = vi->dev;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
		return 0;

	s.virtqueue_pairs = queue_pairs;
	sg_init_one(&sg, &s, sizeof(s));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) {
		dev_warn(&dev->dev, "Failed to set num of queue pairs to %d\n",
			 queue_pairs);
		return -EINVAL;
	} else {
		vi->curr_queue_pairs = queue_pairs;
		/* virtnet_open() will refill when the device goes up. */
		if (dev->flags & IFF_UP)
			schedule_delayed_work(&vi->refill, 0);
	}

	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	/* Make sure refill_work doesn't re-enable napi! */
	cancel_delayed_work_sync(&vi->refill);

	for (i = 0; i < vi->max_queue_pairs; i++)
		napi_disable(&vi->rq[i].napi);

	return 0;
}

static void virtnet_set_rx_mode(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg[2];
	u8 promisc, allmulti;
	struct virtio_net_ctrl_mac *mac_data;
	struct netdev_hw_addr *ha;
	int uc_count;
	int mc_count;
	void *buf;
	int i;

	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
		return;

	promisc = ((dev->flags & IFF_PROMISC) != 0);
	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);

	sg_init_one(sg, &promisc, sizeof(promisc));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC,
				  sg, NULL))
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 promisc ? "en" : "dis");

	sg_init_one(sg, &allmulti, sizeof(allmulti));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI,
				  sg, NULL))
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 allmulti ? "en" : "dis");

	uc_count = netdev_uc_count(dev);
	mc_count = netdev_mc_count(dev);
	/* MAC filter - use one buffer for both lists */
	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
	mac_data = buf;
	if (!buf)
		return;

	sg_init_table(sg, 2);

	/* Store the unicast list and count in the front of the buffer */
	mac_data->entries = uc_count;
	i = 0;
	netdev_for_each_uc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	sg_set_buf(&sg[0], mac_data,
		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));

	/* multicast list and count fill the end */
	mac_data = (void *)&mac_data->macs[uc_count][0];

	mac_data->entries = mc_count;
	i = 0;
	netdev_for_each_mc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	sg_set_buf(&sg[1], mac_data,
		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
				  sg, NULL))
		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");

	kfree(buf);
}

static int virtnet_vlan_rx_add_vid(struct net_device *dev,
				   __be16 proto, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	sg_init_one(&sg, &vid, sizeof(vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL))
		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
	return 0;
}

static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
				    __be16 proto, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	sg_init_one(&sg, &vid, sizeof(vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL))
		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
	return 0;
}

static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
{
	int i;
	int cpu;

	if (vi->affinity_hint_set) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			virtqueue_set_affinity(vi->rq[i].vq, -1);
			virtqueue_set_affinity(vi->sq[i].vq, -1);
		}

		vi->affinity_hint_set = false;
	}

	i = 0;
	for_each_online_cpu(cpu) {
		if (cpu == hcpu) {
			*per_cpu_ptr(vi->vq_index, cpu) = -1;
		} else {
			*per_cpu_ptr(vi->vq_index, cpu) =
				++i % vi->curr_queue_pairs;
		}
	}
}

static void virtnet_set_affinity(struct virtnet_info *vi)
{
	int i;
	int cpu;

	/* In multiqueue mode, when the number of cpus is equal to the number
	 * of queue pairs, we let each queue pair be private to one cpu by
	 * setting the affinity hint to eliminate the contention.
	 */
	if (vi->curr_queue_pairs == 1 ||
	    vi->max_queue_pairs != num_online_cpus()) {
		virtnet_clean_affinity(vi, -1);
		return;
	}

	i = 0;
	for_each_online_cpu(cpu) {
		virtqueue_set_affinity(vi->rq[i].vq, cpu);
		virtqueue_set_affinity(vi->sq[i].vq, cpu);
		*per_cpu_ptr(vi->vq_index, cpu) = i;
		i++;
	}

	vi->affinity_hint_set = true;
}

static int virtnet_cpu_callback(struct notifier_block *nfb,
			        unsigned long action, void *hcpu)
{
	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
	case CPU_DEAD:
		virtnet_set_affinity(vi);
		break;
	case CPU_DOWN_PREPARE:
		virtnet_clean_affinity(vi, (long)hcpu);
		break;
	default:
		break;
	}

	return NOTIFY_OK;
}

static void virtnet_get_ringparam(struct net_device *dev,
				struct ethtool_ringparam *ring)
{
	struct virtnet_info *vi = netdev_priv(dev);

	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
	ring->rx_pending = ring->rx_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}


static void virtnet_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
}

/* TODO: Eliminate OOO packets during switching */
static int virtnet_set_channels(struct net_device *dev,
				struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u16 queue_pairs = channels->combined_count;
	int err;

	/* We don't support separate rx/tx channels.
	 * We don't allow setting 'other' channels.
	 */
	if (channels->rx_count || channels->tx_count || channels->other_count)
		return -EINVAL;

	if (queue_pairs > vi->max_queue_pairs)
		return -EINVAL;

	get_online_cpus();
	err = virtnet_set_queues(vi, queue_pairs);
	if (!err) {
		netif_set_real_num_tx_queues(dev, queue_pairs);
		netif_set_real_num_rx_queues(dev, queue_pairs);

		virtnet_set_affinity(vi);
	}
	put_online_cpus();

	return err;
}

static void virtnet_get_channels(struct net_device *dev,
				 struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);

	channels->combined_count = vi->curr_queue_pairs;
	channels->max_combined = vi->max_queue_pairs;
	channels->max_other = 0;
	channels->rx_count = 0;
	channels->tx_count = 0;
	channels->other_count = 0;
}

static const struct ethtool_ops virtnet_ethtool_ops = {
	.get_drvinfo = virtnet_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ringparam = virtnet_get_ringparam,
	.set_channels = virtnet_set_channels,
	.get_channels = virtnet_get_channels,
};

#define MIN_MTU 68
#define MAX_MTU 65535

static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

/* To avoid contending a lock held by a vcpu who would exit to host, select
 * the txq based on the processor id.
 */
static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	int txq;
	struct virtnet_info *vi = netdev_priv(dev);

	if (skb_rx_queue_recorded(skb)) {
		txq = skb_get_rx_queue(skb);
	} else {
		txq = *__this_cpu_ptr(vi->vq_index);
		if (txq == -1)
			txq = 0;
	}

	while (unlikely(txq >= dev->real_num_tx_queues))
		txq -= dev->real_num_tx_queues;

	return txq;
}

static const struct net_device_ops virtnet_netdev = {
	.ndo_open            = virtnet_open,
	.ndo_stop            = virtnet_close,
	.ndo_start_xmit      = start_xmit,
	.ndo_validate_addr   = eth_validate_addr,
	.ndo_set_mac_address = virtnet_set_mac_address,
	.ndo_set_rx_mode     = virtnet_set_rx_mode,
	.ndo_change_mtu	     = virtnet_change_mtu,
	.ndo_get_stats64     = virtnet_stats,
	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
	.ndo_select_queue     = virtnet_select_queue,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = virtnet_netpoll,
#endif
};

static void virtnet_config_changed_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, config_work);
	u16 v;

	mutex_lock(&vi->config_lock);
	if (!vi->config_enable)
		goto done;

	if (virtio_config_val(vi->vdev, VIRTIO_NET_F_STATUS,
			      offsetof(struct virtio_net_config, status),
			      &v) < 0)
		goto done;

	if (v & VIRTIO_NET_S_ANNOUNCE) {
		netdev_notify_peers(vi->dev);
		virtnet_ack_link_announce(vi);
	}

	/* Ignore unknown (future) status bits */
	v &= VIRTIO_NET_S_LINK_UP;

	if (vi->status == v)
		goto done;

	vi->status = v;

	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		netif_carrier_on(vi->dev);
		netif_tx_wake_all_queues(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_tx_stop_all_queues(vi->dev);
	}
done:
	mutex_unlock(&vi->config_lock);
}

static void virtnet_config_changed(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	schedule_work(&vi->config_work);
}

static void virtnet_free_queues(struct virtnet_info *vi)
{
	kfree(vi->rq);
	kfree(vi->sq);
}

static void free_receive_bufs(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		while (vi->rq[i].pages)
			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
	}
}

static void free_unused_bufs(struct virtnet_info *vi)
{
	void *buf;
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->sq[i].vq;
		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
			dev_kfree_skb(buf);
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->rq[i].vq;

		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
			if (vi->big_packets)
				give_pages(&vi->rq[i], buf);
			else if (vi->mergeable_rx_bufs)
				put_page(virt_to_head_page(buf));
			else
				dev_kfree_skb(buf);
			--vi->rq[i].num;
		}
		BUG_ON(vi->rq[i].num != 0);
	}
}

static void virtnet_del_vqs(struct virtnet_info *vi)
{
	struct virtio_device *vdev = vi->vdev;

	virtnet_clean_affinity(vi, -1);

	vdev->config->del_vqs(vdev);

	virtnet_free_queues(vi);
}

static int virtnet_find_vqs(struct virtnet_info *vi)
{
	vq_callback_t **callbacks;
	struct virtqueue **vqs;
	int ret = -ENOMEM;
	int i, total_vqs;
	const char **names;

	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
	 * possible control vq.
	 */
	total_vqs = vi->max_queue_pairs * 2 +
		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);

	/* Allocate space for find_vqs parameters */
	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		goto err_vq;
	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
	if (!callbacks)
		goto err_callback;
	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
	if (!names)
		goto err_names;

	/* Parameters for control virtqueue, if any */
	if (vi->has_cvq) {
		callbacks[total_vqs - 1] = NULL;
		names[total_vqs - 1] = "control";
	}

	/* Allocate/initialize parameters for send/receive virtqueues */
	for (i = 0; i < vi->max_queue_pairs; i++) {
		callbacks[rxq2vq(i)] = skb_recv_done;
		callbacks[txq2vq(i)] = skb_xmit_done;
		sprintf(vi->rq[i].name, "input.%d", i);
		sprintf(vi->sq[i].name, "output.%d", i);
		names[rxq2vq(i)] = vi->rq[i].name;
		names[txq2vq(i)] = vi->sq[i].name;
	}

	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
					 names);
	if (ret)
		goto err_find;

	if (vi->has_cvq) {
		vi->cvq = vqs[total_vqs - 1];
		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
			vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].vq = vqs[rxq2vq(i)];
		vi->sq[i].vq = vqs[txq2vq(i)];
	}

	kfree(names);
	kfree(callbacks);
	kfree(vqs);

	return 0;

err_find:
	kfree(names);
err_names:
	kfree(callbacks);
err_callback:
	kfree(vqs);
err_vq:
	return ret;
}
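
/* Editor's illustration (not in the original file): for max_queue_pairs = 2
 * with a control vq, find_vqs is asked for 5 virtqueues, named and ordered
 * per the vq2rxq()/vq2txq() convention above:
 *
 *   vq 0: "input.0"    vq 1: "output.0"
 *   vq 2: "input.1"    vq 3: "output.1"
 *   vq 4: "control"
 */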

static int virtnet_alloc_queues(struct virtnet_info *vi)
{
	int i;

	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
	if (!vi->sq)
		goto err_sq;
	vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
	if (!vi->rq)
		goto err_rq;

	INIT_DELAYED_WORK(&vi->refill, refill_work);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].pages = NULL;
		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
			       napi_weight);

		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
	}

	return 0;

err_rq:
	kfree(vi->sq);
err_sq:
	return -ENOMEM;
}

static int init_vqs(struct virtnet_info *vi)
{
	int ret;

	/* Allocate send & receive queues */
	ret = virtnet_alloc_queues(vi);
	if (ret)
		goto err;

	ret = virtnet_find_vqs(vi);
	if (ret)
		goto err_free;

	get_online_cpus();
	virtnet_set_affinity(vi);
	put_online_cpus();

	return 0;

err_free:
	virtnet_free_queues(vi);
err:
	return ret;
}

static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;

	/* Find if host supports multiqueue virtio_net device */
	err = virtio_config_val(vdev, VIRTIO_NET_F_MQ,
				offsetof(struct virtio_net_config,
				max_virtqueue_pairs), &max_queue_pairs);

	/* We need at least 2 queues */
	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		max_queue_pairs = 1;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
	dev->netdev_ops = &virtnet_netdev;
	dev->features = NETIF_F_HIGHDMA;

	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
		if (csum)
			dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;

		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
			dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
		/* Individual feature bits: what can host handle? */
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
			dev->hw_features |= NETIF_F_TSO;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
			dev->hw_features |= NETIF_F_TSO6;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->hw_features |= NETIF_F_TSO_ECN;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
			dev->hw_features |= NETIF_F_UFO;

		if (gso)
			dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
		/* (!csum && gso) case will be fixed by register_netdev() */
	}
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
		dev->features |= NETIF_F_RXCSUM;

	dev->vlan_features = dev->features;

	/* Configuration may specify what MAC to use.  Otherwise random. */
	if (virtio_config_val_len(vdev, VIRTIO_NET_F_MAC,
				  offsetof(struct virtio_net_config, mac),
				  dev->dev_addr, dev->addr_len) < 0)
		eth_hw_addr_random(dev);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
	vi->dev = dev;
	vi->vdev = vdev;
	vdev->priv = vi;
	vi->stats = alloc_percpu(struct virtnet_stats);
	err = -ENOMEM;
	if (vi->stats == NULL)
		goto free;

	vi->vq_index = alloc_percpu(int);
	if (vi->vq_index == NULL)
		goto free_stats;

	mutex_init(&vi->config_lock);
	vi->config_enable = true;
	INIT_WORK(&vi->config_work, virtnet_config_changed_work);

	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
		vi->big_packets = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		vi->mergeable_rx_bufs = true;

	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT))
		vi->any_header_sg = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		vi->has_cvq = true;

	/* Use single tx/rx queue pair as default */
	vi->curr_queue_pairs = 1;
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free_index;

	netif_set_real_num_tx_queues(dev, 1);
	netif_set_real_num_rx_queues(dev, 1);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		goto free_vqs;
	}

	/* Last of all, set up some receive buffers. */
	for (i = 0; i < vi->curr_queue_pairs; i++) {
		try_fill_recv(&vi->rq[i], GFP_KERNEL);

		/* If we didn't even get one input buffer, we're useless. */
		if (vi->rq[i].num == 0) {
			free_unused_bufs(vi);
			err = -ENOMEM;
			goto free_recv_bufs;
		}
	}

	vi->nb.notifier_call = &virtnet_cpu_callback;
	err = register_hotcpu_notifier(&vi->nb);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_recv_bufs;
	}

	/* Assume link up if device can't report link status,
	   otherwise get link status from config. */
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		netif_carrier_off(dev);
		schedule_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		netif_carrier_on(dev);
	}

	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
		 dev->name, max_queue_pairs);

	return 0;

free_recv_bufs:
	free_receive_bufs(vi);
	unregister_netdev(dev);
free_vqs:
	cancel_delayed_work_sync(&vi->refill);
	virtnet_del_vqs(vi);
	if (vi->alloc_frag.page)
		put_page(vi->alloc_frag.page);
free_index:
	free_percpu(vi->vq_index);
free_stats:
	free_percpu(vi->stats);
free:
	free_netdev(dev);
	return err;
}

static void remove_vq_common(struct virtnet_info *vi)
{
	vi->vdev->config->reset(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	virtnet_del_vqs(vi);
}

static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	unregister_hotcpu_notifier(&vi->nb);

	/* Prevent config work handler from accessing the device. */
	mutex_lock(&vi->config_lock);
	vi->config_enable = false;
	mutex_unlock(&vi->config_lock);

	unregister_netdev(vi->dev);

	remove_vq_common(vi);
	if (vi->alloc_frag.page)
		put_page(vi->alloc_frag.page);

	flush_work(&vi->config_work);

	free_percpu(vi->vq_index);
	free_percpu(vi->stats);
	free_netdev(vi->dev);
}

#ifdef CONFIG_PM
static int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int i;

	unregister_hotcpu_notifier(&vi->nb);

	/* Prevent config work handler from accessing the device */
	mutex_lock(&vi->config_lock);
	vi->config_enable = false;
	mutex_unlock(&vi->config_lock);

	netif_device_detach(vi->dev);
	cancel_delayed_work_sync(&vi->refill);

	if (netif_running(vi->dev))
		for (i = 0; i < vi->max_queue_pairs; i++) {
			napi_disable(&vi->rq[i].napi);
			netif_napi_del(&vi->rq[i].napi);
		}

	remove_vq_common(vi);

	flush_work(&vi->config_work);

	return 0;
}

static int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err, i;

	err = init_vqs(vi);
	if (err)
		return err;

	if (netif_running(vi->dev))
		for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_enable(&vi->rq[i]);

	netif_device_attach(vi->dev);

	for (i = 0; i < vi->curr_queue_pairs; i++)
		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
			schedule_delayed_work(&vi->refill, 0);

	mutex_lock(&vi->config_lock);
	vi->config_enable = true;
	mutex_unlock(&vi->config_lock);

	rtnl_lock();
	virtnet_set_queues(vi, vi->curr_queue_pairs);
	rtnl_unlock();

	err = register_hotcpu_notifier(&vi->nb);
	if (err)
		return err;

	return 0;
}
#endif

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static unsigned int features[] = {
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
	VIRTIO_NET_F_CTRL_MAC_ADDR,
	VIRTIO_F_ANY_LAYOUT,
};

static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM
	.freeze =	virtnet_freeze,
	.restore =	virtnet_restore,
#endif
};

module_virtio_driver(virtio_net_driver);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");