/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <net/busy_poll.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN	128

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 1, 64)

/* Minimum alignment for mergeable packet buffers. */
#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256)

#define VIRTNET_DRIVER_VERSION "1.0.0"

struct virtnet_stats {
	struct u64_stats_sync tx_syncp;
	struct u64_stats_sync rx_syncp;
	u64 tx_bytes;
	u64 tx_packets;

	u64 rx_bytes;
	u64 rx_packets;
};

/* Internal representation of a send virtqueue */
struct send_queue {
	/* Virtqueue associated with this send_queue */
	struct virtqueue *vq;

	/* TX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of the send queue: output.$index */
	char name[40];
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
	/* Virtqueue associated with this receive_queue */
	struct virtqueue *vq;

	struct napi_struct napi;

	struct bpf_prog __rcu *xdp_prog;

	/* Chain pages by the private ptr. */
	struct page *pages;

	/* Average packet length for mergeable receive buffers. */
	struct ewma_pkt_len mrg_avg_pkt_len;

	/* Page frag for packet buffer allocation. */
	struct page_frag alloc_frag;

	/* RX: fragments + linear part + virtio header */
	struct scatterlist sg[MAX_SKB_FRAGS + 2];

	/* Name of this receive queue: input.$index */
	char name[40];
};

struct virtnet_info {
	struct virtio_device *vdev;
	struct virtqueue *cvq;
	struct net_device *dev;
	struct send_queue *sq;
	struct receive_queue *rq;
	unsigned int status;

	/* Max # of queue pairs supported by the device */
	u16 max_queue_pairs;

	/* # of queue pairs currently used by the driver */
	u16 curr_queue_pairs;

	/* # of XDP queue pairs currently used by the driver */
	u16 xdp_queue_pairs;

	/* I like... big packets and I cannot lie! */
	bool big_packets;

	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

	/* Has control virtqueue */
	bool has_cvq;

	/* Host can handle any s/g split between our header and packet data */
	bool any_header_sg;

	/* Packet virtio header size */
	u8 hdr_len;

	/* Active statistics */
	struct virtnet_stats __percpu *stats;

	/* Work struct for refilling if we run low on memory. */
	struct delayed_work refill;

	/* Work struct for config space updates */
	struct work_struct config_work;

	/* Is the affinity hint set for the virtqueues? */
	bool affinity_hint_set;

	/* CPU hotplug instances for online & dead */
	struct hlist_node node;
	struct hlist_node node_dead;

	/* Control VQ buffers: protected by the rtnl lock */
	struct virtio_net_ctrl_hdr ctrl_hdr;
	virtio_net_ctrl_ack ctrl_status;
	struct virtio_net_ctrl_mq ctrl_mq;
	u8 ctrl_promisc;
	u8 ctrl_allmulti;
	u16 ctrl_vid;

	/* Ethtool settings */
	u8 duplex;
	u32 speed;
};

struct padded_vnet_hdr {
	struct virtio_net_hdr_mrg_rxbuf hdr;
	/*
	 * hdr is in a separate sg buffer, and data sg buffer shares same page
	 * with this header sg. This padding makes next sg 16 byte aligned
	 * after the header.
	 */
	char padding[4];
};

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
	return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
	return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
	return vq->index / 2;
}

static int rxq2vq(int rxq)
{
	return rxq * 2;
}

static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
}

/*
 * private is used to chain pages for big packets, put the whole
 * most recent used list in the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
	struct page *end;

	/* Find end of list, sew whole thing into vi->rq.pages. */
	for (end = page; end->private; end = (struct page *)end->private);
	end->private = (unsigned long)rq->pages;
	rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
	struct page *p = rq->pages;

	if (p) {
		rq->pages = (struct page *)p->private;
		/* clear private here, it is used to chain pages */
		p->private = 0;
	} else
		p = alloc_page(gfp_mask);
	return p;
}

static void skb_xmit_done(struct virtqueue *vq)
{
	struct virtnet_info *vi = vq->vdev->priv;

	/* Suppress further interrupts. */
	virtqueue_disable_cb(vq);

	/* We were probably waiting for more output buffers. */
	netif_wake_subqueue(vi->dev, vq2txq(vq));
}

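/* Mergeable receive buffers are passed around as a single unsigned long
 * "context": because every buffer is aligned to MERGEABLE_BUFFER_ALIGN
 * (at least 256 bytes), the low bits of the address are free to carry the
 * buffer's truesize in units of MERGEABLE_BUFFER_ALIGN.  The helpers below
 * pack and unpack that encoding.
 */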
static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
{
	unsigned int truesize = mrg_ctx & (MERGEABLE_BUFFER_ALIGN - 1);
	return (truesize + 1) * MERGEABLE_BUFFER_ALIGN;
}

static void *mergeable_ctx_to_buf_address(unsigned long mrg_ctx)
{
	return (void *)(mrg_ctx & -MERGEABLE_BUFFER_ALIGN);

}

static unsigned long mergeable_buf_to_ctx(void *buf, unsigned int truesize)
{
	unsigned int size = truesize / MERGEABLE_BUFFER_ALIGN;
	return (unsigned long)buf | (size - 1);
}

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
				   struct receive_queue *rq,
				   struct page *page, unsigned int offset,
				   unsigned int len, unsigned int truesize)
{
	struct sk_buff *skb;
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	unsigned int copy, hdr_len, hdr_padded_len;
	char *p;

	p = page_address(page) + offset;

	/* copy small packet so we can reuse these pages for small data */
	skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
	if (unlikely(!skb))
		return NULL;

	hdr = skb_vnet_hdr(skb);

	hdr_len = vi->hdr_len;
	if (vi->mergeable_rx_bufs)
		hdr_padded_len = sizeof *hdr;
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	memcpy(hdr, p, hdr_len);

	len -= hdr_len;
	offset += hdr_padded_len;
	p += hdr_padded_len;

	copy = len;
	if (copy > skb_tailroom(skb))
		copy = skb_tailroom(skb);
	memcpy(skb_put(skb, copy), p, copy);

	len -= copy;
	offset += copy;

	if (vi->mergeable_rx_bufs) {
		if (len)
			skb_add_rx_frag(skb, 0, page, offset, len, truesize);
		else
			put_page(page);
		return skb;
	}

	/*
	 * Verify that we can indeed put this data into a skb.
	 * This is here to handle cases when the device erroneously
	 * tries to receive more than is possible. This is usually
	 * the case of a broken device.
	 */
	if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
		net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
		dev_kfree_skb(skb);
		return NULL;
	}
	BUG_ON(offset >= PAGE_SIZE);
	while (len) {
		unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
				frag_size, truesize);
		len -= frag_size;
		page = (struct page *)page->private;
		offset = 0;
	}

	if (page)
		give_pages(rq, page);

	return skb;
}

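/* Run the attached XDP program over a received buffer and collapse its
 * verdict to XDP_PASS or XDP_DROP; in this version XDP_TX and XDP_ABORTED
 * are also treated as drops.
 */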
static u32 do_xdp_prog(struct virtnet_info *vi,
		       struct bpf_prog *xdp_prog,
		       struct page *page, int offset, int len)
{
	int hdr_padded_len;
	struct xdp_buff xdp;
	u32 act;
	u8 *buf;

	buf = page_address(page) + offset;

	if (vi->mergeable_rx_bufs)
		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		hdr_padded_len = sizeof(struct padded_vnet_hdr);

	xdp.data = buf + hdr_padded_len;
	xdp.data_end = xdp.data + (len - vi->hdr_len);

	act = bpf_prog_run_xdp(xdp_prog, &xdp);
	switch (act) {
	case XDP_PASS:
		return XDP_PASS;
	default:
		bpf_warn_invalid_xdp_action(act);
	case XDP_TX:
	case XDP_ABORTED:
	case XDP_DROP:
		return XDP_DROP;
	}
}

static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
{
	struct sk_buff * skb = buf;

	len -= vi->hdr_len;
	skb_trim(skb, len);

	return skb;
}

static struct sk_buff *receive_big(struct net_device *dev,
				   struct virtnet_info *vi,
				   struct receive_queue *rq,
				   void *buf,
				   unsigned int len)
{
	struct bpf_prog *xdp_prog;
	struct page *page = buf;
	struct sk_buff *skb;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (xdp_prog) {
		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
		u32 act;

		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
			goto err_xdp;
		act = do_xdp_prog(vi, xdp_prog, page, 0, len);
		if (act == XDP_DROP)
			goto err_xdp;
	}
	rcu_read_unlock();

	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
	if (unlikely(!skb))
		goto err;

	return skb;

err_xdp:
	rcu_read_unlock();
err:
	dev->stats.rx_dropped++;
	give_pages(rq, page);
	return NULL;
}

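/* Reassemble a packet that the host spread over several mergeable receive
 * buffers: the first buffer carries the virtio-net header and the buffer
 * count, the rest are attached as page fragments (chaining extra skbs once
 * MAX_SKB_FRAGS is reached).
 */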
static struct sk_buff *receive_mergeable(struct net_device *dev,
					 struct virtnet_info *vi,
					 struct receive_queue *rq,
					 unsigned long ctx,
					 unsigned int len)
{
	void *buf = mergeable_ctx_to_buf_address(ctx);
	struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
	struct page *page = virt_to_head_page(buf);
	int offset = buf - page_address(page);
	struct sk_buff *head_skb, *curr_skb;
	struct bpf_prog *xdp_prog;
	unsigned int truesize;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (xdp_prog) {
		u32 act;

		/* No known backend devices should send packets with
		 * more than a single buffer when XDP conditions are
		 * met. However it is not strictly illegal so the case
		 * is handled as an exception and a warning is thrown.
		 */
		if (unlikely(num_buf > 1)) {
			bpf_warn_invalid_xdp_buffer();
			goto err_xdp;
		}

		/* Transient failure which in theory could occur if
		 * in-flight packets from before XDP was enabled reach
		 * the receive path after XDP is loaded. In practice I
		 * was not able to create this condition.
		 */
		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
			goto err_xdp;

		act = do_xdp_prog(vi, xdp_prog, page, offset, len);
		if (act == XDP_DROP)
			goto err_xdp;
	}
	rcu_read_unlock();

	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
	curr_skb = head_skb;

	if (unlikely(!curr_skb))
		goto err_skb;
	while (--num_buf) {
		int num_skb_frags;

		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!ctx)) {
			pr_debug("%s: rx error: %d buffers out of %d missing\n",
				 dev->name, num_buf,
				 virtio16_to_cpu(vi->vdev,
						 hdr->num_buffers));
			dev->stats.rx_length_errors++;
			goto err_buf;
		}

		buf = mergeable_ctx_to_buf_address(ctx);
		page = virt_to_head_page(buf);

		num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
		if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
			struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

			if (unlikely(!nskb))
				goto err_skb;
			if (curr_skb == head_skb)
				skb_shinfo(curr_skb)->frag_list = nskb;
			else
				curr_skb->next = nskb;
			curr_skb = nskb;
			head_skb->truesize += nskb->truesize;
			num_skb_frags = 0;
		}
		truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
		if (curr_skb != head_skb) {
			head_skb->data_len += len;
			head_skb->len += len;
			head_skb->truesize += truesize;
		}
		offset = buf - page_address(page);
		if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
			put_page(page);
			skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
					     len, truesize);
		} else {
			skb_add_rx_frag(curr_skb, num_skb_frags, page,
					offset, len, truesize);
		}
	}

	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
	return head_skb;

err_xdp:
	rcu_read_unlock();
err_skb:
	put_page(page);
	while (--num_buf) {
		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &len);
		if (unlikely(!ctx)) {
			pr_debug("%s: rx error: %d buffers missing\n",
				 dev->name, num_buf);
			dev->stats.rx_length_errors++;
			break;
		}
		page = virt_to_head_page(mergeable_ctx_to_buf_address(ctx));
		put_page(page);
	}
err_buf:
	dev->stats.rx_dropped++;
	dev_kfree_skb(head_skb);
	return NULL;
}

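/* Common receive path: validate the buffer, build an skb via the small,
 * big or mergeable handler, update statistics and hand the result to GRO.
 */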
static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
			void *buf, unsigned int len)
{
	struct net_device *dev = vi->dev;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
	struct sk_buff *skb;
	struct virtio_net_hdr_mrg_rxbuf *hdr;

	if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		if (vi->mergeable_rx_bufs) {
			unsigned long ctx = (unsigned long)buf;
			void *base = mergeable_ctx_to_buf_address(ctx);
			put_page(virt_to_head_page(base));
		} else if (vi->big_packets) {
			give_pages(rq, buf);
		} else {
			dev_kfree_skb(buf);
		}
		return;
	}

	if (vi->mergeable_rx_bufs)
		skb = receive_mergeable(dev, vi, rq, (unsigned long)buf, len);
	else if (vi->big_packets)
		skb = receive_big(dev, vi, rq, buf, len);
	else
		skb = receive_small(vi, buf, len);

	if (unlikely(!skb))
		return;

	hdr = skb_vnet_hdr(skb);

	u64_stats_update_begin(&stats->rx_syncp);
	stats->rx_bytes += skb->len;
	stats->rx_packets++;
	u64_stats_update_end(&stats->rx_syncp);

	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
				  virtio_is_little_endian(vi->vdev))) {
		net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
				     dev->name, hdr->hdr.gso_type,
				     hdr->hdr.gso_size);
		goto frame_err;
	}

	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	napi_gro_receive(&rq->napi, skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
	dev_kfree_skb(skb);
}

static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
			     gfp_t gfp)
{
	struct sk_buff *skb;
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	int err;

	skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp);
	if (unlikely(!skb))
		return -ENOMEM;

	skb_put(skb, GOOD_PACKET_LEN);

	hdr = skb_vnet_hdr(skb);
	sg_init_table(rq->sg, 2);
	sg_set_buf(rq->sg, hdr, vi->hdr_len);
	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);

	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
	if (err < 0)
		dev_kfree_skb(skb);

	return err;
}

static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
			   gfp_t gfp)
{
	struct page *first, *list = NULL;
	char *p;
	int i, err, offset;

	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);

	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
		first = get_a_page(rq, gfp);
		if (!first) {
			if (list)
				give_pages(rq, list);
			return -ENOMEM;
		}
		sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

		/* chain new page in list head to match sg */
		first->private = (unsigned long)list;
		list = first;
	}

	first = get_a_page(rq, gfp);
	if (!first) {
		give_pages(rq, list);
		return -ENOMEM;
	}
	p = page_address(first);

	/* rq->sg[0], rq->sg[1] share the same page */
	/* a separated rq->sg[0] for header - required in case !any_header_sg */
	sg_set_buf(&rq->sg[0], p, vi->hdr_len);

	/* rq->sg[1] for data packet, from offset */
	offset = sizeof(struct padded_vnet_hdr);
	sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

	/* chain first in list head */
	first->private = (unsigned long)list;
	err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
				  first, gfp);
	if (err < 0)
		give_pages(rq, first);

	return err;
}

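/* Size the next mergeable receive buffer from the EWMA of recent packet
 * lengths, clamped between GOOD_PACKET_LEN and PAGE_SIZE - hdr_len.
 */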
static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
{
	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	unsigned int len;

	len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
			GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
	return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
}

static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
{
	struct page_frag *alloc_frag = &rq->alloc_frag;
	char *buf;
	unsigned long ctx;
	int err;
	unsigned int len, hole;

	len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
		return -ENOMEM;

	buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
	ctx = mergeable_buf_to_ctx(buf, len);
	get_page(alloc_frag->page);
	alloc_frag->offset += len;
	hole = alloc_frag->size - alloc_frag->offset;
	if (hole < len) {
		/* To avoid internal fragmentation, if there is very likely not
		 * enough space for another buffer, add the remaining space to
		 * the current buffer. This extra space is not included in
		 * the truesize stored in ctx.
		 */
		len += hole;
		alloc_frag->offset += hole;
	}

	sg_init_one(rq->sg, buf, len);
	err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, (void *)ctx, gfp);
	if (err < 0)
		put_page(virt_to_head_page(buf));

	return err;
}

/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
			  gfp_t gfp)
{
	int err;
	bool oom;

	gfp |= __GFP_COLD;
	do {
		if (vi->mergeable_rx_bufs)
			err = add_recvbuf_mergeable(rq, gfp);
		else if (vi->big_packets)
			err = add_recvbuf_big(vi, rq, gfp);
		else
			err = add_recvbuf_small(vi, rq, gfp);

		oom = err == -ENOMEM;
		if (err)
			break;
	} while (rq->vq->num_free);
	virtqueue_kick(rq->vq);
	return !oom;
}

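/* RX virtqueue interrupt callback: schedule NAPI on the matching receive
 * queue.
 */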
static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

	/* Schedule NAPI, Suppress further interrupts if successful. */
	if (napi_schedule_prep(&rq->napi)) {
		virtqueue_disable_cb(rvq);
		__napi_schedule(&rq->napi);
	}
}

static void virtnet_napi_enable(struct receive_queue *rq)
{
	napi_enable(&rq->napi);

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets
	 * now.  virtnet_poll wants to re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED */
	if (napi_schedule_prep(&rq->napi)) {
		virtqueue_disable_cb(rq->vq);
		local_bh_disable();
		__napi_schedule(&rq->napi);
		local_bh_enable();
	}
}

static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, refill.work);
	bool still_empty;
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++) {
		struct receive_queue *rq = &vi->rq[i];

		napi_disable(&rq->napi);
		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
		virtnet_napi_enable(rq);

		/* In theory, this can happen: if we don't get any buffers in
		 * we will *never* try to fill again.
		 */
		if (still_empty)
			schedule_delayed_work(&vi->refill, HZ/2);
	}
}

static int virtnet_receive(struct receive_queue *rq, int budget)
{
	struct virtnet_info *vi = rq->vq->vdev->priv;
	unsigned int len, received = 0;
	void *buf;

	while (received < budget &&
	       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
		receive_buf(vi, rq, buf, len);
		received++;
	}

	if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
		if (!try_fill_recv(vi, rq, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}

	return received;
}

static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	unsigned int r, received;

	received = virtnet_receive(rq, budget);

	/* Out of packets? */
	if (received < budget) {
		r = virtqueue_enable_cb_prepare(rq->vq);
		napi_complete_done(napi, received);
		if (unlikely(virtqueue_poll(rq->vq, r)) &&
		    napi_schedule_prep(napi)) {
			virtqueue_disable_cb(rq->vq);
			__napi_schedule(napi);
		}
	}

	return received;
}

#ifdef CONFIG_NET_RX_BUSY_POLL
/* must be called with local_bh_disable()d */
static int virtnet_busy_poll(struct napi_struct *napi)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	int r, received = 0, budget = 4;

	if (!(vi->status & VIRTIO_NET_S_LINK_UP))
		return LL_FLUSH_FAILED;

	if (!napi_schedule_prep(napi))
		return LL_FLUSH_BUSY;

	virtqueue_disable_cb(rq->vq);

again:
	received += virtnet_receive(rq, budget);

	r = virtqueue_enable_cb_prepare(rq->vq);
	clear_bit(NAPI_STATE_SCHED, &napi->state);
	if (unlikely(virtqueue_poll(rq->vq, r)) &&
	    napi_schedule_prep(napi)) {
		virtqueue_disable_cb(rq->vq);
		if (received < budget) {
			budget -= received;
			goto again;
		} else {
			__napi_schedule(napi);
		}
	}

	return received;
}
#endif	/* CONFIG_NET_RX_BUSY_POLL */

static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (i < vi->curr_queue_pairs)
			/* Make sure we have some buffers: if oom use wq. */
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);
		virtnet_napi_enable(&vi->rq[i]);
	}

	return 0;
}

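/* Reclaim completed transmit buffers from the virtqueue and account them in
 * the per-cpu transmit statistics.
 */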
static void free_old_xmit_skbs(struct send_queue *sq)
{
	struct sk_buff *skb;
	unsigned int len;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);

		u64_stats_update_begin(&stats->tx_syncp);
		stats->tx_bytes += skb->len;
		stats->tx_packets++;
		u64_stats_update_end(&stats->tx_syncp);

		dev_kfree_skb_any(skb);
	}
}

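/* Build the virtio-net header for @skb and post header plus packet data to
 * the transmit virtqueue.  When the device accepts any header/data layout,
 * the header is pushed into the skb's headroom to save a scatterlist entry.
 */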
static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
{
	struct virtio_net_hdr_mrg_rxbuf *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
	struct virtnet_info *vi = sq->vq->vdev->priv;
	unsigned num_sg;
	unsigned hdr_len = vi->hdr_len;
	bool can_push;

	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);

	can_push = vi->any_header_sg &&
		!((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
		!skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
	/* Even if we can, don't push here yet as this would skew
	 * csum_start offset below. */
	if (can_push)
		hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
	else
		hdr = skb_vnet_hdr(skb);

	if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
				    virtio_is_little_endian(vi->vdev)))
		BUG();

	if (vi->mergeable_rx_bufs)
		hdr->num_buffers = 0;

	sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
	if (can_push) {
		__skb_push(skb, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
		/* Pull header back to avoid skew in tx bytes calculations. */
		__skb_pull(skb, hdr_len);
	} else {
		sg_set_buf(sq->sg, hdr, hdr_len);
		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
	}
	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
}

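/* .ndo_start_xmit: free completed buffers, queue the new skb, stop the
 * subqueue when the ring is nearly full, and kick the device unless more
 * packets are pending (xmit_more).
 */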
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int qnum = skb_get_queue_mapping(skb);
	struct send_queue *sq = &vi->sq[qnum];
	int err;
	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
	bool kick = !skb->xmit_more;

	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(sq);

	/* timestamp packet in software */
	skb_tx_timestamp(skb);

	/* Try to transmit */
	err = xmit_skb(sq, skb);

	/* This should not happen! */
	if (unlikely(err)) {
		dev->stats.tx_fifo_errors++;
		if (net_ratelimit())
			dev_warn(&dev->dev,
				 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
		dev->stats.tx_dropped++;
		dev_kfree_skb_any(skb);
		return NETDEV_TX_OK;
	}

	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	/* If running out of space, stop queue to avoid getting packets that we
	 * are then unable to transmit.
	 * An alternative would be to force queuing layer to requeue the skb by
	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
	 * returned in a normal path of operation: it means that driver is not
	 * maintaining the TX queue stop/start state properly, and causes
	 * the stack to do a non-trivial amount of useless work.
	 * Since most packets only take 1 or 2 ring slots, stopping the queue
	 * early means 16 slots are typically wasted.
	 */
	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
		netif_stop_subqueue(dev, qnum);
		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
			/* More just got used, free them then recheck. */
			free_old_xmit_skbs(sq);
			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
				netif_start_subqueue(dev, qnum);
				virtqueue_disable_cb(sq->vq);
			}
		}
	}

	if (kick || netif_xmit_stopped(txq))
		virtqueue_kick(sq->vq);

	return NETDEV_TX_OK;
}

/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *out)
{
	struct scatterlist *sgs[4], hdr, stat;
	unsigned out_num = 0, tmp;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));

	vi->ctrl_status = ~0;
	vi->ctrl_hdr.class = class;
	vi->ctrl_hdr.cmd = cmd;
	/* Add header */
	sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
	sgs[out_num++] = &hdr;

	if (out)
		sgs[out_num++] = out;

	/* Add return status. */
	sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
	sgs[out_num] = &stat;

	BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
	virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);

	if (unlikely(!virtqueue_kick(vi->cvq)))
		return vi->ctrl_status == VIRTIO_NET_OK;

	/* Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!virtqueue_get_buf(vi->cvq, &tmp) &&
	       !virtqueue_is_broken(vi->cvq))
		cpu_relax();

	return vi->ctrl_status == VIRTIO_NET_OK;
}

static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;
	int ret;
	struct sockaddr *addr;
	struct scatterlist sg;

	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;
	memcpy(addr, p, sizeof(*addr));

	ret = eth_prepare_mac_addr_change(dev, addr);
	if (ret)
		goto out;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
		sg_init_one(&sg, addr->sa_data, dev->addr_len);
		if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
			dev_warn(&vdev->dev,
				 "Failed to set mac address by vq command.\n");
			ret = -EINVAL;
			goto out;
		}
	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
		   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
		unsigned int i;

		/* Naturally, this has an atomicity problem. */
		for (i = 0; i < dev->addr_len; i++)
			virtio_cwrite8(vdev,
				       offsetof(struct virtio_net_config, mac) +
				       i, addr->sa_data[i]);
	}

	eth_commit_mac_addr_change(dev, p);
	ret = 0;

out:
	kfree(addr);
	return ret;
}

static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
					       struct rtnl_link_stats64 *tot)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int cpu;
	unsigned int start;

	for_each_possible_cpu(cpu) {
		struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
		u64 tpackets, tbytes, rpackets, rbytes;

		do {
			start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
			tpackets = stats->tx_packets;
			tbytes   = stats->tx_bytes;
		} while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));

		do {
			start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
			rpackets = stats->rx_packets;
			rbytes   = stats->rx_bytes;
		} while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));

		tot->rx_packets += rpackets;
		tot->tx_packets += tpackets;
		tot->rx_bytes   += rbytes;
		tot->tx_bytes   += tbytes;
	}

	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->rx_dropped = dev->stats.rx_dropped;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;

	return tot;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	for (i = 0; i < vi->curr_queue_pairs; i++)
		napi_schedule(&vi->rq[i].napi);
}
#endif

static void virtnet_ack_link_announce(struct virtnet_info *vi)
{
	rtnl_lock();
	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
				  VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
		dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
	rtnl_unlock();
}

static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
{
	struct scatterlist sg;
	struct net_device *dev = vi->dev;

	if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
		return 0;

	vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
	sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
				  VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
			 queue_pairs);
		return -EINVAL;
	} else {
		vi->curr_queue_pairs = queue_pairs;
		/* virtnet_open() will refill when device is going to up. */
		if (dev->flags & IFF_UP)
			schedule_delayed_work(&vi->refill, 0);
	}

	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	/* Make sure refill_work doesn't re-enable napi! */
	cancel_delayed_work_sync(&vi->refill);

	for (i = 0; i < vi->max_queue_pairs; i++)
		napi_disable(&vi->rq[i].napi);

	return 0;
}

static void virtnet_set_rx_mode(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg[2];
	struct virtio_net_ctrl_mac *mac_data;
	struct netdev_hw_addr *ha;
	int uc_count;
	int mc_count;
	void *buf;
	int i;

	/* We can't dynamically set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
		return;

	vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
	vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);

	sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC, sg))
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 vi->ctrl_promisc ? "en" : "dis");

	sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 vi->ctrl_allmulti ? "en" : "dis");

	uc_count = netdev_uc_count(dev);
	mc_count = netdev_mc_count(dev);
	/* MAC filter - use one buffer for both lists */
	buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
		      (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
	mac_data = buf;
	if (!buf)
		return;

	sg_init_table(sg, 2);

	/* Store the unicast list and count in the front of the buffer */
	mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
	i = 0;
	netdev_for_each_uc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	sg_set_buf(&sg[0], mac_data,
		   sizeof(mac_data->entries) + (uc_count * ETH_ALEN));

	/* multicast list and count fill the end */
	mac_data = (void *)&mac_data->macs[uc_count][0];

	mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
	i = 0;
	netdev_for_each_mc_addr(ha, dev)
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);

	sg_set_buf(&sg[1], mac_data,
		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
				  VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
		dev_warn(&dev->dev, "Failed to set MAC filter table.\n");

	kfree(buf);
}

static int virtnet_vlan_rx_add_vid(struct net_device *dev,
				   __be16 proto, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	vi->ctrl_vid = vid;
	sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_ADD, &sg))
		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
	return 0;
}

static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
				    __be16 proto, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	vi->ctrl_vid = vid;
	sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_DEL, &sg))
		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
	return 0;
}

static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
{
	int i;

	if (vi->affinity_hint_set) {
		for (i = 0; i < vi->max_queue_pairs; i++) {
			virtqueue_set_affinity(vi->rq[i].vq, -1);
			virtqueue_set_affinity(vi->sq[i].vq, -1);
		}

		vi->affinity_hint_set = false;
	}
}

static void virtnet_set_affinity(struct virtnet_info *vi)
{
	int i;
	int cpu;

	/* In multiqueue mode, when the number of cpu is equal to the number of
	 * queue pairs, we let the queue pairs to be private to one cpu by
	 * setting the affinity hint to eliminate the contention.
	 */
	if (vi->curr_queue_pairs == 1 ||
	    vi->max_queue_pairs != num_online_cpus()) {
		virtnet_clean_affinity(vi, -1);
		return;
	}

	i = 0;
	for_each_online_cpu(cpu) {
		virtqueue_set_affinity(vi->rq[i].vq, cpu);
		virtqueue_set_affinity(vi->sq[i].vq, cpu);
		netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
		i++;
	}

	vi->affinity_hint_set = true;
}

static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node);
	virtnet_set_affinity(vi);
	return 0;
}

static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node_dead);
	virtnet_set_affinity(vi);
	return 0;
}

static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
{
	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
						   node);

	virtnet_clean_affinity(vi, cpu);
	return 0;
}

static enum cpuhp_state virtionet_online;

static int virtnet_cpu_notif_add(struct virtnet_info *vi)
{
	int ret;

	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
	if (ret)
		return ret;
	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
					       &vi->node_dead);
	if (!ret)
		return ret;
	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
	return ret;
}

static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
{
	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
					    &vi->node_dead);
}

static void virtnet_get_ringparam(struct net_device *dev,
				struct ethtool_ringparam *ring)
{
	struct virtnet_info *vi = netdev_priv(dev);

	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
	ring->rx_pending = ring->rx_max_pending;
	ring->tx_pending = ring->tx_max_pending;
}


static void virtnet_get_drvinfo(struct net_device *dev,
				struct ethtool_drvinfo *info)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));

}

/* TODO: Eliminate OOO packets during switching */
static int virtnet_set_channels(struct net_device *dev,
				struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u16 queue_pairs = channels->combined_count;
	int err;

	/* We don't support separate rx/tx channels.
	 * We don't allow setting 'other' channels.
	 */
	if (channels->rx_count || channels->tx_count || channels->other_count)
		return -EINVAL;

	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
		return -EINVAL;

	/* For now we don't support modifying channels while XDP is loaded
	 * also when XDP is loaded all RX queues have XDP programs so we only
	 * need to check a single RX queue.
	 */
	if (vi->rq[0].xdp_prog)
		return -EINVAL;

	get_online_cpus();
	err = virtnet_set_queues(vi, queue_pairs);
	if (!err) {
		netif_set_real_num_tx_queues(dev, queue_pairs);
		netif_set_real_num_rx_queues(dev, queue_pairs);

		virtnet_set_affinity(vi);
	}
	put_online_cpus();

	return err;
}

static void virtnet_get_channels(struct net_device *dev,
				 struct ethtool_channels *channels)
{
	struct virtnet_info *vi = netdev_priv(dev);

	channels->combined_count = vi->curr_queue_pairs;
	channels->max_combined = vi->max_queue_pairs;
	channels->max_other = 0;
	channels->rx_count = 0;
	channels->tx_count = 0;
	channels->other_count = 0;
}

/* Check if the user is trying to change anything besides speed/duplex */
static bool virtnet_validate_ethtool_cmd(const struct ethtool_cmd *cmd)
{
	struct ethtool_cmd diff1 = *cmd;
	struct ethtool_cmd diff2 = {};

	/* cmd is always set so we need to clear it, validate the port type
	 * and also without autonegotiation we can ignore advertising
	 */
	ethtool_cmd_speed_set(&diff1, 0);
	diff2.port = PORT_OTHER;
	diff1.advertising = 0;
	diff1.duplex = 0;
	diff1.cmd = 0;

	return !memcmp(&diff1, &diff2, sizeof(diff1));
}

static int virtnet_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);
	u32 speed;

	speed = ethtool_cmd_speed(cmd);
	/* don't allow custom speed and duplex */
	if (!ethtool_validate_speed(speed) ||
	    !ethtool_validate_duplex(cmd->duplex) ||
	    !virtnet_validate_ethtool_cmd(cmd))
		return -EINVAL;
	vi->speed = speed;
	vi->duplex = cmd->duplex;

	return 0;
}

static int virtnet_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
	struct virtnet_info *vi = netdev_priv(dev);

	ethtool_cmd_speed_set(cmd, vi->speed);
	cmd->duplex = vi->duplex;
	cmd->port = PORT_OTHER;

	return 0;
}

static void virtnet_init_settings(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	vi->speed = SPEED_UNKNOWN;
	vi->duplex = DUPLEX_UNKNOWN;
}

static const struct ethtool_ops virtnet_ethtool_ops = {
	.get_drvinfo = virtnet_get_drvinfo,
	.get_link = ethtool_op_get_link,
	.get_ringparam = virtnet_get_ringparam,
	.set_channels = virtnet_set_channels,
	.get_channels = virtnet_get_channels,
	.get_ts_info = ethtool_op_get_ts_info,
	.get_settings = virtnet_get_settings,
	.set_settings = virtnet_set_settings,
};

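/* Attach (or, with a NULL prog, detach) an XDP program: extra transmit
 * queues are reserved for XDP_TX (one per CPU) and the program reference is
 * duplicated for every receive queue.
 */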
static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
{
	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
	struct virtnet_info *vi = netdev_priv(dev);
	struct bpf_prog *old_prog;
	u16 xdp_qp = 0, curr_qp;
	int i, err;

	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
		netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
		return -EOPNOTSUPP;
	}

	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
		netdev_warn(dev, "XDP expects header/data in single page, any_header_sg required\n");
		return -EINVAL;
	}

	if (dev->mtu > max_sz) {
		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
		return -EINVAL;
	}

	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
	if (prog)
		xdp_qp = nr_cpu_ids;

	/* XDP requires extra queues for XDP_TX */
	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
		netdev_warn(dev, "request %i queues but max is %i\n",
			    curr_qp + xdp_qp, vi->max_queue_pairs);
		return -ENOMEM;
	}

	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
	if (err) {
		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
		return err;
	}

	if (prog) {
		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
		if (IS_ERR(prog)) {
			virtnet_set_queues(vi, curr_qp);
			return PTR_ERR(prog);
		}
	}

	vi->xdp_queue_pairs = xdp_qp;
	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);

	for (i = 0; i < vi->max_queue_pairs; i++) {
		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
		if (old_prog)
			bpf_prog_put(old_prog);
	}

	return 0;
}

static bool virtnet_xdp_query(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		if (vi->rq[i].xdp_prog)
			return true;
	}
	return false;
}

static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return virtnet_xdp_set(dev, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = virtnet_xdp_query(dev);
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct net_device_ops virtnet_netdev = {
	.ndo_open            = virtnet_open,
	.ndo_stop   	     = virtnet_close,
	.ndo_start_xmit      = start_xmit,
	.ndo_validate_addr   = eth_validate_addr,
	.ndo_set_mac_address = virtnet_set_mac_address,
	.ndo_set_rx_mode     = virtnet_set_rx_mode,
	.ndo_get_stats64     = virtnet_stats,
	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = virtnet_netpoll,
#endif
#ifdef CONFIG_NET_RX_BUSY_POLL
	.ndo_busy_poll		= virtnet_busy_poll,
#endif
	.ndo_xdp		= virtnet_xdp,
};

static void virtnet_config_changed_work(struct work_struct *work)
{
	struct virtnet_info *vi =
		container_of(work, struct virtnet_info, config_work);
	u16 v;

	if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
				 struct virtio_net_config, status, &v) < 0)
		return;

	if (v & VIRTIO_NET_S_ANNOUNCE) {
		netdev_notify_peers(vi->dev);
		virtnet_ack_link_announce(vi);
	}

	/* Ignore unknown (future) status bits */
	v &= VIRTIO_NET_S_LINK_UP;

	if (vi->status == v)
		return;

	vi->status = v;

	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		netif_carrier_on(vi->dev);
		netif_tx_wake_all_queues(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_tx_stop_all_queues(vi->dev);
	}
}

static void virtnet_config_changed(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	schedule_work(&vi->config_work);
}

static void virtnet_free_queues(struct virtnet_info *vi)
{
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		napi_hash_del(&vi->rq[i].napi);
		netif_napi_del(&vi->rq[i].napi);
	}

	/* We called napi_hash_del() before netif_napi_del(),
	 * we need to respect an RCU grace period before freeing vi->rq
	 */
	synchronize_net();

	kfree(vi->rq);
	kfree(vi->sq);
}

static void free_receive_bufs(struct virtnet_info *vi)
{
	struct bpf_prog *old_prog;
	int i;

	rtnl_lock();
	for (i = 0; i < vi->max_queue_pairs; i++) {
		while (vi->rq[i].pages)
			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);

		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
		if (old_prog)
			bpf_prog_put(old_prog);
	}
	rtnl_unlock();
}

static void free_receive_page_frags(struct virtnet_info *vi)
{
	int i;
	for (i = 0; i < vi->max_queue_pairs; i++)
		if (vi->rq[i].alloc_frag.page)
			put_page(vi->rq[i].alloc_frag.page);
}

static void free_unused_bufs(struct virtnet_info *vi)
{
	void *buf;
	int i;

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->sq[i].vq;
		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
			dev_kfree_skb(buf);
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct virtqueue *vq = vi->rq[i].vq;

		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
			if (vi->mergeable_rx_bufs) {
				unsigned long ctx = (unsigned long)buf;
				void *base = mergeable_ctx_to_buf_address(ctx);
				put_page(virt_to_head_page(base));
			} else if (vi->big_packets) {
				give_pages(&vi->rq[i], buf);
			} else {
				dev_kfree_skb(buf);
			}
		}
	}
}

static void virtnet_del_vqs(struct virtnet_info *vi)
{
	struct virtio_device *vdev = vi->vdev;

	virtnet_clean_affinity(vi, -1);

	vdev->config->del_vqs(vdev);

	virtnet_free_queues(vi);
}

static int virtnet_find_vqs(struct virtnet_info *vi)
{
	vq_callback_t **callbacks;
	struct virtqueue **vqs;
	int ret = -ENOMEM;
	int i, total_vqs;
	const char **names;

	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
	 * possible control vq.
	 */
	total_vqs = vi->max_queue_pairs * 2 +
		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);

	/* Allocate space for find_vqs parameters */
	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		goto err_vq;
	callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
	if (!callbacks)
		goto err_callback;
	names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
	if (!names)
		goto err_names;

	/* Parameters for control virtqueue, if any */
	if (vi->has_cvq) {
		callbacks[total_vqs - 1] = NULL;
		names[total_vqs - 1] = "control";
	}

	/* Allocate/initialize parameters for send/receive virtqueues */
	for (i = 0; i < vi->max_queue_pairs; i++) {
		callbacks[rxq2vq(i)] = skb_recv_done;
		callbacks[txq2vq(i)] = skb_xmit_done;
		sprintf(vi->rq[i].name, "input.%d", i);
		sprintf(vi->sq[i].name, "output.%d", i);
		names[rxq2vq(i)] = vi->rq[i].name;
		names[txq2vq(i)] = vi->sq[i].name;
	}

	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
					 names);
	if (ret)
		goto err_find;

	if (vi->has_cvq) {
		vi->cvq = vqs[total_vqs - 1];
		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
			vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
	}

	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].vq = vqs[rxq2vq(i)];
		vi->sq[i].vq = vqs[txq2vq(i)];
	}

	kfree(names);
	kfree(callbacks);
	kfree(vqs);

	return 0;

err_find:
	kfree(names);
err_names:
	kfree(callbacks);
err_callback:
	kfree(vqs);
err_vq:
	return ret;
}

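/* Allocate the send_queue/receive_queue arrays and initialize per-queue
 * NAPI, scatterlists and the mergeable-buffer packet length EWMA.
 */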
static int virtnet_alloc_queues(struct virtnet_info *vi)
{
	int i;

	vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
	if (!vi->sq)
		goto err_sq;
	vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
	if (!vi->rq)
		goto err_rq;

	INIT_DELAYED_WORK(&vi->refill, refill_work);
	for (i = 0; i < vi->max_queue_pairs; i++) {
		vi->rq[i].pages = NULL;
		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
			       napi_weight);

		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
		ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
	}

	return 0;

err_rq:
	kfree(vi->sq);
err_sq:
	return -ENOMEM;
}

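/* Allocate queue state, bind it to the device's virtqueues and spread the
 * queues across the online CPUs.
 */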
static int init_vqs(struct virtnet_info *vi)
{
	int ret;

	/* Allocate send & receive queues */
	ret = virtnet_alloc_queues(vi);
	if (ret)
		goto err;

	ret = virtnet_find_vqs(vi);
	if (ret)
		goto err_free;

	get_online_cpus();
	virtnet_set_affinity(vi);
	put_online_cpus();

	return 0;

err_free:
	virtnet_free_queues(vi);
err:
	return ret;
}

#ifdef CONFIG_SYSFS
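/* Per-rx-queue sysfs attribute reporting the mergeable receive buffer size
 * currently used for refills, derived from the EWMA of packet lengths.
 */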
static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
		struct rx_queue_attribute *attribute, char *buf)
{
	struct virtnet_info *vi = netdev_priv(queue->dev);
	unsigned int queue_index = get_netdev_rx_queue_index(queue);
	struct ewma_pkt_len *avg;

	BUG_ON(queue_index >= vi->max_queue_pairs);
	avg = &vi->rq[queue_index].mrg_avg_pkt_len;
	return sprintf(buf, "%u\n", get_mergeable_buf_len(avg));
}

static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
	__ATTR_RO(mergeable_rx_buffer_size);

static struct attribute *virtio_net_mrg_rx_attrs[] = {
	&mergeable_rx_buffer_size_attribute.attr,
	NULL
};

static const struct attribute_group virtio_net_mrg_rx_group = {
	.name = "virtio_net",
	.attrs = virtio_net_mrg_rx_attrs
};
#endif

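/* Feature dependency checking: the control-queue based features validated
 * below are only usable when the device also offers VIRTIO_NET_F_CTRL_VQ.
 */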
static bool virtnet_fail_on_feature(struct virtio_device *vdev,
				    unsigned int fbit,
				    const char *fname, const char *dname)
{
	if (!virtio_has_feature(vdev, fbit))
		return false;

	dev_err(&vdev->dev, "device advertises feature %s but not %s",
		fname, dname);

	return true;
}

#define VIRTNET_FAIL_ON(vdev, fbit, dbit)			\
	virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)

static bool virtnet_validate_features(struct virtio_device *vdev)
{
	if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
	    (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
			     "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
	     VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
			     "VIRTIO_NET_F_CTRL_VQ"))) {
		return false;
	}

	return true;
}

#define MIN_MTU ETH_MIN_MTU
#define MAX_MTU ETH_MAX_MTU

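/* Probe: read feature bits and config space, allocate the netdev and
 * per-queue state, then register the device with the networking core.
 */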
static int virtnet_probe(struct virtio_device *vdev)
{
	int i, err;
	struct net_device *dev;
	struct virtnet_info *vi;
	u16 max_queue_pairs;
	int mtu;

	if (!vdev->config->get) {
		dev_err(&vdev->dev, "%s failure: config access disabled\n",
			__func__);
		return -EINVAL;
	}

	if (!virtnet_validate_features(vdev))
		return -EINVAL;

	/* Find if host supports multiqueue virtio_net device */
	err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
				   struct virtio_net_config,
				   max_virtqueue_pairs, &max_queue_pairs);

	/* We need at least 2 queues */
	if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
	    max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
	    !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		max_queue_pairs = 1;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
	dev->netdev_ops = &virtnet_netdev;
	dev->features = NETIF_F_HIGHDMA;

	dev->ethtool_ops = &virtnet_ethtool_ops;
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
		if (csum)
			dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;

		if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
			dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
		/* Individual feature bits: what can host handle? */
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
			dev->hw_features |= NETIF_F_TSO;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
			dev->hw_features |= NETIF_F_TSO6;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->hw_features |= NETIF_F_TSO_ECN;
		if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
			dev->hw_features |= NETIF_F_UFO;

		dev->features |= NETIF_F_GSO_ROBUST;

		if (gso)
			dev->features |= dev->hw_features & (NETIF_F_ALL_TSO|NETIF_F_UFO);
		/* (!csum && gso) case will be fixed by register_netdev() */
	}
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
		dev->features |= NETIF_F_RXCSUM;

	dev->vlan_features = dev->features;

	/* MTU range: 68 - 65535 */
	dev->min_mtu = MIN_MTU;
	dev->max_mtu = MAX_MTU;

	/* Configuration may specify what MAC to use.  Otherwise random. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
		virtio_cread_bytes(vdev,
				   offsetof(struct virtio_net_config, mac),
				   dev->dev_addr, dev->addr_len);
	else
		eth_hw_addr_random(dev);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
	vi->dev = dev;
	vi->vdev = vdev;
	vdev->priv = vi;
	vi->stats = alloc_percpu(struct virtnet_stats);
	err = -ENOMEM;
	if (vi->stats == NULL)
		goto free;

	for_each_possible_cpu(i) {
		struct virtnet_stats *virtnet_stats;
		virtnet_stats = per_cpu_ptr(vi->stats, i);
		u64_stats_init(&virtnet_stats->tx_syncp);
		u64_stats_init(&virtnet_stats->rx_syncp);
	}

	INIT_WORK(&vi->config_work, virtnet_config_changed_work);

	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
	    virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
		vi->big_packets = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		vi->mergeable_rx_bufs = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
		vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	else
		vi->hdr_len = sizeof(struct virtio_net_hdr);

	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
		vi->any_header_sg = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
		vi->has_cvq = true;

	if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
		mtu = virtio_cread16(vdev,
				     offsetof(struct virtio_net_config,
					      mtu));
		if (mtu < dev->min_mtu) {
			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
		} else {
			dev->mtu = mtu;
			dev->max_mtu = mtu;
		}
	}

	if (vi->any_header_sg)
		dev->needed_headroom = vi->hdr_len;

	/* Enable multiqueue by default */
	if (num_online_cpus() >= max_queue_pairs)
		vi->curr_queue_pairs = max_queue_pairs;
	else
		vi->curr_queue_pairs = num_online_cpus();
	vi->max_queue_pairs = max_queue_pairs;

	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
	err = init_vqs(vi);
	if (err)
		goto free_stats;

#ifdef CONFIG_SYSFS
	if (vi->mergeable_rx_bufs)
		dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
#endif
	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);

	virtnet_init_settings(dev);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		goto free_vqs;
	}

	virtio_device_ready(vdev);

	err = virtnet_cpu_notif_add(vi);
	if (err) {
		pr_debug("virtio_net: registering cpu notifier failed\n");
		goto free_unregister_netdev;
	}

	rtnl_lock();
	virtnet_set_queues(vi, vi->curr_queue_pairs);
	rtnl_unlock();

	/* Assume link up if device can't report link status,
	 * otherwise get link status from config.
	 */
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
		netif_carrier_off(dev);
		schedule_work(&vi->config_work);
	} else {
		vi->status = VIRTIO_NET_S_LINK_UP;
		netif_carrier_on(dev);
	}

	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
		 dev->name, max_queue_pairs);

	return 0;

free_unregister_netdev:
	vi->vdev->config->reset(vdev);

	unregister_netdev(dev);
free_vqs:
	cancel_delayed_work_sync(&vi->refill);
	free_receive_page_frags(vi);
	virtnet_del_vqs(vi);
free_stats:
	free_percpu(vi->stats);
free:
	free_netdev(dev);
	return err;
}

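/* Common teardown for remove and freeze: reset the device so it stops
 * using the rings, reclaim all buffers, then delete the virtqueues.
 */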
static void remove_vq_common(struct virtnet_info *vi)
{
	vi->vdev->config->reset(vi->vdev);

	/* Free unused buffers in both send and recv, if any. */
	free_unused_bufs(vi);

	free_receive_bufs(vi);

	free_receive_page_frags(vi);

	virtnet_del_vqs(vi);
}

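/* Device removal: stop the config work, unregister the netdev and release
 * the remaining resources.
 */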
static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device. */
	flush_work(&vi->config_work);

	unregister_netdev(vi->dev);

	remove_vq_common(vi);

	free_percpu(vi->stats);
	free_netdev(vi->dev);
}

#ifdef CONFIG_PM_SLEEP
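/* Suspend/resume support: freeze quiesces NAPI and tears down the
 * virtqueues; restore rebuilds them and refills the receive rings.
 */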
static int virtnet_freeze(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int i;

	virtnet_cpu_notif_remove(vi);

	/* Make sure no work handler is accessing the device */
	flush_work(&vi->config_work);

	netif_device_detach(vi->dev);
	cancel_delayed_work_sync(&vi->refill);

	if (netif_running(vi->dev)) {
		for (i = 0; i < vi->max_queue_pairs; i++)
			napi_disable(&vi->rq[i].napi);
	}

	remove_vq_common(vi);

	return 0;
}

static int virtnet_restore(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	int err, i;

	err = init_vqs(vi);
	if (err)
		return err;

	virtio_device_ready(vdev);

	if (netif_running(vi->dev)) {
		for (i = 0; i < vi->curr_queue_pairs; i++)
			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
				schedule_delayed_work(&vi->refill, 0);

		for (i = 0; i < vi->max_queue_pairs; i++)
			virtnet_napi_enable(&vi->rq[i]);
	}

	netif_device_attach(vi->dev);

	rtnl_lock();
	virtnet_set_queues(vi, vi->curr_queue_pairs);
	rtnl_unlock();

	err = virtnet_cpu_notif_add(vi);
	if (err)
		return err;

	return 0;
}
#endif

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

#define VIRTNET_FEATURES \
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
	VIRTIO_NET_F_MAC, \
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
	VIRTIO_NET_F_CTRL_MAC_ADDR, \
	VIRTIO_NET_F_MTU

static unsigned int features[] = {
	VIRTNET_FEATURES,
};

static unsigned int features_legacy[] = {
	VIRTNET_FEATURES,
	VIRTIO_NET_F_GSO,
	VIRTIO_F_ANY_LAYOUT,
};

static struct virtio_driver virtio_net_driver = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.feature_table_legacy = features_legacy,
	.feature_table_size_legacy = ARRAY_SIZE(features_legacy),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	virtnet_remove,
	.config_changed = virtnet_config_changed,
#ifdef CONFIG_PM_SLEEP
	.freeze =	virtnet_freeze,
	.restore =	virtnet_restore,
#endif
};

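/* Register the CPU hotplug callbacks before the virtio driver so any
 * probed device can rely on them; unwind in reverse order on failure.
 */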
static __init int virtio_net_driver_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "AP_VIRT_NET_ONLINE",
				      virtnet_cpu_online,
				      virtnet_cpu_down_prep);
	if (ret < 0)
		goto out;
	virtionet_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "VIRT_NET_DEAD",
				      NULL, virtnet_cpu_dead);
	if (ret)
		goto err_dead;

	ret = register_virtio_driver(&virtio_net_driver);
	if (ret)
		goto err_virtio;
	return 0;
err_virtio:
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
err_dead:
	cpuhp_remove_multi_state(virtionet_online);
out:
	return ret;
}
module_init(virtio_net_driver_init);

static __exit void virtio_net_driver_exit(void)
{
	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
	cpuhp_remove_multi_state(virtionet_online);
	unregister_virtio_driver(&virtio_net_driver);
}
module_exit(virtio_net_driver_exit);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");