virtio_net.c 25.9 KB
Newer Older
1
/* A network driver using virtio.
R
Rusty Russell 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
22
#include <linux/ethtool.h>
R
Rusty Russell 已提交
23 24
#include <linux/module.h>
#include <linux/virtio.h>
25
#include <linux/virtio_ids.h>
R
Rusty Russell 已提交
26 27
#include <linux/virtio_net.h>
#include <linux/scatterlist.h>
28
#include <linux/if_vlan.h>
R
Rusty Russell 已提交
29

30 31 32
/* NAPI weight: upper bound on packets processed per poll pass (0444: read-only). */
static int napi_weight = 128;
module_param(napi_weight, int, 0444);

/* Negotiate checksum offload and segmentation offload with the host by default. */
static int csum = 1, gso = 1;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

R
Rusty Russell 已提交
37
/* FIXME: MTU in config. */
/* Largest non-GSO receive buffer: Ethernet header + VLAN tag + MTU payload. */
#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
/* For mergeable buffers, this much headroom is available for a linear copy. */
#define GOOD_COPY_LEN	128

/* Maximum number of data scatterlist entries a control-vq command may carry. */
#define VIRTNET_SEND_COMMAND_SG_MAX    2
42

R
Rusty Russell 已提交
43 44 45
struct virtnet_info
{
	struct virtio_device *vdev;
46
	struct virtqueue *rvq, *svq, *cvq;
R
Rusty Russell 已提交
47 48
	struct net_device *dev;
	struct napi_struct napi;
49
	unsigned int status;
R
Rusty Russell 已提交
50 51 52 53

	/* Number of input buffers, and max we've ever had. */
	unsigned int num, max;

54 55 56
	/* I like... big packets and I cannot lie! */
	bool big_packets;

57 58 59
	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

R
Rusty Russell 已提交
60 61 62
	/* Receive & send queues. */
	struct sk_buff_head recv;
	struct sk_buff_head send;
63

64 65 66
	/* Work struct for refilling if we run low on memory. */
	struct delayed_work refill;

67 68
	/* Chain pages by the private ptr. */
	struct page *pages;
R
Rusty Russell 已提交
69 70
};

71 72 73 74 75
/* Per-skb virtio header, kept in skb->cb; plain or mergeable-rxbuf variant. */
struct skb_vnet_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf mhdr;
	};
	/* Scatterlist entries this skb occupies on the send queue. */
	unsigned int num_sg;
};

static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
R
Rusty Russell 已提交
80
{
81
	return (struct skb_vnet_hdr *)skb->cb;
R
Rusty Russell 已提交
82 83
}

84 85 86 87 88 89
/* Return a page to the device-private free list, chained via page->private. */
static void give_a_page(struct virtnet_info *vi, struct page *page)
{
	page->private = (unsigned long)vi->pages;
	vi->pages = page;
}

90 91 92 93 94 95 96 97 98 99
/* Strip all page fragments from an skb, recycling each page onto vi->pages. */
static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
{
	unsigned int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		give_a_page(vi, skb_shinfo(skb)->frags[i].page);
	skb_shinfo(skb)->nr_frags = 0;
	skb->data_len = 0;
}

100 101 102 103 104 105 106 107 108 109 110
/* Take a page from the private free list, or allocate a fresh one.
 * May return NULL if the list is empty and allocation fails. */
static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
{
	struct page *page = vi->pages;

	if (page)
		vi->pages = (struct page *)page->private;
	else
		page = alloc_page(gfp_mask);
	return page;
}

111
static void skb_xmit_done(struct virtqueue *svq)
R
Rusty Russell 已提交
112
{
113
	struct virtnet_info *vi = svq->vdev->priv;
R
Rusty Russell 已提交
114

115 116
	/* Suppress further interrupts. */
	svq->vq_ops->disable_cb(svq);
117

118
	/* We were probably waiting for more output buffers. */
R
Rusty Russell 已提交
119 120 121 122 123 124
	netif_wake_queue(vi->dev);
}

static void receive_skb(struct net_device *dev, struct sk_buff *skb,
			unsigned len)
{
125
	struct virtnet_info *vi = netdev_priv(dev);
126
	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
127
	int err;
128
	int i;
R
Rusty Russell 已提交
129 130 131 132 133 134

	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		goto drop;
	}
135

136 137 138
	if (vi->mergeable_rx_bufs) {
		unsigned int copy;
		char *p = page_address(skb_shinfo(skb)->frags[0].page);
139

140 141 142 143
		if (len > PAGE_SIZE)
			len = PAGE_SIZE;
		len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);

144 145
		memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr));
		p += sizeof(hdr->mhdr);
146 147 148 149 150 151 152 153 154 155 156 157 158 159

		copy = len;
		if (copy > skb_tailroom(skb))
			copy = skb_tailroom(skb);

		memcpy(skb_put(skb, copy), p, copy);

		len -= copy;

		if (!len) {
			give_a_page(vi, skb_shinfo(skb)->frags[0].page);
			skb_shinfo(skb)->nr_frags--;
		} else {
			skb_shinfo(skb)->frags[0].page_offset +=
160
				sizeof(hdr->mhdr) + copy;
161 162 163 164 165
			skb_shinfo(skb)->frags[0].size = len;
			skb->data_len += len;
			skb->len += len;
		}

166
		while (--hdr->mhdr.num_buffers) {
167 168 169 170 171 172 173 174 175 176 177 178 179
			struct sk_buff *nskb;

			i = skb_shinfo(skb)->nr_frags;
			if (i >= MAX_SKB_FRAGS) {
				pr_debug("%s: packet too long %d\n", dev->name,
					 len);
				dev->stats.rx_length_errors++;
				goto drop;
			}

			nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
			if (!nskb) {
				pr_debug("%s: rx error: %d buffers missing\n",
180
					 dev->name, hdr->mhdr.num_buffers);
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
				dev->stats.rx_length_errors++;
				goto drop;
			}

			__skb_unlink(nskb, &vi->recv);
			vi->num--;

			skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
			skb_shinfo(nskb)->nr_frags = 0;
			kfree_skb(nskb);

			if (len > PAGE_SIZE)
				len = PAGE_SIZE;

			skb_shinfo(skb)->frags[i].size = len;
			skb_shinfo(skb)->nr_frags++;
			skb->data_len += len;
			skb->len += len;
		}
	} else {
201
		len -= sizeof(hdr->hdr);
202 203 204 205 206 207 208 209 210 211 212

		if (len <= MAX_PACKET_LEN)
			trim_pages(vi, skb);

		err = pskb_trim(skb, len);
		if (err) {
			pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
				 len, err);
			dev->stats.rx_dropped++;
			goto drop;
		}
213
	}
214

215
	skb->truesize += skb->data_len;
R
Rusty Russell 已提交
216 217 218
	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

219
	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
R
Rusty Russell 已提交
220
		pr_debug("Needs csum!\n");
221 222 223
		if (!skb_partial_csum_set(skb,
					  hdr->hdr.csum_start,
					  hdr->hdr.csum_offset))
R
Rusty Russell 已提交
224 225 226
			goto frame_err;
	}

227 228 229 230
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

231
	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
R
Rusty Russell 已提交
232
		pr_debug("GSO!\n");
233
		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
R
Rusty Russell 已提交
234 235 236 237 238 239 240 241 242 243 244 245
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			if (net_ratelimit())
				printk(KERN_WARNING "%s: bad gso type %u.\n",
246
				       dev->name, hdr->hdr.gso_type);
R
Rusty Russell 已提交
247 248 249
			goto frame_err;
		}

250
		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
R
Rusty Russell 已提交
251 252
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

253
		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
R
Rusty Russell 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
		if (skb_shinfo(skb)->gso_size == 0) {
			if (net_ratelimit())
				printk(KERN_WARNING "%s: zero gso size.\n",
				       dev->name);
			goto frame_err;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_receive_skb(skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
drop:
	dev_kfree_skb(skb);
}

275
/* Fill the receive queue with full-size (optionally big-packet) skbs
 * until add_buf refuses more.  Returns false if we stopped early because
 * an allocation failed (OOM), true otherwise. */
static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
{
	struct sk_buff *skb;
	struct scatterlist sg[2+MAX_SKB_FRAGS];
	int num, err, i;
	bool oom = false;

	sg_init_table(sg, 2+MAX_SKB_FRAGS);
	do {
		struct skb_vnet_hdr *hdr;

		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
		if (unlikely(!skb)) {
			oom = true;
			break;
		}

		skb_reserve(skb, NET_IP_ALIGN);
		skb_put(skb, MAX_PACKET_LEN);

		/* sg[0] carries the virtio header, data follows. */
		hdr = skb_vnet_hdr(skb);
		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));

		/* Big packets need extra whole pages for GSO payloads;
		 * attach as many as we can get. */
		if (vi->big_packets) {
			for (i = 0; i < MAX_SKB_FRAGS; i++) {
				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
				f->page = get_a_page(vi, gfp);
				if (!f->page)
					break;

				f->page_offset = 0;
				f->size = PAGE_SIZE;

				skb->data_len += PAGE_SIZE;
				skb->len += PAGE_SIZE;

				skb_shinfo(skb)->nr_frags++;
			}
		}

		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
		if (err < 0) {
			/* Ring full (or error): back the skb out again. */
			skb_unlink(skb, &vi->recv);
			trim_pages(vi, skb);
			kfree_skb(skb);
			break;
		}
		vi->num++;
	} while (err >= num);
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
	return !oom;
}

333 334
/* Returns false if we couldn't fill entirely (OOM). */
static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
{
	struct sk_buff *skb;
	struct scatterlist sg[1];
	int err;
	bool oom = false;

	/* Without host rx-buffer merging, fall back to full-size buffers. */
	if (!vi->mergeable_rx_bufs)
		return try_fill_recv_maxbufs(vi, gfp);

	do {
		skb_frag_t *f;

		skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
		if (unlikely(!skb)) {
			oom = true;
			break;
		}

		skb_reserve(skb, NET_IP_ALIGN);

		/* One page per buffer; the host writes header + data there. */
		f = &skb_shinfo(skb)->frags[0];
		f->page = get_a_page(vi, gfp);
		if (!f->page) {
			oom = true;
			kfree_skb(skb);
			break;
		}

		f->page_offset = 0;
		f->size = PAGE_SIZE;

		skb_shinfo(skb)->nr_frags++;

		sg_init_one(sg, page_address(f->page), PAGE_SIZE);
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
		if (err < 0) {
			skb_unlink(skb, &vi->recv);
			kfree_skb(skb);
			break;
		}
		vi->num++;
	} while (err > 0);
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
	return !oom;
}

385
static void skb_recv_done(struct virtqueue *rvq)
R
Rusty Russell 已提交
386 387
{
	struct virtnet_info *vi = rvq->vdev->priv;
388
	/* Schedule NAPI, Suppress further interrupts if successful. */
389
	if (napi_schedule_prep(&vi->napi)) {
390
		rvq->vq_ops->disable_cb(rvq);
391
		__napi_schedule(&vi->napi);
392
	}
R
Rusty Russell 已提交
393 394
}

395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
/* Deferred refill of the receive ring, used when GFP_ATOMIC allocation
 * failed in the poll path; runs in process context with GFP_KERNEL. */
static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi;
	bool still_empty;

	vi = container_of(work, struct virtnet_info, refill.work);
	napi_disable(&vi->napi);
	try_fill_recv(vi, GFP_KERNEL);
	still_empty = (vi->num == 0);
	napi_enable(&vi->napi);

	/* In theory, this can happen: if we don't get any buffers in
	 * we will *never* try to fill again. */
	if (still_empty)
		schedule_delayed_work(&vi->refill, HZ/2);
}

R
Rusty Russell 已提交
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
	struct sk_buff *skb = NULL;
	unsigned int len, received = 0;

again:
	while (received < budget &&
	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
		__skb_unlink(skb, &vi->recv);
		receive_skb(vi->dev, skb, len);
		vi->num--;
		received++;
	}

427 428 429 430
	if (vi->num < vi->max / 2) {
		if (!try_fill_recv(vi, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}
R
Rusty Russell 已提交
431

432 433
	/* Out of packets? */
	if (received < budget) {
434
		napi_complete(napi);
435
		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
436 437
		    && napi_schedule_prep(napi)) {
			vi->rvq->vq_ops->disable_cb(vi->rvq);
438
			__napi_schedule(napi);
R
Rusty Russell 已提交
439
			goto again;
440
		}
R
Rusty Russell 已提交
441 442 443 444 445
	}

	return received;
}

446
static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
R
Rusty Russell 已提交
447 448
{
	struct sk_buff *skb;
449
	unsigned int len, tot_sgs = 0;
R
Rusty Russell 已提交
450 451 452 453

	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		__skb_unlink(skb, &vi->send);
454
		vi->dev->stats.tx_bytes += skb->len;
R
Rusty Russell 已提交
455
		vi->dev->stats.tx_packets++;
456
		tot_sgs += skb_vnet_hdr(skb)->num_sg;
457
		dev_kfree_skb_any(skb);
R
Rusty Russell 已提交
458
	}
459
	return tot_sgs;
R
Rusty Russell 已提交
460 461
}

462
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
R
Rusty Russell 已提交
463
{
464
	struct scatterlist sg[2+MAX_SKB_FRAGS];
465
	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
R
Rusty Russell 已提交
466 467
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

468
	sg_init_table(sg, 2+MAX_SKB_FRAGS);
469

J
Johannes Berg 已提交
470
	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
R
Rusty Russell 已提交
471 472

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
473 474 475
		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb);
		hdr->hdr.csum_offset = skb->csum_offset;
R
Rusty Russell 已提交
476
	} else {
477 478
		hdr->hdr.flags = 0;
		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
R
Rusty Russell 已提交
479 480 481
	}

	if (skb_is_gso(skb)) {
482 483
		hdr->hdr.hdr_len = skb_headlen(skb);
		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
R
Rusty Russell 已提交
484
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
485
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
R
Rusty Russell 已提交
486
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
487
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
R
Rusty Russell 已提交
488
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
489
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
R
Rusty Russell 已提交
490 491
		else
			BUG();
R
Rusty Russell 已提交
492
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
493
			hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
R
Rusty Russell 已提交
494
	} else {
495 496
		hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
R
Rusty Russell 已提交
497 498
	}

499
	hdr->mhdr.num_buffers = 0;
500 501 502

	/* Encode metadata header at front. */
	if (vi->mergeable_rx_bufs)
503
		sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr));
504
	else
505
		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
506

507 508
	hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
	return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb);
509 510
}

511
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
512 513
{
	struct virtnet_info *vi = netdev_priv(dev);
514
	int capacity;
515 516 517 518

again:
	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);
519

520
	/* Try to transmit */
521 522 523 524 525 526 527 528 529 530 531 532
	capacity = xmit_skb(vi, skb);

	/* This can happen with OOM and indirect buffers. */
	if (unlikely(capacity < 0)) {
		netif_stop_queue(dev);
		dev_warn(&dev->dev, "Unexpected full queue\n");
		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
			vi->svq->vq_ops->disable_cb(vi->svq);
			netif_start_queue(dev);
			goto again;
		}
		return NETDEV_TX_BUSY;
R
Rusty Russell 已提交
533
	}
534
	vi->svq->vq_ops->kick(vi->svq);
535 536 537 538 539 540 541 542 543 544

	/*
	 * Put new one in send queue.  You'd expect we'd need this before
	 * xmit_skb calls add_buf(), since the callback can be triggered
	 * immediately after that.  But since the callback just triggers
	 * another call back here, normal network xmit locking prevents the
	 * race.
	 */
	__skb_queue_head(&vi->send, skb);

545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560
	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	/* Apparently nice girls don't return TX_BUSY; stop the queue
	 * before it gets out of hand.  Naturally, this wastes entries. */
	if (capacity < 2+MAX_SKB_FRAGS) {
		netif_stop_queue(dev);
		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
			/* More just got used, free them then recheck. */
			capacity += free_old_xmit_skbs(vi);
			if (capacity >= 2+MAX_SKB_FRAGS) {
				netif_start_queue(dev);
				vi->svq->vq_ops->disable_cb(vi->svq);
			}
		}
561
	}
562 563

	return NETDEV_TX_OK;
R
Rusty Russell 已提交
564 565
}

566 567 568 569 570 571 572 573 574 575
static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;
	int ret;

	ret = eth_mac_addr(dev, p);
	if (ret)
		return ret;

576 577 578
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
		vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
		                  dev->dev_addr, dev->addr_len);
579 580 581 582

	return 0;
}

583 584 585 586 587 588 589 590 591
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: force a NAPI pass when interrupts can't be relied on. */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_schedule(&vi->napi);
}
#endif

R
Rusty Russell 已提交
592 593 594 595 596
/* ndo_open: enable NAPI and kick off processing of anything already queued. */
static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_enable(&vi->napi);

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets
	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED */
	if (napi_schedule_prep(&vi->napi)) {
		vi->rvq->vq_ops->disable_cb(vi->rvq);
		__napi_schedule(&vi->napi);
	}
	return 0;
}

609 610 611 612 613 614 615 616
/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formatted.
 */
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *data, int out, int in)
{
	struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
	struct virtio_net_ctrl_hdr ctrl;
	virtio_net_ctrl_ack status = ~0;
	unsigned int tmp;
	int i;

	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
		(out + in > VIRTNET_SEND_COMMAND_SG_MAX));

	out++; /* Add header */
	in++; /* Add return status */

	ctrl.class = class;
	ctrl.cmd = cmd;

	sg_init_table(sg, out + in);

	/* Layout: [ctrl header][caller's data sgs...][status byte]. */
	sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
	for_each_sg(data, s, out + in - 2, i)
		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));

	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0);

	vi->cvq->vq_ops->kick(vi->cvq);

	/*
	 * Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!vi->cvq->vq_ops->get_buf(vi->cvq, &tmp))
		cpu_relax();

	return status == VIRTIO_NET_OK;
}

R
Rusty Russell 已提交
654 655 656 657 658 659 660 661 662
/* ndo_stop: quiesce receive processing. */
static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_disable(&vi->napi);

	return 0;
}

663 664 665 666 667 668 669 670 671 672 673
/* ethtool set_tx_csum: only allowed when the host negotiated checksums. */
static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
		return -ENOSYS;

	return ethtool_op_set_tx_hw_csum(dev, data);
}

674 675 676
static void virtnet_set_rx_mode(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
677
	struct scatterlist sg[2];
678
	u8 promisc, allmulti;
679 680
	struct virtio_net_ctrl_mac *mac_data;
	struct dev_addr_list *addr;
J
Jiri Pirko 已提交
681
	struct netdev_hw_addr *ha;
682 683
	void *buf;
	int i;
684 685 686 687 688

	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
		return;

689 690
	promisc = ((dev->flags & IFF_PROMISC) != 0);
	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
691

692
	sg_init_one(sg, &promisc, sizeof(promisc));
693 694 695

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC,
696
				  sg, 1, 0))
697 698 699
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 promisc ? "en" : "dis");

700
	sg_init_one(sg, &allmulti, sizeof(allmulti));
701 702 703

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI,
704
				  sg, 1, 0))
705 706
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 allmulti ? "en" : "dis");
707 708

	/* MAC filter - use one buffer for both lists */
709
	mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
710 711 712 713 714 715
				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
	if (!buf) {
		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
		return;
	}

716 717
	sg_init_table(sg, 2);

718
	/* Store the unicast list and count in the front of the buffer */
719
	mac_data->entries = dev->uc.count;
J
Jiri Pirko 已提交
720
	i = 0;
721
	list_for_each_entry(ha, &dev->uc.list, list)
J
Jiri Pirko 已提交
722
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
723 724

	sg_set_buf(&sg[0], mac_data,
725
		   sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
726 727

	/* multicast list and count fill the end */
728
	mac_data = (void *)&mac_data->macs[dev->uc.count][0];
729 730 731 732 733 734 735 736 737 738 739 740 741 742 743

	mac_data->entries = dev->mc_count;
	addr = dev->mc_list;
	for (i = 0; i < dev->mc_count; i++, addr = addr->next)
		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);

	sg_set_buf(&sg[1], mac_data,
		   sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
				  sg, 2, 0))
		dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");

	kfree(buf);
744 745
}

746
/* ndo_vlan_rx_add_vid: tell the host to accept frames tagged with @vid. */
static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	sg_init_one(&sg, &vid, sizeof(vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
}

758
/* ndo_vlan_rx_kill_vid: tell the host to stop accepting VLAN @vid. */
static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

	sg_init_one(&sg, &vid, sizeof(vid));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
}

770
static const struct ethtool_ops virtnet_ethtool_ops = {
771 772
	.set_tx_csum = virtnet_set_tx_csum,
	.set_sg = ethtool_op_set_sg,
773
	.set_tso = ethtool_op_set_tso,
774
	.set_ufo = ethtool_op_set_ufo,
775
	.get_link = ethtool_op_get_link,
776 777
};

M
Mark McLoughlin 已提交
778 779 780 781 782 783 784 785 786 787 788
#define MIN_MTU 68
#define MAX_MTU 65535

/* ndo_change_mtu: accept any MTU within [MIN_MTU, MAX_MTU]. */
static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

789 790 791 792 793
static const struct net_device_ops virtnet_netdev = {
	.ndo_open            = virtnet_open,
	.ndo_stop   	     = virtnet_close,
	.ndo_start_xmit      = start_xmit,
	.ndo_validate_addr   = eth_validate_addr,
794
	.ndo_set_mac_address = virtnet_set_mac_address,
795
	.ndo_set_rx_mode     = virtnet_set_rx_mode,
796
	.ndo_change_mtu	     = virtnet_change_mtu,
797 798
	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
799 800 801 802 803
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = virtnet_netpoll,
#endif
};

804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
/* Re-read the link status from device config space and propagate any
 * change to the carrier state and the tx queue. */
static void virtnet_update_status(struct virtnet_info *vi)
{
	u16 v;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
		return;

	vi->vdev->config->get(vi->vdev,
			      offsetof(struct virtio_net_config, status),
			      &v, sizeof(v));

	/* Ignore unknown (future) status bits */
	v &= VIRTIO_NET_S_LINK_UP;

	if (vi->status == v)
		return;

	vi->status = v;

	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		netif_carrier_on(vi->dev);
		netif_wake_queue(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_stop_queue(vi->dev);
	}
}

/* Config-change interrupt: the only config we track is link status. */
static void virtnet_config_changed(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_update_status(vi);
}

R
Rusty Russell 已提交
839 840 841 842 843
static int virtnet_probe(struct virtio_device *vdev)
{
	int err;
	struct net_device *dev;
	struct virtnet_info *vi;
844 845 846 847
	struct virtqueue *vqs[3];
	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
	const char *names[] = { "input", "output", "control" };
	int nvqs;
R
Rusty Russell 已提交
848 849 850 851 852 853 854

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
855
	dev->netdev_ops = &virtnet_netdev;
R
Rusty Russell 已提交
856
	dev->features = NETIF_F_HIGHDMA;
857
	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
R
Rusty Russell 已提交
858 859 860
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
861
	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
R
Rusty Russell 已提交
862 863
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
864
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
R
Rusty Russell 已提交
865 866 867
			dev->features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
868
		/* Individual feature bits: what can host handle? */
869
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
870
			dev->features |= NETIF_F_TSO;
871
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
872
			dev->features |= NETIF_F_TSO6;
873
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
874
			dev->features |= NETIF_F_TSO_ECN;
875
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
876
			dev->features |= NETIF_F_UFO;
R
Rusty Russell 已提交
877 878 879
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
880
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
881 882 883
		vdev->config->get(vdev,
				  offsetof(struct virtio_net_config, mac),
				  dev->dev_addr, dev->addr_len);
884
	} else
R
Rusty Russell 已提交
885 886 887 888
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
889
	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
R
Rusty Russell 已提交
890 891
	vi->dev = dev;
	vi->vdev = vdev;
892
	vdev->priv = vi;
893
	vi->pages = NULL;
894
	INIT_DELAYED_WORK(&vi->refill, refill_work);
R
Rusty Russell 已提交
895

896 897 898 899 900 901
	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
		vi->big_packets = true;

902 903 904
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		vi->mergeable_rx_bufs = true;

905 906 907 908 909 910
	/* We expect two virtqueues, receive then send,
	 * and optionally control. */
	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;

	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
	if (err)
R
Rusty Russell 已提交
911 912
		goto free;

913 914
	vi->rvq = vqs[0];
	vi->svq = vqs[1];
R
Rusty Russell 已提交
915

916
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
917
		vi->cvq = vqs[2];
918 919 920

		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
			dev->features |= NETIF_F_HW_VLAN_FILTER;
921 922
	}

R
Rusty Russell 已提交
923 924 925 926 927 928 929
	/* Initialize our empty receive and send queues. */
	skb_queue_head_init(&vi->recv);
	skb_queue_head_init(&vi->send);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
930
		goto free_vqs;
R
Rusty Russell 已提交
931
	}
932 933

	/* Last of all, set up some receive buffers. */
934
	try_fill_recv(vi, GFP_KERNEL);
935 936 937 938 939 940 941

	/* If we didn't even get one input buffer, we're useless. */
	if (vi->num == 0) {
		err = -ENOMEM;
		goto unregister;
	}

942 943
	vi->status = VIRTIO_NET_S_LINK_UP;
	virtnet_update_status(vi);
944
	netif_carrier_on(dev);
945

R
Rusty Russell 已提交
946 947 948
	pr_debug("virtnet: registered device %s\n", dev->name);
	return 0;

949 950
unregister:
	unregister_netdev(dev);
951
	cancel_delayed_work_sync(&vi->refill);
952 953
free_vqs:
	vdev->config->del_vqs(vdev);
R
Rusty Russell 已提交
954 955 956 957 958
free:
	free_netdev(dev);
	return err;
}

959
/* Remove: reset the device, drain both skb queues, tear down virtqueues,
 * and release the recycled page pool before freeing the netdev. */
static void __devexit virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	struct sk_buff *skb;

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	/* Free our skbs in send and recv queues, if any. */
	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
		kfree_skb(skb);
		vi->num--;
	}
	__skb_queue_purge(&vi->send);

	BUG_ON(vi->num != 0);

	unregister_netdev(vi->dev);
	cancel_delayed_work_sync(&vi->refill);

	vdev->config->del_vqs(vi->vdev);

	while (vi->pages)
		__free_pages(get_a_page(vi, GFP_KERNEL), 0);

	free_netdev(vi->dev);
}

/* Devices this driver binds to: any virtio-net device. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

992
static unsigned int features[] = {
993 994
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
995
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
996
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
997
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
998
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
999
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
1000 1001
};

R
Rusty Russell 已提交
1002
static struct virtio_driver virtio_net = {
1003 1004
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
R
Rusty Russell 已提交
1005 1006 1007 1008 1009
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	__devexit_p(virtnet_remove),
1010
	.config_changed = virtnet_config_changed,
R
Rusty Russell 已提交
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
};

/* Module entry point: register with the virtio bus. */
static int __init init(void)
{
	return register_virtio_driver(&virtio_net);
}

/* Module exit point: unregister from the virtio bus. */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_net);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");