virtio_net.c 25.7 KB
Newer Older
1
/* A network driver using virtio.
R
Rusty Russell 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
22
#include <linux/ethtool.h>
R
Rusty Russell 已提交
23 24
#include <linux/module.h>
#include <linux/virtio.h>
25
#include <linux/virtio_ids.h>
R
Rusty Russell 已提交
26 27
#include <linux/virtio_net.h>
#include <linux/scatterlist.h>
28
#include <linux/if_vlan.h>
R
Rusty Russell 已提交
29

/* NAPI poll weight for this driver (how much rx work per poll pass). */
static int napi_weight = 128;
module_param(napi_weight, int, 0444);

/* Module options: advertise checksum offload / segmentation offload. */
static int csum = 1, gso = 1;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

/* FIXME: MTU in config. */
#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)

/* With mergeable rx buffers, this many bytes are copied into the skb head. */
#define GOOD_COPY_LEN	128

/* Max number of data scatterlist entries a control command may carry. */
#define VIRTNET_SEND_COMMAND_SG_MAX    2

R
Rusty Russell 已提交
43 44 45
struct virtnet_info
{
	struct virtio_device *vdev;
46
	struct virtqueue *rvq, *svq, *cvq;
R
Rusty Russell 已提交
47 48
	struct net_device *dev;
	struct napi_struct napi;
49
	unsigned int status;
R
Rusty Russell 已提交
50 51 52 53

	/* Number of input buffers, and max we've ever had. */
	unsigned int num, max;

54 55 56
	/* I like... big packets and I cannot lie! */
	bool big_packets;

57 58 59
	/* Host will merge rx buffers for big packets (shake it! shake it!) */
	bool mergeable_rx_bufs;

R
Rusty Russell 已提交
60 61 62
	/* Receive & send queues. */
	struct sk_buff_head recv;
	struct sk_buff_head send;
63

64 65 66
	/* Work struct for refilling if we run low on memory. */
	struct delayed_work refill;

67 68
	/* Chain pages by the private ptr. */
	struct page *pages;
R
Rusty Russell 已提交
69 70
};

71 72 73 74 75
struct skb_vnet_hdr {
	union {
		struct virtio_net_hdr hdr;
		struct virtio_net_hdr_mrg_rxbuf mhdr;
	};
76
	unsigned int num_sg;
77 78 79
};

static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
R
Rusty Russell 已提交
80
{
81
	return (struct skb_vnet_hdr *)skb->cb;
R
Rusty Russell 已提交
82 83
}

84 85 86 87 88 89
static void give_a_page(struct virtnet_info *vi, struct page *page)
{
	page->private = (unsigned long)vi->pages;
	vi->pages = page;
}

90 91 92 93 94 95 96 97 98 99
static void trim_pages(struct virtnet_info *vi, struct sk_buff *skb)
{
	unsigned int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		give_a_page(vi, skb_shinfo(skb)->frags[i].page);
	skb_shinfo(skb)->nr_frags = 0;
	skb->data_len = 0;
}

100 101 102 103 104 105 106 107 108 109 110
static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
{
	struct page *p = vi->pages;

	if (p)
		vi->pages = (struct page *)p->private;
	else
		p = alloc_page(gfp_mask);
	return p;
}

111
static void skb_xmit_done(struct virtqueue *svq)
R
Rusty Russell 已提交
112
{
113
	struct virtnet_info *vi = svq->vdev->priv;
R
Rusty Russell 已提交
114

115 116
	/* Suppress further interrupts. */
	svq->vq_ops->disable_cb(svq);
117

118
	/* We were probably waiting for more output buffers. */
R
Rusty Russell 已提交
119 120 121 122 123 124
	netif_wake_queue(vi->dev);
}

static void receive_skb(struct net_device *dev, struct sk_buff *skb,
			unsigned len)
{
125
	struct virtnet_info *vi = netdev_priv(dev);
126
	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
127
	int err;
128
	int i;
R
Rusty Russell 已提交
129 130 131 132 133 134

	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		goto drop;
	}
135

136 137 138
	if (vi->mergeable_rx_bufs) {
		unsigned int copy;
		char *p = page_address(skb_shinfo(skb)->frags[0].page);
139

140 141 142 143
		if (len > PAGE_SIZE)
			len = PAGE_SIZE;
		len -= sizeof(struct virtio_net_hdr_mrg_rxbuf);

144 145
		memcpy(&hdr->mhdr, p, sizeof(hdr->mhdr));
		p += sizeof(hdr->mhdr);
146 147 148 149 150 151 152 153 154 155 156 157 158 159

		copy = len;
		if (copy > skb_tailroom(skb))
			copy = skb_tailroom(skb);

		memcpy(skb_put(skb, copy), p, copy);

		len -= copy;

		if (!len) {
			give_a_page(vi, skb_shinfo(skb)->frags[0].page);
			skb_shinfo(skb)->nr_frags--;
		} else {
			skb_shinfo(skb)->frags[0].page_offset +=
160
				sizeof(hdr->mhdr) + copy;
161 162 163 164 165
			skb_shinfo(skb)->frags[0].size = len;
			skb->data_len += len;
			skb->len += len;
		}

166
		while (--hdr->mhdr.num_buffers) {
167 168 169 170 171 172 173 174 175 176 177 178 179
			struct sk_buff *nskb;

			i = skb_shinfo(skb)->nr_frags;
			if (i >= MAX_SKB_FRAGS) {
				pr_debug("%s: packet too long %d\n", dev->name,
					 len);
				dev->stats.rx_length_errors++;
				goto drop;
			}

			nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len);
			if (!nskb) {
				pr_debug("%s: rx error: %d buffers missing\n",
180
					 dev->name, hdr->mhdr.num_buffers);
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
				dev->stats.rx_length_errors++;
				goto drop;
			}

			__skb_unlink(nskb, &vi->recv);
			vi->num--;

			skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0];
			skb_shinfo(nskb)->nr_frags = 0;
			kfree_skb(nskb);

			if (len > PAGE_SIZE)
				len = PAGE_SIZE;

			skb_shinfo(skb)->frags[i].size = len;
			skb_shinfo(skb)->nr_frags++;
			skb->data_len += len;
			skb->len += len;
		}
	} else {
201
		len -= sizeof(hdr->hdr);
202 203 204 205 206 207 208 209 210 211 212

		if (len <= MAX_PACKET_LEN)
			trim_pages(vi, skb);

		err = pskb_trim(skb, len);
		if (err) {
			pr_debug("%s: pskb_trim failed %i %d\n", dev->name,
				 len, err);
			dev->stats.rx_dropped++;
			goto drop;
		}
213
	}
214

215
	skb->truesize += skb->data_len;
R
Rusty Russell 已提交
216 217 218
	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

219
	if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
R
Rusty Russell 已提交
220
		pr_debug("Needs csum!\n");
221 222 223
		if (!skb_partial_csum_set(skb,
					  hdr->hdr.csum_start,
					  hdr->hdr.csum_offset))
R
Rusty Russell 已提交
224 225 226
			goto frame_err;
	}

227 228 229 230
	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

231
	if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
R
Rusty Russell 已提交
232
		pr_debug("GSO!\n");
233
		switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
R
Rusty Russell 已提交
234 235 236 237 238 239 240 241 242 243 244 245
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			if (net_ratelimit())
				printk(KERN_WARNING "%s: bad gso type %u.\n",
246
				       dev->name, hdr->hdr.gso_type);
R
Rusty Russell 已提交
247 248 249
			goto frame_err;
		}

250
		if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
R
Rusty Russell 已提交
251 252
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

253
		skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
R
Rusty Russell 已提交
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
		if (skb_shinfo(skb)->gso_size == 0) {
			if (net_ratelimit())
				printk(KERN_WARNING "%s: zero gso size.\n",
				       dev->name);
			goto frame_err;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_receive_skb(skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
drop:
	dev_kfree_skb(skb);
}

275
static bool try_fill_recv_maxbufs(struct virtnet_info *vi, gfp_t gfp)
R
Rusty Russell 已提交
276 277
{
	struct sk_buff *skb;
278
	struct scatterlist sg[2+MAX_SKB_FRAGS];
279
	int num, err, i;
280
	bool oom = false;
R
Rusty Russell 已提交
281

282
	sg_init_table(sg, 2+MAX_SKB_FRAGS);
283
	do {
284
		struct skb_vnet_hdr *hdr;
285

286
		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN + NET_IP_ALIGN);
287 288
		if (unlikely(!skb)) {
			oom = true;
R
Rusty Russell 已提交
289
			break;
290
		}
R
Rusty Russell 已提交
291

292
		skb_reserve(skb, NET_IP_ALIGN);
R
Rusty Russell 已提交
293
		skb_put(skb, MAX_PACKET_LEN);
294 295

		hdr = skb_vnet_hdr(skb);
296
		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
297 298 299 300

		if (vi->big_packets) {
			for (i = 0; i < MAX_SKB_FRAGS; i++) {
				skb_frag_t *f = &skb_shinfo(skb)->frags[i];
301
				f->page = get_a_page(vi, gfp);
302 303 304 305 306 307 308 309 310 311 312 313 314
				if (!f->page)
					break;

				f->page_offset = 0;
				f->size = PAGE_SIZE;

				skb->data_len += PAGE_SIZE;
				skb->len += PAGE_SIZE;

				skb_shinfo(skb)->nr_frags++;
			}
		}

R
Rusty Russell 已提交
315 316 317 318
		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
319
		if (err < 0) {
R
Rusty Russell 已提交
320
			skb_unlink(skb, &vi->recv);
321
			trim_pages(vi, skb);
R
Rusty Russell 已提交
322 323 324 325
			kfree_skb(skb);
			break;
		}
		vi->num++;
326
	} while (err >= num);
R
Rusty Russell 已提交
327 328 329
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
330
	return !oom;
R
Rusty Russell 已提交
331 332
}

333 334
/* Returns false if we couldn't fill entirely (OOM). */
static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
335 336 337 338
{
	struct sk_buff *skb;
	struct scatterlist sg[1];
	int err;
339
	bool oom = false;
340

341 342
	if (!vi->mergeable_rx_bufs)
		return try_fill_recv_maxbufs(vi, gfp);
343

344
	do {
345 346 347
		skb_frag_t *f;

		skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN);
348 349
		if (unlikely(!skb)) {
			oom = true;
350
			break;
351
		}
352 353 354 355

		skb_reserve(skb, NET_IP_ALIGN);

		f = &skb_shinfo(skb)->frags[0];
356
		f->page = get_a_page(vi, gfp);
357
		if (!f->page) {
358
			oom = true;
359 360 361 362 363 364 365 366 367 368 369 370 371
			kfree_skb(skb);
			break;
		}

		f->page_offset = 0;
		f->size = PAGE_SIZE;

		skb_shinfo(skb)->nr_frags++;

		sg_init_one(sg, page_address(f->page), PAGE_SIZE);
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb);
372
		if (err < 0) {
373 374 375 376 377
			skb_unlink(skb, &vi->recv);
			kfree_skb(skb);
			break;
		}
		vi->num++;
378
	} while (err > 0);
379 380 381
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
382
	return !oom;
383 384
}

385
static void skb_recv_done(struct virtqueue *rvq)
R
Rusty Russell 已提交
386 387
{
	struct virtnet_info *vi = rvq->vdev->priv;
388
	/* Schedule NAPI, Suppress further interrupts if successful. */
389
	if (napi_schedule_prep(&vi->napi)) {
390
		rvq->vq_ops->disable_cb(rvq);
391
		__napi_schedule(&vi->napi);
392
	}
R
Rusty Russell 已提交
393 394
}

395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
static void refill_work(struct work_struct *work)
{
	struct virtnet_info *vi;
	bool still_empty;

	vi = container_of(work, struct virtnet_info, refill.work);
	napi_disable(&vi->napi);
	try_fill_recv(vi, GFP_KERNEL);
	still_empty = (vi->num == 0);
	napi_enable(&vi->napi);

	/* In theory, this can happen: if we don't get any buffers in
	 * we will *never* try to fill again. */
	if (still_empty)
		schedule_delayed_work(&vi->refill, HZ/2);
}

R
Rusty Russell 已提交
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
	struct sk_buff *skb = NULL;
	unsigned int len, received = 0;

again:
	while (received < budget &&
	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
		__skb_unlink(skb, &vi->recv);
		receive_skb(vi->dev, skb, len);
		vi->num--;
		received++;
	}

427 428 429 430
	if (vi->num < vi->max / 2) {
		if (!try_fill_recv(vi, GFP_ATOMIC))
			schedule_delayed_work(&vi->refill, 0);
	}
R
Rusty Russell 已提交
431

432 433
	/* Out of packets? */
	if (received < budget) {
434
		napi_complete(napi);
435
		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
436 437
		    && napi_schedule_prep(napi)) {
			vi->rvq->vq_ops->disable_cb(vi->rvq);
438
			__napi_schedule(napi);
R
Rusty Russell 已提交
439
			goto again;
440
		}
R
Rusty Russell 已提交
441 442 443 444 445
	}

	return received;
}

446
static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
R
Rusty Russell 已提交
447 448
{
	struct sk_buff *skb;
449
	unsigned int len, tot_sgs = 0;
R
Rusty Russell 已提交
450 451 452 453

	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		__skb_unlink(skb, &vi->send);
454
		vi->dev->stats.tx_bytes += skb->len;
R
Rusty Russell 已提交
455
		vi->dev->stats.tx_packets++;
456
		tot_sgs += skb_vnet_hdr(skb)->num_sg;
457
		dev_kfree_skb_any(skb);
R
Rusty Russell 已提交
458
	}
459
	return tot_sgs;
R
Rusty Russell 已提交
460 461
}

462
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
R
Rusty Russell 已提交
463
{
464
	struct scatterlist sg[2+MAX_SKB_FRAGS];
465
	struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
R
Rusty Russell 已提交
466 467
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

468
	sg_init_table(sg, 2+MAX_SKB_FRAGS);
469

J
Johannes Berg 已提交
470
	pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
R
Rusty Russell 已提交
471 472

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
473 474 475
		hdr->hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->hdr.csum_start = skb->csum_start - skb_headroom(skb);
		hdr->hdr.csum_offset = skb->csum_offset;
R
Rusty Russell 已提交
476
	} else {
477 478
		hdr->hdr.flags = 0;
		hdr->hdr.csum_offset = hdr->hdr.csum_start = 0;
R
Rusty Russell 已提交
479 480 481
	}

	if (skb_is_gso(skb)) {
482 483
		hdr->hdr.hdr_len = skb_headlen(skb);
		hdr->hdr.gso_size = skb_shinfo(skb)->gso_size;
R
Rusty Russell 已提交
484
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
485
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
R
Rusty Russell 已提交
486
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
487
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
R
Rusty Russell 已提交
488
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
489
			hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
R
Rusty Russell 已提交
490 491
		else
			BUG();
R
Rusty Russell 已提交
492
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
493
			hdr->hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
R
Rusty Russell 已提交
494
	} else {
495 496
		hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->hdr.gso_size = hdr->hdr.hdr_len = 0;
R
Rusty Russell 已提交
497 498
	}

499
	hdr->mhdr.num_buffers = 0;
500 501 502

	/* Encode metadata header at front. */
	if (vi->mergeable_rx_bufs)
503
		sg_set_buf(sg, &hdr->mhdr, sizeof(hdr->mhdr));
504
	else
505
		sg_set_buf(sg, &hdr->hdr, sizeof(hdr->hdr));
506

507 508
	hdr->num_sg = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
	return vi->svq->vq_ops->add_buf(vi->svq, sg, hdr->num_sg, 0, skb);
509 510
}

511
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
512 513
{
	struct virtnet_info *vi = netdev_priv(dev);
514
	int capacity;
515 516 517 518

again:
	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);
519 520

	/* Put new one in send queue and do transmit */
521
	__skb_queue_head(&vi->send, skb);
522 523 524 525 526 527 528 529 530 531 532 533
	capacity = xmit_skb(vi, skb);

	/* This can happen with OOM and indirect buffers. */
	if (unlikely(capacity < 0)) {
		netif_stop_queue(dev);
		dev_warn(&dev->dev, "Unexpected full queue\n");
		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
			vi->svq->vq_ops->disable_cb(vi->svq);
			netif_start_queue(dev);
			goto again;
		}
		return NETDEV_TX_BUSY;
R
Rusty Russell 已提交
534
	}
535

536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
	vi->svq->vq_ops->kick(vi->svq);
	/* Don't wait up for transmitted skbs to be freed. */
	skb_orphan(skb);
	nf_reset(skb);

	/* Apparently nice girls don't return TX_BUSY; stop the queue
	 * before it gets out of hand.  Naturally, this wastes entries. */
	if (capacity < 2+MAX_SKB_FRAGS) {
		netif_stop_queue(dev);
		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
			/* More just got used, free them then recheck. */
			capacity += free_old_xmit_skbs(vi);
			if (capacity >= 2+MAX_SKB_FRAGS) {
				netif_start_queue(dev);
				vi->svq->vq_ops->disable_cb(vi->svq);
			}
		}
553
	}
554 555

	return NETDEV_TX_OK;
R
Rusty Russell 已提交
556 557
}

558 559 560 561 562 563 564 565 566 567
static int virtnet_set_mac_address(struct net_device *dev, void *p)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;
	int ret;

	ret = eth_mac_addr(dev, p);
	if (ret)
		return ret;

568 569 570
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
		vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
		                  dev->dev_addr, dev->addr_len);
571 572 573 574

	return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/* Netpoll hook: run the NAPI poll path with interrupts unavailable. */
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_schedule(&vi->napi);
}
#endif

R
Rusty Russell 已提交
584 585 586 587 588
static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_enable(&vi->napi);
R
Rusty Russell 已提交
589 590 591

	/* If all buffers were filled by other side before we napi_enabled, we
	 * won't get another interrupt, so process any outstanding packets
592 593
	 * now.  virtnet_poll wants re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED */
594
	if (napi_schedule_prep(&vi->napi)) {
595
		vi->rvq->vq_ops->disable_cb(vi->rvq);
596
		__napi_schedule(&vi->napi);
597
	}
R
Rusty Russell 已提交
598 599 600
	return 0;
}

601 602 603 604 605 606 607 608
/*
 * Send command via the control virtqueue and check status.  Commands
 * supported by the hypervisor, as indicated by feature bits, should
 * never fail unless improperly formated.
 */
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
				 struct scatterlist *data, int out, int in)
{
609
	struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
610 611 612
	struct virtio_net_ctrl_hdr ctrl;
	virtio_net_ctrl_ack status = ~0;
	unsigned int tmp;
613
	int i;
614

615 616 617
	/* Caller should know better */
	BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
		(out + in > VIRTNET_SEND_COMMAND_SG_MAX));
618 619 620 621 622 623 624 625 626 627

	out++; /* Add header */
	in++; /* Add return status */

	ctrl.class = class;
	ctrl.cmd = cmd;

	sg_init_table(sg, out + in);

	sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
628 629
	for_each_sg(data, s, out + in - 2, i)
		sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
630 631
	sg_set_buf(&sg[out + in - 1], &status, sizeof(status));

632
	BUG_ON(vi->cvq->vq_ops->add_buf(vi->cvq, sg, out, in, vi) < 0);
633 634 635 636 637 638 639 640 641 642 643 644 645

	vi->cvq->vq_ops->kick(vi->cvq);

	/*
	 * Spin for a response, the kick causes an ioport write, trapping
	 * into the hypervisor, so the request should be handled immediately.
	 */
	while (!vi->cvq->vq_ops->get_buf(vi->cvq, &tmp))
		cpu_relax();

	return status == VIRTIO_NET_OK;
}

R
Rusty Russell 已提交
646 647 648 649 650 651 652 653 654
static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_disable(&vi->napi);

	return 0;
}

655 656 657 658 659 660 661 662 663 664 665
static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct virtio_device *vdev = vi->vdev;

	if (data && !virtio_has_feature(vdev, VIRTIO_NET_F_CSUM))
		return -ENOSYS;

	return ethtool_op_set_tx_hw_csum(dev, data);
}

666 667 668
static void virtnet_set_rx_mode(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);
669
	struct scatterlist sg[2];
670
	u8 promisc, allmulti;
671 672
	struct virtio_net_ctrl_mac *mac_data;
	struct dev_addr_list *addr;
J
Jiri Pirko 已提交
673
	struct netdev_hw_addr *ha;
674 675
	void *buf;
	int i;
676 677 678 679 680

	/* We can't dynamicaly set ndo_set_rx_mode, so return gracefully */
	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
		return;

681 682
	promisc = ((dev->flags & IFF_PROMISC) != 0);
	allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
683

684
	sg_init_one(sg, &promisc, sizeof(promisc));
685 686 687

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_PROMISC,
688
				  sg, 1, 0))
689 690 691
		dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
			 promisc ? "en" : "dis");

692
	sg_init_one(sg, &allmulti, sizeof(allmulti));
693 694 695

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
				  VIRTIO_NET_CTRL_RX_ALLMULTI,
696
				  sg, 1, 0))
697 698
		dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
			 allmulti ? "en" : "dis");
699 700

	/* MAC filter - use one buffer for both lists */
701
	mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) +
702 703 704 705 706 707
				 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
	if (!buf) {
		dev_warn(&dev->dev, "No memory for MAC address buffer\n");
		return;
	}

708 709
	sg_init_table(sg, 2);

710
	/* Store the unicast list and count in the front of the buffer */
711
	mac_data->entries = dev->uc.count;
J
Jiri Pirko 已提交
712
	i = 0;
713
	list_for_each_entry(ha, &dev->uc.list, list)
J
Jiri Pirko 已提交
714
		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
715 716

	sg_set_buf(&sg[0], mac_data,
717
		   sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN));
718 719

	/* multicast list and count fill the end */
720
	mac_data = (void *)&mac_data->macs[dev->uc.count][0];
721 722 723 724 725 726 727 728 729 730 731 732 733 734 735

	mac_data->entries = dev->mc_count;
	addr = dev->mc_list;
	for (i = 0; i < dev->mc_count; i++, addr = addr->next)
		memcpy(&mac_data->macs[i][0], addr->da_addr, ETH_ALEN);

	sg_set_buf(&sg[1], mac_data,
		   sizeof(mac_data->entries) + (dev->mc_count * ETH_ALEN));

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
				  VIRTIO_NET_CTRL_MAC_TABLE_SET,
				  sg, 2, 0))
		dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");

	kfree(buf);
736 737
}

738
static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
739 740 741 742
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

743
	sg_init_one(&sg, &vid, sizeof(vid));
744 745 746 747 748 749

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
}

750
static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
751 752 753 754
{
	struct virtnet_info *vi = netdev_priv(dev);
	struct scatterlist sg;

755
	sg_init_one(&sg, &vid, sizeof(vid));
756 757 758 759 760 761

	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
}

762
static const struct ethtool_ops virtnet_ethtool_ops = {
763 764
	.set_tx_csum = virtnet_set_tx_csum,
	.set_sg = ethtool_op_set_sg,
765
	.set_tso = ethtool_op_set_tso,
766
	.set_ufo = ethtool_op_set_ufo,
767
	.get_link = ethtool_op_get_link,
768 769
};

M
Mark McLoughlin 已提交
770 771 772 773 774 775 776 777 778 779 780
#define MIN_MTU 68
#define MAX_MTU 65535

static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
{
	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
		return -EINVAL;
	dev->mtu = new_mtu;
	return 0;
}

781 782 783 784 785
static const struct net_device_ops virtnet_netdev = {
	.ndo_open            = virtnet_open,
	.ndo_stop   	     = virtnet_close,
	.ndo_start_xmit      = start_xmit,
	.ndo_validate_addr   = eth_validate_addr,
786
	.ndo_set_mac_address = virtnet_set_mac_address,
787
	.ndo_set_rx_mode     = virtnet_set_rx_mode,
788
	.ndo_change_mtu	     = virtnet_change_mtu,
789 790
	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
791 792 793 794 795
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller = virtnet_netpoll,
#endif
};

796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
static void virtnet_update_status(struct virtnet_info *vi)
{
	u16 v;

	if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS))
		return;

	vi->vdev->config->get(vi->vdev,
			      offsetof(struct virtio_net_config, status),
			      &v, sizeof(v));

	/* Ignore unknown (future) status bits */
	v &= VIRTIO_NET_S_LINK_UP;

	if (vi->status == v)
		return;

	vi->status = v;

	if (vi->status & VIRTIO_NET_S_LINK_UP) {
		netif_carrier_on(vi->dev);
		netif_wake_queue(vi->dev);
	} else {
		netif_carrier_off(vi->dev);
		netif_stop_queue(vi->dev);
	}
}

/* Config-change interrupt: only the link status can change for us. */
static void virtnet_config_changed(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;

	virtnet_update_status(vi);
}

R
Rusty Russell 已提交
831 832 833 834 835
static int virtnet_probe(struct virtio_device *vdev)
{
	int err;
	struct net_device *dev;
	struct virtnet_info *vi;
836 837 838 839
	struct virtqueue *vqs[3];
	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
	const char *names[] = { "input", "output", "control" };
	int nvqs;
R
Rusty Russell 已提交
840 841 842 843 844 845 846

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
847
	dev->netdev_ops = &virtnet_netdev;
R
Rusty Russell 已提交
848
	dev->features = NETIF_F_HIGHDMA;
849
	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
R
Rusty Russell 已提交
850 851 852
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
853
	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
R
Rusty Russell 已提交
854 855
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
856
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
R
Rusty Russell 已提交
857 858 859
			dev->features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
860
		/* Individual feature bits: what can host handle? */
861
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
862
			dev->features |= NETIF_F_TSO;
863
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
864
			dev->features |= NETIF_F_TSO6;
865
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
866
			dev->features |= NETIF_F_TSO_ECN;
867
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
868
			dev->features |= NETIF_F_UFO;
R
Rusty Russell 已提交
869 870 871
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
872
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
873 874 875
		vdev->config->get(vdev,
				  offsetof(struct virtio_net_config, mac),
				  dev->dev_addr, dev->addr_len);
876
	} else
R
Rusty Russell 已提交
877 878 879 880
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
881
	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
R
Rusty Russell 已提交
882 883
	vi->dev = dev;
	vi->vdev = vdev;
884
	vdev->priv = vi;
885
	vi->pages = NULL;
886
	INIT_DELAYED_WORK(&vi->refill, refill_work);
R
Rusty Russell 已提交
887

888 889 890 891 892 893
	/* If we can receive ANY GSO packets, we must allocate large ones. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)
	    || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
		vi->big_packets = true;

894 895 896
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
		vi->mergeable_rx_bufs = true;

897 898 899 900 901 902
	/* We expect two virtqueues, receive then send,
	 * and optionally control. */
	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;

	err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
	if (err)
R
Rusty Russell 已提交
903 904
		goto free;

905 906
	vi->rvq = vqs[0];
	vi->svq = vqs[1];
R
Rusty Russell 已提交
907

908
	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
909
		vi->cvq = vqs[2];
910 911 912

		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
			dev->features |= NETIF_F_HW_VLAN_FILTER;
913 914
	}

R
Rusty Russell 已提交
915 916 917 918 919 920 921
	/* Initialize our empty receive and send queues. */
	skb_queue_head_init(&vi->recv);
	skb_queue_head_init(&vi->send);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
922
		goto free_vqs;
R
Rusty Russell 已提交
923
	}
924 925

	/* Last of all, set up some receive buffers. */
926
	try_fill_recv(vi, GFP_KERNEL);
927 928 929 930 931 932 933

	/* If we didn't even get one input buffer, we're useless. */
	if (vi->num == 0) {
		err = -ENOMEM;
		goto unregister;
	}

934 935
	vi->status = VIRTIO_NET_S_LINK_UP;
	virtnet_update_status(vi);
936
	netif_carrier_on(dev);
937

R
Rusty Russell 已提交
938 939 940
	pr_debug("virtnet: registered device %s\n", dev->name);
	return 0;

941 942
unregister:
	unregister_netdev(dev);
943
	cancel_delayed_work_sync(&vi->refill);
944 945
free_vqs:
	vdev->config->del_vqs(vdev);
R
Rusty Russell 已提交
946 947 948 949 950
free:
	free_netdev(dev);
	return err;
}

951
static void __devexit virtnet_remove(struct virtio_device *vdev)
R
Rusty Russell 已提交
952
{
953
	struct virtnet_info *vi = vdev->priv;
954 955
	struct sk_buff *skb;

R
Rusty Russell 已提交
956 957 958
	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

959 960 961 962 963
	/* Free our skbs in send and recv queues, if any. */
	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
		kfree_skb(skb);
		vi->num--;
	}
W
Wang Chen 已提交
964
	__skb_queue_purge(&vi->send);
965 966

	BUG_ON(vi->num != 0);
967 968

	unregister_netdev(vi->dev);
969
	cancel_delayed_work_sync(&vi->refill);
970

971 972
	vdev->config->del_vqs(vi->vdev);

973 974 975
	while (vi->pages)
		__free_pages(get_a_page(vi, GFP_KERNEL), 0);

976
	free_netdev(vi->dev);
R
Rusty Russell 已提交
977 978 979 980 981 982 983
}

/* Devices this driver binds to: any virtio network device. */
static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

984
static unsigned int features[] = {
985 986
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
	VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
987
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
988
	VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
989
	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
990
	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
991
	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
992 993
};

R
Rusty Russell 已提交
994
static struct virtio_driver virtio_net = {
995 996
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
R
Rusty Russell 已提交
997 998 999 1000 1001
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	__devexit_p(virtnet_remove),
1002
	.config_changed = virtnet_config_changed,
R
Rusty Russell 已提交
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019
};

/* Module entry point: register the virtio network driver. */
static int __init init(void)
{
	return register_virtio_driver(&virtio_net);
}

/* Module exit point: unregister the driver. */
static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_net);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");