/* A simple network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/scatterlist.h>

static int napi_weight = 128;
module_param(napi_weight, int, 0444);

static int csum = 1, gso = 1;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);

/* FIXME: MTU in config. */
#define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)

struct virtnet_info
{
	struct virtio_device *vdev;
	struct virtqueue *rvq, *svq;
	struct net_device *dev;
	struct napi_struct napi;

	/* The skb we couldn't send because buffers were full. */
	struct sk_buff *last_xmit_skb;

	struct timer_list xmit_free_timer;

	/* Number of input buffers, and max we've ever had. */
	unsigned int num, max;

	/* For cleaning up after transmission. */
	struct tasklet_struct tasklet;

	/* Receive & send queues. */
	struct sk_buff_head recv;
	struct sk_buff_head send;
};

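/* The per-packet virtio_net_hdr lives in the skb's control buffer (skb->cb),
 * so no separate allocation is needed for it. */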
static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
{
	return (struct virtio_net_hdr *)skb->cb;
}

static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
{
	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
}

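/* Send-virtqueue callback: the host has consumed some output buffers, so
 * wake the queue and let the tasklet retry any deferred last_xmit_skb. */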
static void skb_xmit_done(struct virtqueue *svq)
{
	struct virtnet_info *vi = svq->vdev->priv;

	/* Suppress further interrupts. */
	svq->vq_ops->disable_cb(svq);

	/* We were waiting for more output buffers. */
	netif_wake_queue(vi->dev);

	/* Make sure we re-xmit last_xmit_skb: if there are no more packets
	 * queued, start_xmit won't be called. */
	tasklet_schedule(&vi->tasklet);
}

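/* Handle one packet handed back by the receive virtqueue: validate its
 * length, apply the checksum and GSO metadata from the virtio header, and
 * pass the skb up the stack. */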
static void receive_skb(struct net_device *dev, struct sk_buff *skb,
			unsigned len)
{
	struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);

	if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
		pr_debug("%s: short packet %i\n", dev->name, len);
		dev->stats.rx_length_errors++;
		goto drop;
	}
	len -= sizeof(struct virtio_net_hdr);
	BUG_ON(len > MAX_PACKET_LEN);

	skb_trim(skb, len);

	dev->stats.rx_bytes += skb->len;
	dev->stats.rx_packets++;

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		pr_debug("Needs csum!\n");
		if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
			goto frame_err;
	}

	skb->protocol = eth_type_trans(skb, dev);
	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
		 ntohs(skb->protocol), skb->len, skb->pkt_type);

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
			break;
		case VIRTIO_NET_HDR_GSO_TCPV6:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
			break;
		default:
			if (net_ratelimit())
				printk(KERN_WARNING "%s: bad gso type %u.\n",
				       dev->name, hdr->gso_type);
			goto frame_err;
		}

		if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

		skb_shinfo(skb)->gso_size = hdr->gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
			if (net_ratelimit())
				printk(KERN_WARNING "%s: zero gso size.\n",
				       dev->name);
			goto frame_err;
		}

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;
	}

	netif_receive_skb(skb);
	return;

frame_err:
	dev->stats.rx_frame_errors++;
drop:
	dev_kfree_skb(skb);
}

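/* Keep the receive ring full of MAX_PACKET_LEN skbs until allocation fails
 * or the virtqueue refuses more buffers. */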
static void try_fill_recv(struct virtnet_info *vi)
{
	struct sk_buff *skb;
	struct scatterlist sg[2+MAX_SKB_FRAGS];
	int num, err;

	sg_init_table(sg, 2+MAX_SKB_FRAGS);
	for (;;) {
		skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
		if (unlikely(!skb))
			break;

		skb_put(skb, MAX_PACKET_LEN);
		vnet_hdr_to_sg(sg, skb);
		num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
		skb_queue_head(&vi->recv, skb);

		err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
		if (err) {
			skb_unlink(skb, &vi->recv);
			kfree_skb(skb);
			break;
		}
		vi->num++;
	}
	if (unlikely(vi->num > vi->max))
		vi->max = vi->num;
	vi->rvq->vq_ops->kick(vi->rvq);
}

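/* Receive-virtqueue callback: defer the real work to NAPI. */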
static void skb_recv_done(struct virtqueue *rvq)
{
	struct virtnet_info *vi = rvq->vdev->priv;
	/* Schedule NAPI; suppress further interrupts if successful. */
	if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
		rvq->vq_ops->disable_cb(rvq);
		__netif_rx_schedule(vi->dev, &vi->napi);
	}
}

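/* NAPI poll: drain up to @budget received buffers, refill the ring when it
 * runs low, and re-enable receive interrupts once we go idle. */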
static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
	struct sk_buff *skb = NULL;
	unsigned int len, received = 0;

again:
	while (received < budget &&
	       (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
		__skb_unlink(skb, &vi->recv);
		receive_skb(vi->dev, skb, len);
		vi->num--;
		received++;
	}

	/* FIXME: If we oom and completely run out of inbufs, we need
	 * to start a timer trying to fill more. */
	if (vi->num < vi->max / 2)
		try_fill_recv(vi);

	/* Out of packets? */
	if (received < budget) {
		netif_rx_complete(vi->dev, napi);
		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
		    && napi_schedule_prep(napi)) {
			vi->rvq->vq_ops->disable_cb(vi->rvq);
			__netif_rx_schedule(vi->dev, napi);
			goto again;
		}
	}

	return received;
}

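/* Reclaim skbs the host has finished sending.  Called under the tx lock. */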
static void free_old_xmit_skbs(struct virtnet_info *vi)
{
	struct sk_buff *skb;
	unsigned int len;

	while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
		pr_debug("Sent skb %p\n", skb);
		__skb_unlink(skb, &vi->send);
		vi->dev->stats.tx_bytes += skb->len;
		vi->dev->stats.tx_packets++;
		kfree_skb(skb);
	}
}

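/* Timer callback: reclaim sent skbs under the tx lock and re-arm while
 * packets are still queued for transmission. */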
static void xmit_free(unsigned long data)
{
	struct virtnet_info *vi = (void *)data;

	netif_tx_lock(vi->dev);

	free_old_xmit_skbs(vi);

	if (!skb_queue_empty(&vi->send))
		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));

	netif_tx_unlock(vi->dev);
}

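/* Fill in the virtio_net_hdr for this skb and add the header plus data
 * fragments to the send virtqueue; returns add_buf()'s result. */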
static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
{
	int num, err;
	struct scatterlist sg[2+MAX_SKB_FRAGS];
	struct virtio_net_hdr *hdr;
	const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;

	sg_init_table(sg, 2+MAX_SKB_FRAGS);

	pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb,
		 dest[0], dest[1], dest[2],
		 dest[3], dest[4], dest[5]);

	/* Encode metadata header at front. */
	hdr = skb_vnet_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->csum_start = skb->csum_start - skb_headroom(skb);
		hdr->csum_offset = skb->csum_offset;
	} else {
		hdr->flags = 0;
		hdr->csum_offset = hdr->csum_start = 0;
	}

	if (skb_is_gso(skb)) {
		hdr->hdr_len = skb_transport_header(skb) - skb->data;
		hdr->gso_size = skb_shinfo(skb)->gso_size;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else {
		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
		hdr->gso_size = hdr->hdr_len = 0;
	}

	vnet_hdr_to_sg(sg, skb);
	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;

	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
	if (!err)
		mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));

	return err;
}

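/* Tasklet: retry the deferred last_xmit_skb now that the host may have
 * freed up ring space. */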
static void xmit_tasklet(unsigned long data)
{
	struct virtnet_info *vi = (void *)data;

	netif_tx_lock_bh(vi->dev);
	if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
		vi->svq->vq_ops->kick(vi->svq);
		vi->last_xmit_skb = NULL;
	}
	netif_tx_unlock_bh(vi->dev);
}

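/* hard_start_xmit: free completed buffers, retry any deferred skb, then
 * queue the new one.  If the ring is full we stop the queue, but re-check
 * via enable_cb() to close the race with the host consuming buffers. */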
static int start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

again:
	/* Free up any pending old buffers before queueing new ones. */
	free_old_xmit_skbs(vi);

	/* If we have a buffer left over from last time, send it now. */
	if (unlikely(vi->last_xmit_skb)) {
		if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
			/* Drop this skb: we only queue one. */
			vi->dev->stats.tx_dropped++;
			kfree_skb(skb);
			skb = NULL;
			goto stop_queue;
		}
		vi->last_xmit_skb = NULL;
	}

	/* Put the new one in the send queue and transmit. */
	if (likely(skb)) {
		__skb_queue_head(&vi->send, skb);
		if (xmit_skb(vi, skb) != 0) {
			vi->last_xmit_skb = skb;
			skb = NULL;
			goto stop_queue;
		}
	}
done:
	vi->svq->vq_ops->kick(vi->svq);
	return NETDEV_TX_OK;

stop_queue:
	pr_debug("%s: virtio not prepared to send\n", dev->name);
	netif_stop_queue(dev);

	/* Activate callback for using skbs: if this returns false it
	 * means some were used in the meantime. */
	if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
		vi->svq->vq_ops->disable_cb(vi->svq);
		netif_start_queue(dev);
		goto again;
	}
	goto done;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void virtnet_netpoll(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_schedule(&vi->napi);
}
#endif

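/* Bring the interface up: enable NAPI and poll once in case the host filled
 * every receive buffer before we started listening. */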
static int virtnet_open(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_enable(&vi->napi);

	/* If all buffers were filled by the other side before we napi_enabled,
	 * we won't get another interrupt, so process any outstanding packets
	 * now.  virtnet_poll wants to re-enable the queue, so we disable here.
	 * We synchronize against interrupts via NAPI_STATE_SCHED. */
	if (netif_rx_schedule_prep(dev, &vi->napi)) {
		vi->rvq->vq_ops->disable_cb(vi->rvq);
		__netif_rx_schedule(dev, &vi->napi);
	}
	return 0;
}

static int virtnet_close(struct net_device *dev)
{
	struct virtnet_info *vi = netdev_priv(dev);

	napi_disable(&vi->napi);

	return 0;
}

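/* Probe: allocate the net_device, negotiate offload features with the host,
 * find the receive and send virtqueues, register the device and pre-fill
 * the receive ring. */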
static int virtnet_probe(struct virtio_device *vdev)
{
	int err;
	struct net_device *dev;
	struct virtnet_info *vi;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
	if (!dev)
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->open = virtnet_open;
	dev->stop = virtnet_close;
	dev->hard_start_xmit = start_xmit;
	dev->features = NETIF_F_HIGHDMA;
#ifdef CONFIG_NET_POLL_CONTROLLER
	dev->poll_controller = virtnet_netpoll;
#endif
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
	if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
			dev->features |= NETIF_F_TSO | NETIF_F_UFO
				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
		}
		/* Individual feature bits: what can host handle? */
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
			dev->features |= NETIF_F_TSO;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
			dev->features |= NETIF_F_TSO6;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
			dev->features |= NETIF_F_TSO_ECN;
		if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
			dev->features |= NETIF_F_UFO;
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
		vdev->config->get(vdev,
				  offsetof(struct virtio_net_config, mac),
				  dev->dev_addr, dev->addr_len);
	} else
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
	vi->dev = dev;
	vi->vdev = vdev;
	vdev->priv = vi;

	/* We expect two virtqueues, receive then send. */
	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
	if (IS_ERR(vi->rvq)) {
		err = PTR_ERR(vi->rvq);
		goto free;
	}

	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
	if (IS_ERR(vi->svq)) {
		err = PTR_ERR(vi->svq);
		goto free_recv;
	}

	/* Initialize our empty receive and send queues. */
	skb_queue_head_init(&vi->recv);
	skb_queue_head_init(&vi->send);

	tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);

	setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);

	err = register_netdev(dev);
	if (err) {
		pr_debug("virtio_net: registering device failed\n");
		goto free_send;
	}

	/* Last of all, set up some receive buffers. */
	try_fill_recv(vi);

	/* If we didn't even get one input buffer, we're useless. */
	if (vi->num == 0) {
		err = -ENOMEM;
		goto unregister;
	}

	pr_debug("virtnet: registered device %s\n", dev->name);
	return 0;

unregister:
	unregister_netdev(dev);
free_send:
	vdev->config->del_vq(vi->svq);
free_recv:
	vdev->config->del_vq(vi->rvq);
free:
	free_netdev(dev);
	return err;
}

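/* Remove: reset the device to stop virtqueue activity, then free queued
 * skbs, delete the virtqueues, and unregister and free the net_device. */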
static void virtnet_remove(struct virtio_device *vdev)
{
	struct virtnet_info *vi = vdev->priv;
	struct sk_buff *skb;

	/* Stop all the virtqueues. */
	vdev->config->reset(vdev);

	del_timer_sync(&vi->xmit_free_timer);

	/* Free our skbs in send and recv queues, if any. */
	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
		kfree_skb(skb);
		vi->num--;
	}
	__skb_queue_purge(&vi->send);

	BUG_ON(vi->num != 0);

	vdev->config->del_vq(vi->svq);
	vdev->config->del_vq(vi->rvq);
	unregister_netdev(vi->dev);
	free_netdev(vi->dev);
}

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

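/* Feature bits this driver understands; the virtio core will accept only
 * those the host also offers. */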
static unsigned int features[] = {
	VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
	VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
	VIRTIO_NET_F_HOST_ECN,
};

static struct virtio_driver virtio_net = {
	.feature_table = features,
	.feature_table_size = ARRAY_SIZE(features),
	.driver.name =	KBUILD_MODNAME,
	.driver.owner =	THIS_MODULE,
	.id_table =	id_table,
	.probe =	virtnet_probe,
	.remove =	__devexit_p(virtnet_remove),
};

static int __init init(void)
{
	return register_virtio_driver(&virtio_net);
}

static void __exit fini(void)
{
	unregister_virtio_driver(&virtio_net);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio network driver");
MODULE_LICENSE("GPL");