/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#include "hyperv_net.h"

struct net_device_context {
44
	/* point back to our device context */
45
	struct hv_device *device_ctx;
46
	struct delayed_work dwork;
47
	struct work_struct work;
48 49
};

50
#define RING_SIZE_MIN 64
51
static int ring_size = 128;
S
Stephen Hemminger 已提交
52 53
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
54

55 56
static void do_set_multicast(struct work_struct *w)
{
57 58
	struct net_device_context *ndevctx =
		container_of(w, struct net_device_context, work);
59 60 61 62
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;

	nvdev = hv_get_drvdata(ndevctx->device_ctx);
63 64
	if (nvdev == NULL || nvdev->ndev == NULL)
		return;
65 66 67

	rdev = nvdev->extension;
	if (rdev == NULL)
68
		return;
69

70
	if (nvdev->ndev->flags & IFF_PROMISC)
71 72 73 74 75 76 77 78 79
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_PROMISCUOUS);
	else
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_BROADCAST |
			NDIS_PACKET_TYPE_ALL_MULTICAST |
			NDIS_PACKET_TYPE_DIRECTED);
}

80
static void netvsc_set_multicast_list(struct net_device *net)
81
{
82
	struct net_device_context *net_device_ctx = netdev_priv(net);
83

84
	schedule_work(&net_device_ctx->work);
85 86 87 88 89
}

static int netvsc_open(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
90
	struct hv_device *device_obj = net_device_ctx->device_ctx;
91
	int ret = 0;
92

93 94 95 96 97
	/* Open up the device */
	ret = rndis_filter_open(device_obj);
	if (ret != 0) {
		netdev_err(net, "unable to open device (ret %d).\n", ret);
		return ret;
98 99
	}

100 101
	netif_start_queue(net);

102 103 104 105 106 107
	return ret;
}

static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
108
	struct hv_device *device_obj = net_device_ctx->device_ctx;
109
	int ret;
110

111
	netif_tx_disable(net);
112

113 114
	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
	cancel_work_sync(&net_device_ctx->work);
115
	ret = rndis_filter_close(device_obj);
116
	if (ret != 0)
117
		netdev_err(net, "unable to close device (ret %d).\n", ret);
118 119 120 121 122 123

	return ret;
}

static void netvsc_xmit_completion(void *context)
{
124
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
125
	struct sk_buff *skb = (struct sk_buff *)
126
		(unsigned long)packet->completion.send.send_completion_tid;
127 128 129

	kfree(packet);

130
	if (skb)
131
		dev_kfree_skb_any(skb);
132 133
}

134
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
135 136
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
137
	struct hv_netvsc_packet *packet;
138
	int ret;
139
	unsigned int i, num_pages, npg_data;
140

141
	/* Add multipages for skb->data and additional 2 for RNDIS */
142 143
	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
144
	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;
145

146
	/* Allocate a netvsc packet based on # of frags. */
147
	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
148
			 (num_pages * sizeof(struct hv_page_buffer)) +
149 150
			 sizeof(struct rndis_filter_packet) +
			 NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
151
	if (!packet) {
152
		/* out of memory, drop packet */
153
		netdev_err(net, "unable to allocate hv_netvsc_packet\n");
154 155 156

		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
157
		return NETDEV_TX_OK;
158 159
	}

160 161
	packet->vlan_tci = skb->vlan_tci;

162
	packet->extension = (void *)(unsigned long)packet +
163
				sizeof(struct hv_netvsc_packet) +
164
				    (num_pages * sizeof(struct hv_page_buffer));
165

166 167
	/* If the rndis msg goes beyond 1 page, we will add 1 later */
	packet->page_buf_cnt = num_pages - 1;
168

169
	/* Initialize it from the skb */
170
	packet->total_data_buflen = skb->len;
171

172
	/* Start filling in the page buffers starting after RNDIS buffer. */
173 174
	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
	packet->page_buf[1].offset
175
		= (unsigned long)skb->data & (PAGE_SIZE - 1);
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
	if (npg_data == 1)
		packet->page_buf[1].len = skb_headlen(skb);
	else
		packet->page_buf[1].len = PAGE_SIZE
			- packet->page_buf[1].offset;

	for (i = 2; i <= npg_data; i++) {
		packet->page_buf[i].pfn = virt_to_phys(skb->data
			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
		packet->page_buf[i].offset = 0;
		packet->page_buf[i].len = PAGE_SIZE;
	}
	if (npg_data > 1)
		packet->page_buf[npg_data].len = (((unsigned long)skb->data
			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
191 192 193

	/* Additional fragments are after SKB data */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
E
Eric Dumazet 已提交
194
		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
195

196 197 198 199
		packet->page_buf[i+npg_data+1].pfn =
			page_to_pfn(skb_frag_page(f));
		packet->page_buf[i+npg_data+1].offset = f->page_offset;
		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
200 201
	}

202
	/* Set the completion routine */
203 204 205
	packet->completion.send.send_completion = netvsc_xmit_completion;
	packet->completion.send.send_completion_ctx = packet;
	packet->completion.send.send_completion_tid = (unsigned long)skb;
206

207
	ret = rndis_filter_send(net_device_ctx->device_ctx,
208 209
				  packet);
	if (ret == 0) {
210 211
		net->stats.tx_bytes += skb->len;
		net->stats.tx_packets++;
212
	} else {
213
		kfree(packet);
214 215 216 217
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
218 219
	}

220
	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
221 222
}

223
/*
224 225
 * netvsc_linkstatus_callback - Link up/down notification
 */
226
void netvsc_linkstatus_callback(struct hv_device *device_obj,
227
				       unsigned int status)
228
{
229
	struct net_device *net;
230
	struct net_device_context *ndev_ctx;
231 232 233 234
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;
235

236
	if (!net) {
237 238
		netdev_err(net, "got link status but net device "
				"not initialized yet\n");
239 240 241
		return;
	}

242
	if (status == 1) {
243 244
		netif_carrier_on(net);
		netif_wake_queue(net);
245
		ndev_ctx = netdev_priv(net);
246
		schedule_delayed_work(&ndev_ctx->dwork, 0);
247
		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
248
	} else {
249
		netif_carrier_off(net);
250
		netif_tx_disable(net);
251 252 253
	}
}

254 255 256
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
257
 */
258
int netvsc_recv_callback(struct hv_device *device_obj,
259
				struct hv_netvsc_packet *packet)
260
{
261
	struct net_device *net;
262 263
	struct sk_buff *skb;

264
	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
265
	if (!net) {
266 267
		netdev_err(net, "got receive callback but net device"
			" not initialized yet\n");
268 269 270
		return 0;
	}

271
	/* Allocate a skb - TODO direct I/O to pages? */
272
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
273 274 275 276
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return 0;
	}
277

278 279 280 281
	/*
	 * Copy to skb. This copy is needed here since the memory pointed by
	 * hv_netvsc_packet cannot be deallocated
	 */
282 283
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
		packet->total_data_buflen);
284 285 286

	skb->protocol = eth_type_trans(skb, net);
	skb->ip_summed = CHECKSUM_NONE;
287
	skb->vlan_tci = packet->vlan_tci;
288

289
	net->stats.rx_packets++;
290
	net->stats.rx_bytes += packet->total_data_buflen;
291

292 293
	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
294 295
	 * is done.
	 * TODO - use NAPI?
296
	 */
297
	netif_rx(skb);
298 299 300 301

	return 0;
}

302 303 304
static void netvsc_get_drvinfo(struct net_device *net,
			       struct ethtool_drvinfo *info)
{
305
	strcpy(info->driver, KBUILD_MODNAME);
306 307 308 309
	strcpy(info->version, HV_DRV_VERSION);
	strcpy(info->fw_version, "N/A");
}

310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328
/*
 * ndo_change_mtu hook: applies the new MTU by tearing the RNDIS device
 * down and adding it again.
 *
 * The upper MTU limit is ETH_DATA_LEN, or NETVSC_MTU when the device
 * negotiated NVSP protocol version 2.
 *
 * Returns 0 on success, -ENODEV when the netvsc device is missing or being
 * destroyed, -EINVAL for an out-of-range MTU, or the error returned by
 * rndis_filter_device_add() when the device cannot be re-added.
 */
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev =  ndevctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
	struct netvsc_device_info device_info;
	int limit = ETH_DATA_LEN;
	int ret;

	if (nvdev == NULL || nvdev->destroy)
		return -ENODEV;

	if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
		limit = NETVSC_MTU;

	if (mtu < 68 || mtu > limit)
		return -EINVAL;

	/* Quiesce everything that could touch the device being removed. */
	nvdev->start_remove = true;
	cancel_delayed_work_sync(&ndevctx->dwork);
	cancel_work_sync(&ndevctx->work);
	netif_tx_disable(ndev);
	rndis_filter_device_remove(hdev);

	ndev->mtu = mtu;

	/* Same setup sequence as netvsc_probe() uses before adding the device. */
	ndevctx->device_ctx = hdev;
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	ret = rndis_filter_device_add(hdev, &device_info);
	if (ret != 0) {
		/*
		 * Mirror the probe error path: do not leave drvdata pointing
		 * at the net_device, and do not restart the queue on a device
		 * that could not be brought back.
		 */
		netdev_err(ndev, "unable to re-add netvsc device (ret %d)\n",
			   ret);
		hv_set_drvdata(hdev, NULL);
		return ret;
	}

	netif_wake_queue(ndev);

	return 0;
}

/*
 * ndo_set_mac_address hook: updates the MAC in the net_device through
 * eth_mac_addr() and then pushes it to the host with
 * rndis_filter_set_device_mac().  If the host update fails, the previous
 * address and address-assignment type are restored.
 *
 * Returns 0 on success or the error from whichever step failed.
 */
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
	struct net_device_context *ctx = netdev_priv(ndev);
	struct hv_device *hdev = ctx->device_ctx;
	struct sockaddr *new_addr = p;
	char old_mac[14];
	unsigned char old_assign_type;
	int rc;

	/* Snapshot the current state so it can be rolled back. */
	memcpy(old_mac, ndev->dev_addr, ETH_ALEN);
	old_assign_type = ndev->addr_assign_type;

	rc = eth_mac_addr(ndev, p);
	if (rc != 0)
		return rc;

	rc = rndis_filter_set_device_mac(hdev, new_addr->sa_data);
	if (rc == 0)
		return 0;

	/* roll back to saved MAC */
	memcpy(ndev->dev_addr, old_mac, ETH_ALEN);
	ndev->addr_assign_type = old_assign_type;

	return rc;
}

/* ethtool operations: driver info plus the generic link-state query. */
static const struct ethtool_ops ethtool_ops = {
	.get_drvinfo = netvsc_get_drvinfo,
	.get_link    = ethtool_op_get_link,
};

static const struct net_device_ops device_ops = {
	.ndo_open =			netvsc_open,
	.ndo_stop =			netvsc_close,
	.ndo_start_xmit =		netvsc_start_xmit,
381
	.ndo_set_rx_mode =		netvsc_set_multicast_list,
382
	.ndo_change_mtu =		netvsc_change_mtu,
383
	.ndo_validate_addr =		eth_validate_addr,
384
	.ndo_set_mac_address =		netvsc_set_mac_addr,
385 386
};

387 388 389 390
/*
 * Send GARP packet to network peers after migrations.
 * After Quick Migration, the network is not immediately operational in the
 * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
391
 * another netif_notify_peers() into a delayed work, otherwise GARP packet
392 393 394 395 396 397
 * will not be sent after quick migration, and cause network disconnection.
 */
static void netvsc_send_garp(struct work_struct *w)
{
	struct net_device_context *ndev_ctx;
	struct net_device *net;
398
	struct netvsc_device *net_device;
399

400
	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
401 402
	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
	net = net_device->ndev;
403
	netdev_notify_peers(net);
404 405 406
}


407 408
static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
409 410 411 412 413 414
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	int ret;

415
	net = alloc_etherdev(sizeof(struct net_device_context));
416
	if (!net)
417
		return -ENOMEM;
418 419 420 421 422

	/* Set initial state */
	netif_carrier_off(net);

	net_device_ctx = netdev_priv(net);
423
	net_device_ctx->device_ctx = dev;
424
	hv_set_drvdata(dev, net);
425
	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
426
	INIT_WORK(&net_device_ctx->work, do_set_multicast);
427 428 429

	net->netdev_ops = &device_ops;

430
	/* TODO: Add GSO and Checksum offload */
431
	net->hw_features = NETIF_F_SG;
432
	net->features = NETIF_F_SG | NETIF_F_HW_VLAN_TX;
433

434
	SET_ETHTOOL_OPS(net, &ethtool_ops);
435
	SET_NETDEV_DEV(net, &dev->device);
436 437 438

	ret = register_netdev(net);
	if (ret != 0) {
439
		pr_err("Unable to register netdev.\n");
440
		free_netdev(net);
441
		goto out;
442 443
	}

444 445 446 447 448 449
	/* Notify the netvsc driver of the new device */
	device_info.ring_size = ring_size;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		unregister_netdev(net);
450
		free_netdev(net);
451
		hv_set_drvdata(dev, NULL);
452
		return ret;
453
	}
454 455 456
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	netif_carrier_on(net);
457

458
out:
459 460 461
	return ret;
}

462
static int netvsc_remove(struct hv_device *dev)
463
{
464
	struct net_device *net;
465
	struct net_device_context *ndev_ctx;
466 467 468 469
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(dev);
	net = net_device->ndev;
470 471

	if (net == NULL) {
472
		dev_err(&dev->device, "No net device to remove\n");
473 474 475
		return 0;
	}

476 477
	net_device->start_remove = true;

478 479
	ndev_ctx = netdev_priv(net);
	cancel_delayed_work_sync(&ndev_ctx->dwork);
480
	cancel_work_sync(&ndev_ctx->work);
481

482
	/* Stop outbound asap */
483
	netif_tx_disable(net);
484 485 486 487 488 489 490

	unregister_netdev(net);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
491
	rndis_filter_device_remove(dev);
492 493

	free_netdev(net);
494
	return 0;
495 496
}

497
/* VMBus device IDs handled by this driver; the empty entry ends the table. */
static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
	{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		       0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);

/* The one and only one */
507
static struct  hv_driver netvsc_drv = {
508
	.name = KBUILD_MODNAME,
509
	.id_table = id_table,
510 511
	.probe = netvsc_probe,
	.remove = netvsc_remove,
512
};
513

514
/* Module exit: unregisters the driver from the VMBus core. */
static void __exit netvsc_drv_exit(void)
{
	vmbus_driver_unregister(&netvsc_drv);
}

/*
 * Module init: raises ring_size to RING_SIZE_MIN when a smaller value was
 * requested, then registers the driver with the VMBus core.
 *
 * Returns the result of vmbus_driver_register().
 */
static int __init netvsc_drv_init(void)
{
	if (ring_size < RING_SIZE_MIN) {
		/* Too small a request is raised to the minimum, not rejected. */
		ring_size = RING_SIZE_MIN;
		pr_info("Increased ring_size to %d (min allowed)\n",
			ring_size);
	}

	return vmbus_driver_register(&netvsc_drv);
}

/* Module metadata and entry points. */
MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");

module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);