netvsc_drv.c 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/init.h>
24
#include <linux/atomic.h>
25 26 27 28 29 30 31 32 33 34
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
35
#include <linux/slab.h>
36 37 38 39
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
40

41
#include "hyperv_net.h"
42 43

struct net_device_context {
44
	/* point back to our device context */
45
	struct hv_device *device_ctx;
46
	struct delayed_work dwork;
47 48 49
};


50
static int ring_size = 128;
S
Stephen Hemminger 已提交
51 52
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
53

54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
/*
 * One deferred rx-filter update: allocated by netvsc_set_multicast_list()
 * and consumed (and freed) by do_set_multicast().
 */
struct set_multicast_work {
	struct work_struct work;	/* work item handed to schedule_work() */
	struct net_device *net;		/* interface whose flags are applied */
};

static void do_set_multicast(struct work_struct *w)
{
	struct set_multicast_work *swk =
		container_of(w, struct set_multicast_work, work);
	struct net_device *net = swk->net;

	struct net_device_context *ndevctx = netdev_priv(net);
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;

	nvdev = hv_get_drvdata(ndevctx->device_ctx);
	if (nvdev == NULL)
71
		goto out;
72 73 74

	rdev = nvdev->extension;
	if (rdev == NULL)
75
		goto out;
76 77 78 79 80 81 82 83 84 85

	if (net->flags & IFF_PROMISC)
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_PROMISCUOUS);
	else
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_BROADCAST |
			NDIS_PACKET_TYPE_ALL_MULTICAST |
			NDIS_PACKET_TYPE_DIRECTED);

86
out:
87 88 89
	kfree(w);
}

90
static void netvsc_set_multicast_list(struct net_device *net)
91
{
92 93 94 95 96 97 98 99
	struct set_multicast_work *swk =
		kmalloc(sizeof(struct set_multicast_work), GFP_ATOMIC);
	if (swk == NULL)
		return;

	swk->net = net;
	INIT_WORK(&swk->work, do_set_multicast);
	schedule_work(&swk->work);
100 101 102 103 104
}

static int netvsc_open(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
105
	struct hv_device *device_obj = net_device_ctx->device_ctx;
106
	int ret = 0;
107

108 109 110 111 112
	/* Open up the device */
	ret = rndis_filter_open(device_obj);
	if (ret != 0) {
		netdev_err(net, "unable to open device (ret %d).\n", ret);
		return ret;
113 114
	}

115 116
	netif_start_queue(net);

117 118 119 120 121 122
	return ret;
}

static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
123
	struct hv_device *device_obj = net_device_ctx->device_ctx;
124
	int ret;
125 126 127

	netif_stop_queue(net);

128
	ret = rndis_filter_close(device_obj);
129
	if (ret != 0)
130
		netdev_err(net, "unable to close device (ret %d).\n", ret);
131 132 133 134 135 136

	return ret;
}

static void netvsc_xmit_completion(void *context)
{
137
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
138
	struct sk_buff *skb = (struct sk_buff *)
139
		(unsigned long)packet->completion.send.send_completion_tid;
140 141 142

	kfree(packet);

143
	if (skb)
144
		dev_kfree_skb_any(skb);
145 146
}

147
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
148 149
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
150
	struct hv_netvsc_packet *packet;
151
	int ret;
152
	unsigned int i, num_pages, npg_data;
153

154 155 156 157
	/* Add multipage for skb->data and additional one for RNDIS */
	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 1;
158

159
	/* Allocate a netvsc packet based on # of frags. */
160
	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
161
			 (num_pages * sizeof(struct hv_page_buffer)) +
162
			 sizeof(struct rndis_filter_packet), GFP_ATOMIC);
163
	if (!packet) {
164
		/* out of memory, drop packet */
165
		netdev_err(net, "unable to allocate hv_netvsc_packet\n");
166 167 168

		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
169
		return NETDEV_TX_BUSY;
170 171
	}

172
	packet->extension = (void *)(unsigned long)packet +
173
				sizeof(struct hv_netvsc_packet) +
174
				    (num_pages * sizeof(struct hv_page_buffer));
175

176
	/* Setup the rndis header */
177
	packet->page_buf_cnt = num_pages;
178

179
	/* Initialize it from the skb */
180
	packet->total_data_buflen = skb->len;
181

182
	/* Start filling in the page buffers starting after RNDIS buffer. */
183 184
	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
	packet->page_buf[1].offset
185
		= (unsigned long)skb->data & (PAGE_SIZE - 1);
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
	if (npg_data == 1)
		packet->page_buf[1].len = skb_headlen(skb);
	else
		packet->page_buf[1].len = PAGE_SIZE
			- packet->page_buf[1].offset;

	for (i = 2; i <= npg_data; i++) {
		packet->page_buf[i].pfn = virt_to_phys(skb->data
			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
		packet->page_buf[i].offset = 0;
		packet->page_buf[i].len = PAGE_SIZE;
	}
	if (npg_data > 1)
		packet->page_buf[npg_data].len = (((unsigned long)skb->data
			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;
201 202 203

	/* Additional fragments are after SKB data */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
E
Eric Dumazet 已提交
204
		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];
205

206 207 208 209
		packet->page_buf[i+npg_data+1].pfn =
			page_to_pfn(skb_frag_page(f));
		packet->page_buf[i+npg_data+1].offset = f->page_offset;
		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
210 211
	}

212
	/* Set the completion routine */
213 214 215
	packet->completion.send.send_completion = netvsc_xmit_completion;
	packet->completion.send.send_completion_ctx = packet;
	packet->completion.send.send_completion_tid = (unsigned long)skb;
216

217
	ret = rndis_filter_send(net_device_ctx->device_ctx,
218 219
				  packet);
	if (ret == 0) {
220 221
		net->stats.tx_bytes += skb->len;
		net->stats.tx_packets++;
222 223
	} else {
		/* we are shutting down or bus overloaded, just drop packet */
224
		net->stats.tx_dropped++;
225 226
		kfree(packet);
		dev_kfree_skb_any(skb);
227 228
	}

229
	return ret ? NETDEV_TX_BUSY : NETDEV_TX_OK;
230 231
}

232
/*
233 234
 * netvsc_linkstatus_callback - Link up/down notification
 */
235
void netvsc_linkstatus_callback(struct hv_device *device_obj,
236
				       unsigned int status)
237
{
238
	struct net_device *net;
239
	struct net_device_context *ndev_ctx;
240 241 242 243
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;
244

245
	if (!net) {
246 247
		netdev_err(net, "got link status but net device "
				"not initialized yet\n");
248 249 250
		return;
	}

251
	if (status == 1) {
252 253
		netif_carrier_on(net);
		netif_wake_queue(net);
254
		ndev_ctx = netdev_priv(net);
255
		schedule_delayed_work(&ndev_ctx->dwork, 0);
256
		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
257
	} else {
258 259 260 261 262
		netif_carrier_off(net);
		netif_stop_queue(net);
	}
}

263 264 265
/*
 * netvsc_recv_callback -  Callback when we receive a packet from the
 * "wire" on the specified device.
266
 */
267
int netvsc_recv_callback(struct hv_device *device_obj,
268
				struct hv_netvsc_packet *packet)
269
{
270
	struct net_device *net = dev_get_drvdata(&device_obj->device);
271
	struct sk_buff *skb;
272 273 274 275
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;
276

277
	if (!net) {
278 279
		netdev_err(net, "got receive callback but net device"
			" not initialized yet\n");
280 281 282
		return 0;
	}

283
	/* Allocate a skb - TODO direct I/O to pages? */
284
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
285 286 287 288
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		return 0;
	}
289

290 291 292 293
	/*
	 * Copy to skb. This copy is needed here since the memory pointed by
	 * hv_netvsc_packet cannot be deallocated
	 */
294 295
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
		packet->total_data_buflen);
296 297 298 299

	skb->protocol = eth_type_trans(skb, net);
	skb->ip_summed = CHECKSUM_NONE;

300
	net->stats.rx_packets++;
301
	net->stats.rx_bytes += packet->total_data_buflen;
302

303 304
	/*
	 * Pass the skb back up. Network stack will deallocate the skb when it
305 306
	 * is done.
	 * TODO - use NAPI?
307
	 */
308
	netif_rx(skb);
309 310 311 312

	return 0;
}

313 314 315 316 317 318 319 320
/*
 * netvsc_get_drvinfo - Fill in the ethtool driver information.
 * @net: interface being queried (not used)
 * @info: ethtool structure to fill
 *
 * Copies are bounded by the size of each destination field so that a long
 * HV_DRV_VERSION can never overrun the ethtool buffers.
 */
static void netvsc_get_drvinfo(struct net_device *net,
			       struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, "hv_netvsc", sizeof(info->driver));
	strlcpy(info->version, HV_DRV_VERSION, sizeof(info->version));
	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
}

321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev =  ndevctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
	struct netvsc_device_info device_info;
	int limit = ETH_DATA_LEN;

	if (nvdev == NULL || nvdev->destroy)
		return -ENODEV;

	if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
		limit = NETVSC_MTU;

	if (mtu < 68 || mtu > limit)
		return -EINVAL;

	nvdev->start_remove = true;
	cancel_delayed_work_sync(&ndevctx->dwork);
	netif_stop_queue(ndev);
	rndis_filter_device_remove(hdev);

	ndev->mtu = mtu;

	ndevctx->device_ctx = hdev;
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	rndis_filter_device_add(hdev, &device_info);
	netif_wake_queue(ndev);

	return 0;
}

354 355 356 357 358
/*
 * ethtool operations for netvsc interfaces: driver info comes from
 * netvsc_get_drvinfo(), link state from the generic ethtool_op_get_link().
 */
static const struct ethtool_ops ethtool_ops = {
	.get_drvinfo	= netvsc_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

359 360 361 362
static const struct net_device_ops device_ops = {
	.ndo_open =			netvsc_open,
	.ndo_stop =			netvsc_close,
	.ndo_start_xmit =		netvsc_start_xmit,
363
	.ndo_set_rx_mode =		netvsc_set_multicast_list,
364
	.ndo_change_mtu =		netvsc_change_mtu,
365 366
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_set_mac_address =		eth_mac_addr,
367 368
};

369 370 371 372
/*
 * Send GARP packet to network peers after migrations.
 * After Quick Migration, the network is not immediately operational in the
 * current context when receiving RNDIS_STATUS_MEDIA_CONNECT event. So, add
373
 * another netif_notify_peers() into a delayed work, otherwise GARP packet
374 375 376 377 378 379
 * will not be sent after quick migration, and cause network disconnection.
 */
static void netvsc_send_garp(struct work_struct *w)
{
	struct net_device_context *ndev_ctx;
	struct net_device *net;
380
	struct netvsc_device *net_device;
381

382
	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
383 384
	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
	net = net_device->ndev;
385 386 387 388
	netif_notify_peers(net);
}


389 390
static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
391 392 393 394 395 396
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	int ret;

397
	net = alloc_etherdev(sizeof(struct net_device_context));
398
	if (!net)
399
		return -ENOMEM;
400 401 402 403 404

	/* Set initial state */
	netif_carrier_off(net);

	net_device_ctx = netdev_priv(net);
405
	net_device_ctx->device_ctx = dev;
406
	hv_set_drvdata(dev, net);
407
	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
408 409 410

	net->netdev_ops = &device_ops;

411
	/* TODO: Add GSO and Checksum offload */
412
	net->hw_features = NETIF_F_SG;
413 414
	net->features = NETIF_F_SG;

415
	SET_ETHTOOL_OPS(net, &ethtool_ops);
416
	SET_NETDEV_DEV(net, &dev->device);
417 418 419

	ret = register_netdev(net);
	if (ret != 0) {
420
		pr_err("Unable to register netdev.\n");
421
		free_netdev(net);
422
		goto out;
423 424
	}

425 426 427 428 429 430
	/* Notify the netvsc driver of the new device */
	device_info.ring_size = ring_size;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		unregister_netdev(net);
431
		free_netdev(net);
432
		hv_set_drvdata(dev, NULL);
433
		return ret;
434
	}
435 436 437
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	netif_carrier_on(net);
438

439
out:
440 441 442
	return ret;
}

443
static int netvsc_remove(struct hv_device *dev)
444
{
445
	struct net_device *net;
446
	struct net_device_context *ndev_ctx;
447 448 449 450
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(dev);
	net = net_device->ndev;
451 452

	if (net == NULL) {
453
		dev_err(&dev->device, "No net device to remove\n");
454 455 456
		return 0;
	}

457 458
	net_device->start_remove = true;

459 460 461
	ndev_ctx = netdev_priv(net);
	cancel_delayed_work_sync(&ndev_ctx->dwork);

462 463 464 465 466 467 468 469 470
	/* Stop outbound asap */
	netif_stop_queue(net);

	unregister_netdev(net);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
471
	rndis_filter_device_remove(dev);
472 473

	free_netdev(net);
474
	return 0;
475 476
}

477
/* VMBus device IDs claimed by this driver; the empty entry ends the list. */
static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
	{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		       0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);

486
/* The one and only one */
487
static struct  hv_driver netvsc_drv = {
488
	.name = "netvsc",
489
	.id_table = id_table,
490 491
	.probe = netvsc_probe,
	.remove = netvsc_remove,
492
};
493

494
/* Module exit: unregister the netvsc driver from the VMBus. */
static void __exit netvsc_drv_exit(void)
{
	vmbus_driver_unregister(&netvsc_drv);
}

499
/* Module init: register the netvsc driver with the VMBus. */
static int __init netvsc_drv_init(void)
{
	return vmbus_driver_register(&netvsc_drv);
}

504 505
/* Module metadata and entry points. */
MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");

module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);