/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#include "hyperv_net.h"

struct net_device_context {
	/* point back to our device context */
	struct hv_device *device_ctx;
	struct delayed_work dwork;
	struct work_struct work;
};

#define RING_SIZE_MIN 64
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
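
/*
 * Usage sketch (assuming this file builds into the hv_netvsc module):
 *   modprobe hv_netvsc ring_size=256
 * netvsc_drv_init() raises anything below RING_SIZE_MIN to the minimum,
 * and S_IRUGO exposes the value read-only under
 * /sys/module/hv_netvsc/parameters/ring_size.
 */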

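/*
 * Rx-mode updates are deferred to a work item: .ndo_set_rx_mode is called
 * with netif_addr_lock_bh() held, while rndis_filter_set_packet_filter()
 * sends a message to the host and may sleep waiting for its completion.
 */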
static void do_set_multicast(struct work_struct *w)
{
	struct net_device_context *ndevctx =
		container_of(w, struct net_device_context, work);
	struct netvsc_device *nvdev;
	struct rndis_device *rdev;

	nvdev = hv_get_drvdata(ndevctx->device_ctx);
	if (nvdev == NULL || nvdev->ndev == NULL)
		return;

	rdev = nvdev->extension;
	if (rdev == NULL)
		return;

	if (nvdev->ndev->flags & IFF_PROMISC)
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_PROMISCUOUS);
	else
		rndis_filter_set_packet_filter(rdev,
			NDIS_PACKET_TYPE_BROADCAST |
			NDIS_PACKET_TYPE_ALL_MULTICAST |
			NDIS_PACKET_TYPE_DIRECTED);
}

static void netvsc_set_multicast_list(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);

	schedule_work(&net_device_ctx->work);
}

static int netvsc_open(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	int ret = 0;

	/* Open up the device */
	ret = rndis_filter_open(device_obj);
	if (ret != 0) {
		netdev_err(net, "unable to open device (ret %d).\n", ret);
		return ret;
	}

	netif_start_queue(net);

	return ret;
}

static int netvsc_close(struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_device *device_obj = net_device_ctx->device_ctx;
	int ret;

	netif_tx_disable(net);

	/* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
	cancel_work_sync(&net_device_ctx->work);
	ret = rndis_filter_close(device_obj);
	if (ret != 0)
		netdev_err(net, "unable to close device (ret %d).\n", ret);

	return ret;
}

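/*
 * Transmit completion: the skb pointer was stashed in send_completion_tid
 * (carried as the VMBus transaction id) by netvsc_start_xmit(), so both the
 * packet descriptor and the skb can be freed here once the host has
 * consumed the buffers.
 */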
static void netvsc_xmit_completion(void *context)
{
	struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)context;
	struct sk_buff *skb = (struct sk_buff *)
		(unsigned long)packet->completion.send.send_completion_tid;

	kfree(packet);

	if (skb)
		dev_kfree_skb_any(skb);
}

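/*
 * Layout of the page_buf array filled in below:
 *   page_buf[0]            - left for the RNDIS filter, which prepends the
 *                            RNDIS message (hence page_buf_cnt starts at
 *                            num_pages - 1, with one more slot added later
 *                            if that message spans two pages)
 *   page_buf[1..npg_data]  - the linear skb data (skb->data)
 *   page_buf[npg_data+1..] - the paged fragments
 * Example: 1500 bytes of linear data starting 100 bytes before a 4 KiB page
 * boundary span two pages, so npg_data = 2.
 */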
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
{
	struct net_device_context *net_device_ctx = netdev_priv(net);
	struct hv_netvsc_packet *packet;
	int ret;
	unsigned int i, num_pages, npg_data;

	/*
	 * Count the pages spanned by the linear skb->data, plus two more
	 * slots for the RNDIS message (one extra in case it crosses a page
	 * boundary).
	 */
	npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
		>> PAGE_SHIFT) - ((unsigned long)skb->data >> PAGE_SHIFT) + 1;
	num_pages = skb_shinfo(skb)->nr_frags + npg_data + 2;

	/* Allocate a netvsc packet based on # of frags. */
	packet = kzalloc(sizeof(struct hv_netvsc_packet) +
			 (num_pages * sizeof(struct hv_page_buffer)) +
			 sizeof(struct rndis_filter_packet) +
			 NDIS_VLAN_PPI_SIZE, GFP_ATOMIC);
	if (!packet) {
		/* out of memory, drop packet */
		netdev_err(net, "unable to allocate hv_netvsc_packet\n");

		dev_kfree_skb(skb);
		net->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	packet->vlan_tci = skb->vlan_tci;

	packet->extension = (void *)(unsigned long)packet +
				sizeof(struct hv_netvsc_packet) +
				(num_pages * sizeof(struct hv_page_buffer));

	/* If the rndis msg goes beyond 1 page, we will add 1 later */
	packet->page_buf_cnt = num_pages - 1;

	/* Initialize it from the skb */
	packet->total_data_buflen = skb->len;

	/* Start filling in the page buffers after the RNDIS buffer. */
	packet->page_buf[1].pfn = virt_to_phys(skb->data) >> PAGE_SHIFT;
	packet->page_buf[1].offset
		= (unsigned long)skb->data & (PAGE_SIZE - 1);
	if (npg_data == 1)
		packet->page_buf[1].len = skb_headlen(skb);
	else
		packet->page_buf[1].len = PAGE_SIZE
			- packet->page_buf[1].offset;

	for (i = 2; i <= npg_data; i++) {
		packet->page_buf[i].pfn = virt_to_phys(skb->data
			+ PAGE_SIZE * (i-1)) >> PAGE_SHIFT;
		packet->page_buf[i].offset = 0;
		packet->page_buf[i].len = PAGE_SIZE;
	}
	if (npg_data > 1)
		packet->page_buf[npg_data].len = (((unsigned long)skb->data
			+ skb_headlen(skb) - 1) & (PAGE_SIZE - 1)) + 1;

	/* Additional fragments are after the skb linear data */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[i];

		packet->page_buf[i+npg_data+1].pfn =
			page_to_pfn(skb_frag_page(f));
		packet->page_buf[i+npg_data+1].offset = f->page_offset;
		packet->page_buf[i+npg_data+1].len = skb_frag_size(f);
	}

	/* Set the completion routine */
	packet->completion.send.send_completion = netvsc_xmit_completion;
	packet->completion.send.send_completion_ctx = packet;
	packet->completion.send.send_completion_tid = (unsigned long)skb;

	ret = rndis_filter_send(net_device_ctx->device_ctx, packet);
	if (ret == 0) {
		net->stats.tx_bytes += skb->len;
		net->stats.tx_packets++;
	} else {
		kfree(packet);
		if (ret != -EAGAIN) {
			dev_kfree_skb_any(skb);
			net->stats.tx_dropped++;
		}
	}

	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
}
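
/*
 * Note on the return value above: for -EAGAIN (ring buffer full) the skb is
 * deliberately not freed and NETDEV_TX_BUSY is returned, so the core network
 * stack requeues the same skb and retries the transmit later.
 */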

/*
 * netvsc_linkstatus_callback - Link up/down notification
 */
void netvsc_linkstatus_callback(struct hv_device *device_obj,
				       unsigned int status)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device_obj);
	net = net_device->ndev;

	if (!net) {
		/* net is NULL here, so use pr_err() rather than netdev_err() */
		pr_err("got link status but net device not initialized yet\n");
		return;
	}

	if (status == 1) {
		netif_carrier_on(net);
		netif_wake_queue(net);
		ndev_ctx = netdev_priv(net);
		schedule_delayed_work(&ndev_ctx->dwork, 0);
		schedule_delayed_work(&ndev_ctx->dwork, msecs_to_jiffies(20));
	} else {
		netif_carrier_off(net);
		netif_tx_disable(net);
	}
}

/*
 * netvsc_recv_callback - Callback when we receive a packet from the
 * "wire" on the specified device.
 */
int netvsc_recv_callback(struct hv_device *device_obj,
				struct hv_netvsc_packet *packet)
{
	struct net_device *net;
	struct sk_buff *skb;

	net = ((struct netvsc_device *)hv_get_drvdata(device_obj))->ndev;
	if (!net) {
		/* net is NULL here, so use pr_err() rather than netdev_err() */
		pr_err("got receive callback but net device not initialized yet\n");
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}

	/* Allocate a skb - TODO direct I/O to pages? */
	skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
	if (unlikely(!skb)) {
		++net->stats.rx_dropped;
		packet->status = NVSP_STAT_FAIL;
		return 0;
	}

	/*
	 * Copy to skb. This copy is needed here since the memory pointed
	 * to by hv_netvsc_packet cannot be used once this callback returns.
	 */
	memcpy(skb_put(skb, packet->total_data_buflen), packet->data,
		packet->total_data_buflen);

	skb->protocol = eth_type_trans(skb, net);
	skb->ip_summed = CHECKSUM_NONE;
	skb->vlan_tci = packet->vlan_tci;

	net->stats.rx_packets++;
	net->stats.rx_bytes += packet->total_data_buflen;

	/*
	 * Pass the skb back up. The network stack will deallocate the skb
	 * when it is done.
	 * TODO - use NAPI?
	 */
	netif_rx(skb);

	return 0;
}

static void netvsc_get_drvinfo(struct net_device *net,
			       struct ethtool_drvinfo *info)
{
	strcpy(info->driver, KBUILD_MODNAME);
	strcpy(info->version, HV_DRV_VERSION);
	strcpy(info->fw_version, "N/A");
}

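/*
 * An MTU change tears the RNDIS device down and re-adds it instead of
 * updating it in place: the host learns the frame size during RNDIS
 * initialization, so a full re-initialization appears to be the
 * straightforward way to propagate the new MTU.
 */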
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev =  ndevctx->device_ctx;
	struct netvsc_device *nvdev = hv_get_drvdata(hdev);
	struct netvsc_device_info device_info;
	int limit = ETH_DATA_LEN;

	if (nvdev == NULL || nvdev->destroy)
		return -ENODEV;

	if (nvdev->nvsp_version == NVSP_PROTOCOL_VERSION_2)
		limit = NETVSC_MTU;

	if (mtu < 68 || mtu > limit)
		return -EINVAL;

	nvdev->start_remove = true;
	cancel_delayed_work_sync(&ndevctx->dwork);
	cancel_work_sync(&ndevctx->work);
	netif_tx_disable(ndev);
	rndis_filter_device_remove(hdev);

	ndev->mtu = mtu;

	ndevctx->device_ctx = hdev;
	hv_set_drvdata(hdev, ndev);
	device_info.ring_size = ring_size;
	rndis_filter_device_add(hdev, &device_info);
	netif_wake_queue(ndev);

	return 0;
}

static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
{
	struct net_device_context *ndevctx = netdev_priv(ndev);
	struct hv_device *hdev =  ndevctx->device_ctx;
	struct sockaddr *addr = p;
	char save_adr[14];
	unsigned char save_aatype;
	int err;

	memcpy(save_adr, ndev->dev_addr, ETH_ALEN);
	save_aatype = ndev->addr_assign_type;

	err = eth_mac_addr(ndev, p);
	if (err != 0)
		return err;

	err = rndis_filter_set_device_mac(hdev, addr->sa_data);
	if (err != 0) {
		/* roll back to saved MAC */
		memcpy(ndev->dev_addr, save_adr, ETH_ALEN);
		ndev->addr_assign_type = save_aatype;
	}

	return err;
}

static const struct ethtool_ops ethtool_ops = {
	.get_drvinfo	= netvsc_get_drvinfo,
	.get_link	= ethtool_op_get_link,
};

static const struct net_device_ops device_ops = {
	.ndo_open =			netvsc_open,
	.ndo_stop =			netvsc_close,
	.ndo_start_xmit =		netvsc_start_xmit,
	.ndo_set_rx_mode =		netvsc_set_multicast_list,
	.ndo_change_mtu =		netvsc_change_mtu,
	.ndo_validate_addr =		eth_validate_addr,
	.ndo_set_mac_address =		netvsc_set_mac_addr,
};

/*
 * Send a GARP packet to network peers after migrations.
 * After a Quick Migration, the network is not immediately operational in the
 * current context when the RNDIS_STATUS_MEDIA_CONNECT event is received. So,
 * defer another netdev_notify_peers() call to a delayed work; otherwise the
 * GARP packet will not be sent after a quick migration, causing a network
 * disconnection.
 */
static void netvsc_send_garp(struct work_struct *w)
{
	struct net_device_context *ndev_ctx;
	struct net_device *net;
	struct netvsc_device *net_device;

	ndev_ctx = container_of(w, struct net_device_context, dwork.work);
	net_device = hv_get_drvdata(ndev_ctx->device_ctx);
	net = net_device->ndev;
	netdev_notify_peers(net);
}

static int netvsc_probe(struct hv_device *dev,
			const struct hv_vmbus_device_id *dev_id)
{
	struct net_device *net = NULL;
	struct net_device_context *net_device_ctx;
	struct netvsc_device_info device_info;
	int ret;

	net = alloc_etherdev(sizeof(struct net_device_context));
	if (!net)
		return -ENOMEM;

	/* Set initial state */
	netif_carrier_off(net);

	net_device_ctx = netdev_priv(net);
	net_device_ctx->device_ctx = dev;
	hv_set_drvdata(dev, net);
	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_send_garp);
	INIT_WORK(&net_device_ctx->work, do_set_multicast);

	net->netdev_ops = &device_ops;

	/* TODO: Add GSO and Checksum offload */
	net->hw_features = NETIF_F_SG;
	net->features = NETIF_F_SG | NETIF_F_HW_VLAN_TX;

	SET_ETHTOOL_OPS(net, &ethtool_ops);
	SET_NETDEV_DEV(net, &dev->device);

	ret = register_netdev(net);
	if (ret != 0) {
		pr_err("Unable to register netdev.\n");
		free_netdev(net);
		goto out;
	}

	/* Notify the netvsc driver of the new device */
	device_info.ring_size = ring_size;
	ret = rndis_filter_device_add(dev, &device_info);
	if (ret != 0) {
		netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
		unregister_netdev(net);
		free_netdev(net);
		hv_set_drvdata(dev, NULL);
		return ret;
	}
	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);

	netif_carrier_on(net);

out:
	return ret;
}

static int netvsc_remove(struct hv_device *dev)
{
	struct net_device *net;
	struct net_device_context *ndev_ctx;
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(dev);
	net = net_device->ndev;

	if (net == NULL) {
		dev_err(&dev->device, "No net device to remove\n");
		return 0;
	}

	net_device->start_remove = true;

	ndev_ctx = netdev_priv(net);
	cancel_delayed_work_sync(&ndev_ctx->dwork);
	cancel_work_sync(&ndev_ctx->work);

	/* Stop outbound asap */
	netif_tx_disable(net);

	unregister_netdev(net);

	/*
	 * Call to the vsc driver to let it know that the device is being
	 * removed
	 */
	rndis_filter_device_remove(dev);

	free_netdev(net);
	return 0;
}

static const struct hv_vmbus_device_id id_table[] = {
	/* Network guid */
	{ VMBUS_DEVICE(0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		       0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E) },
	{ },
};

MODULE_DEVICE_TABLE(vmbus, id_table);
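
/*
 * The VMBUS_DEVICE() bytes in id_table are the little-endian encoding of
 * the Hyper-V synthetic network class GUID,
 * f8615163-df3e-46c5-913f-f2d2f965ed0e; vmbus matches device offers from
 * the host against this table to bind the driver.
 */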

/* The one and only one */
static struct hv_driver netvsc_drv = {
	.name = KBUILD_MODNAME,
	.id_table = id_table,
	.probe = netvsc_probe,
	.remove = netvsc_remove,
};

static void __exit netvsc_drv_exit(void)
{
	vmbus_driver_unregister(&netvsc_drv);
}

static int __init netvsc_drv_init(void)
{
	if (ring_size < RING_SIZE_MIN) {
		ring_size = RING_SIZE_MIN;
		pr_info("Increased ring_size to %d (min allowed)\n",
			ring_size);
	}
	return vmbus_driver_register(&netvsc_drv);
}

MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V network driver");

module_init(netvsc_drv_init);
module_exit(netvsc_drv_exit);